aboutsummaryrefslogtreecommitdiff
path: root/src/param.cpp
diff options
context:
space:
mode:
authorxiangzhou2016-05-23 17:05:35 -0400
committerxiangzhou2016-05-23 17:05:35 -0400
commit943e970c9cbc184dcca679fbe455f48c32242cdc (patch)
tree70369d969dee3432e09e345c8106a541ac0d5a76 /src/param.cpp
parent3ab77854a52ac016b9e2b824858f5f0c695d4170 (diff)
downloadpangemma-943e970c9cbc184dcca679fbe455f48c32242cdc.tar.gz
version 0.95alpha
Diffstat (limited to 'src/param.cpp')
-rw-r--r--src/param.cpp878
1 files changed, 634 insertions, 244 deletions
diff --git a/src/param.cpp b/src/param.cpp
index 33b7b48..0a63a16 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -64,7 +64,7 @@ n_accept(0),
n_mh(10),
geo_mean(2000.0),
randseed(-1),
-window_cm(0), window_bp(0), window_ns(0),
+window_cm(0), window_bp(0), window_ns(0), n_block(200),
error(false),
ni_subsample(0), n_cvt(1), n_vc(1),
time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0), time_UtZ(0.0), time_opt(0.0), time_Omega(0.0)
@@ -77,19 +77,27 @@ void PARAM::ReadFiles (void)
{
string file_str;
-
- if (!file_cat.empty()) {
+ //read cat file
+ if (!file_mcat.empty()) {
+ if (ReadFile_mcat (file_mcat, mapRS2cat, n_vc)==false) {error=true;}
+ } else if (!file_cat.empty()) {
if (ReadFile_cat (file_cat, mapRS2cat, n_vc)==false) {error=true;}
}
- if (!file_var.empty()) {
- if (ReadFile_var (file_var, mapRS2var)==false) {error=true;}
+ //read snp weight files
+ if (!file_wcat.empty()) {
+ if (ReadFile_wsnp (file_wcat, n_vc, mapRS2wcat)==false) {error=true;}
+ }
+ if (!file_wsnp.empty()) {
+ if (ReadFile_wsnp (file_wsnp, mapRS2wsnp)==false) {error=true;}
}
+ //count number of kinship files
if (!file_mk.empty()) {
if (CountFileLines (file_mk, n_vc)==false) {error=true;}
}
+ //read snp set
if (!file_snps.empty()) {
if (ReadFile_snps (file_snps, setSnps)==false) {error=true;}
} else {
@@ -184,10 +192,17 @@ void PARAM::ReadFiles (void)
//read genotype and phenotype file for plink format
if (!file_bfile.empty()) {
file_str=file_bfile+".bim";
+ snpInfo.clear();
if (ReadFile_bim (file_str, snpInfo)==false) {error=true;}
- file_str=file_bfile+".fam";
- if (ReadFile_fam (file_str, indicator_pheno, pheno, mapID2num, p_column)==false) {error=true;}
+ //if both fam file and pheno files are used, use phenotypes inside the pheno file
+ if (!file_pheno.empty()) {
+ //phenotype file before genotype file
+ if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;}
+ } else {
+ file_str=file_bfile+".fam";
+ if (ReadFile_fam (file_str, indicator_pheno, pheno, mapID2num, p_column)==false) {error=true;}
+ }
//post-process covariates and phenotypes, obtain ni_test, save all useful covariates
ProcessCvtPhen();
@@ -228,6 +243,97 @@ void PARAM::ReadFiles (void)
ns_total=indicator_snp.size();
}
+
+ //read genotype file for multiple plink files
+ if (!file_mbfile.empty()) {
+ igzstream infile (file_mbfile.c_str(), igzstream::in);
+ if (!infile) {cout<<"error! fail to open mbfile file: "<<file_mbfile<<endl; return;}
+
+ string file_name;
+
+ size_t t=0, ns_test_tmp=0;
+
+ gsl_matrix *W;
+ while (!safeGetline(infile, file_name).eof()) {
+ file_str=file_name+".bim";
+
+ if (ReadFile_bim (file_str, snpInfo)==false) {error=true;}
+
+ if (t==0) {
+ //if both fam file and pheno files are used, use phenotypes inside the pheno file
+ if (!file_pheno.empty()) {
+ //phenotype file before genotype file
+ if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;}
+ } else {
+ file_str=file_name+".fam";
+ if (ReadFile_fam (file_str, indicator_pheno, pheno, mapID2num, p_column)==false) {error=true;}
+ }
+
+ //post-process covariates and phenotypes, obtain ni_test, save all useful covariates
+ ProcessCvtPhen();
+
+ //obtain covariate matrix
+ W=gsl_matrix_alloc (ni_test, n_cvt);
+ CopyCvt (W);
+ }
+
+ file_str=file_name+".bed";
+ if (ReadFile_bed (file_str, setSnps, W, indicator_idv, indicator_snp, snpInfo, maf_level, miss_level, hwe_level, r2_level, ns_test_tmp)==false) {error=true;}
+ mindicator_snp.push_back(indicator_snp);
+ msnpInfo.push_back(snpInfo);
+ ns_test+=ns_test_tmp;
+ ns_total+=indicator_snp.size();
+
+ t++;
+ }
+
+ gsl_matrix_free(W);
+
+ infile.close();
+ infile.clear();
+ }
+
+
+
+ //read genotype and phenotype file for multiple bimbam files
+ if (!file_mgeno.empty()) {
+ //annotation file before genotype file
+ if (!file_anno.empty() ) {
+ if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp, mapRS2cM)==false) {error=true;}
+ }
+
+ //phenotype file before genotype file
+ if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;}
+
+ //post-process covariates and phenotypes, obtain ni_test, save all useful covariates
+ ProcessCvtPhen();
+
+ //obtain covariate matrix
+ gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
+ CopyCvt (W);
+
+ igzstream infile (file_mgeno.c_str(), igzstream::in);
+ if (!infile) {cout<<"error! fail to open mgeno file: "<<file_mgeno<<endl; return;}
+
+ string file_name;
+ size_t ns_test_tmp;
+ while (!safeGetline(infile, file_name).eof()) {
+ if (ReadFile_geno (file_name, setSnps, W, indicator_idv, indicator_snp, maf_level, miss_level, hwe_level, r2_level, mapRS2chr, mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp)==false) {error=true;}
+
+ mindicator_snp.push_back(indicator_snp);
+ msnpInfo.push_back(snpInfo);
+ ns_test+=ns_test_tmp;
+ ns_total+=indicator_snp.size();
+ }
+
+ gsl_matrix_free(W);
+
+ infile.close();
+ infile.clear();
+ }
+
+
+
if (!file_gene.empty()) {
if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;}
@@ -292,7 +398,7 @@ void PARAM::CheckParam (void)
//check parameters
if (k_mode!=1 && k_mode!=2) {cout<<"error! unknown kinship/relatedness input mode: "<<k_mode<<endl; error=true;}
- if (a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=5 && a_mode!=11 && a_mode!=12 && a_mode!=13 && a_mode!=14 && a_mode!=21 && a_mode!=22 && a_mode!=25 && a_mode!=26 && a_mode!=27 && a_mode!=28 && a_mode!=31 && a_mode!=41 && a_mode!=42 && a_mode!=43 && a_mode!=51 && a_mode!=52 && a_mode!=53 && a_mode!=54 && a_mode!=61 && a_mode!=62 && a_mode!=71)
+ if (a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=5 && a_mode!=11 && a_mode!=12 && a_mode!=13 && a_mode!=14 && a_mode!=21 && a_mode!=22 && a_mode!=25 && a_mode!=26 && a_mode!=27 && a_mode!=28 && a_mode!=31 && a_mode!=41 && a_mode!=42 && a_mode!=43 && a_mode!=51 && a_mode!=52 && a_mode!=53 && a_mode!=54 && a_mode!=61 && a_mode!=62 && a_mode!=66 && a_mode!=67 && a_mode!=71)
{cout<<"error! unknown analysis mode: "<<a_mode<<". make sure -gk or -eigen or -lmm or -bslmm -predict or -calccov is sepcified correctly."<<endl; error=true;}
if (miss_level>1) {cout<<"error! missing level needs to be between 0 and 1. current value = "<<miss_level<<endl; error=true;}
if (maf_level>0.5) {cout<<"error! maf level needs to be between 0 and 0.5. current value = "<<maf_level<<endl; error=true;}
@@ -400,8 +506,8 @@ void PARAM::CheckParam (void)
str=file_cat;
if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open category file: "<<str<<endl; error=true;}
- str=file_var;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open category file: "<<str<<endl; error=true;}
+ str=file_mcat;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open mcategory file: "<<str<<endl; error=true;}
str=file_beta;
if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open beta file: "<<str<<endl; error=true;}
@@ -409,23 +515,33 @@ void PARAM::CheckParam (void)
str=file_cor;
if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open correlation file: "<<str<<endl; error=true;}
- str=file_q;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open q file: "<<str<<endl; error=true;}
+ if (!file_study.empty()) {
+ str=file_study+".Vq.txt";
+ if (stat(str.c_str(),&fileInfo)==-1) {cout<<"error! fail to open .Vq.txt file: "<<str<<endl; error=true;}
+ str=file_study+".q.txt";
+ if (stat(str.c_str(),&fileInfo)==-1) {cout<<"error! fail to open .q.txt file: "<<str<<endl; error=true;}
+ str=file_study+".size.txt";
+ if (stat(str.c_str(),&fileInfo)==-1) {cout<<"error! fail to open .size.txt file: "<<str<<endl; error=true;}
+ }
- str=file_s;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open s file: "<<str<<endl; error=true;}
+ if (!file_ref.empty()) {
+ str=file_ref+".S.txt";
+ if (stat(str.c_str(),&fileInfo)==-1) {cout<<"error! fail to open .S.txt file: "<<str<<endl; error=true;}
+ str=file_ref+".size.txt";
+ if (stat(str.c_str(),&fileInfo)==-1) {cout<<"error! fail to open .size.txt file: "<<str<<endl; error=true;}
+ }
- str=file_v;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open v file: "<<str<<endl; error=true;}
+ str=file_mstudy;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open mstudy file: "<<str<<endl; error=true;}
- str=file_mq;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open mq file: "<<str<<endl; error=true;}
+ str=file_mref;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open mref file: "<<str<<endl; error=true;}
- str=file_ms;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open ms file: "<<str<<endl; error=true;}
+ str=file_mgeno;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open mgeno file: "<<str<<endl; error=true;}
- str=file_mv;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open mv file: "<<str<<endl; error=true;}
+ str=file_mbfile;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open mbfile file: "<<str<<endl; error=true;}
size_t flag=0;
if (!file_bfile.empty()) {flag++;}
@@ -434,7 +550,7 @@ void PARAM::CheckParam (void)
// WJA added
if (!file_oxford.empty()) {flag++;}
- if (flag!=1 && a_mode!=27 && a_mode!=28 && a_mode!=43 && a_mode!=5 && a_mode!=61 && a_mode!=62) {
+ if (flag!=1 && a_mode!=27 && a_mode!=28 && a_mode!=43 && a_mode!=5 && a_mode!=61 && a_mode!=62 && a_mode!=66 && a_mode!=67) {
cout<<"error! either plink binary files, or bimbam mean genotype files, or gene expression files are required."<<endl; error=true;
}
@@ -443,21 +559,30 @@ void PARAM::CheckParam (void)
}
if (a_mode==61 || a_mode==62) {
- if (!file_pheno.empty()) {
+ if (!file_beta.empty()) {
+ if ( file_mbfile.empty() && file_bfile.empty() && file_mgeno.empty() && file_geno.empty() && file_mref.empty() && file_ref.empty() ) {
+ cout<<"error! missing genotype file or ref/mref file."<<endl; error=true;
+ }
+ } else if (!file_pheno.empty()) {
if (file_kin.empty() && (file_ku.empty()||file_kd.empty()) && file_mk.empty() ) {
cout<<"error! missing relatedness file. "<<endl; error=true;
}
+ /*
} else if (!file_cor.empty()) {
if (file_beta.empty() ) {
cout<<"error! missing cor file."<<endl; error=true;
}
- } else {
- if ( (file_mq.empty() || file_ms.empty() || file_mv.empty() ) && (file_q.empty() || file_s.empty() || file_v.empty() ) ) {
- cout<<"error! either phenotype/kinship files or ms/mq/mv s/q/v files are required."<<endl; error=true;
- }
+ */
+ } else if ( (file_mstudy.empty() && file_study.empty()) || (file_mref.empty() && file_ref.empty() ) ) {
+ cout<<"error! either beta file, or phenotype files or study/ref mstudy/mref files are required."<<endl; error=true;
}
}
+ if (a_mode==66 || a_mode==67) {
+ if (file_beta.empty() || ( file_mbfile.empty() && file_bfile.empty() && file_mgeno.empty() && file_geno.empty()) ) {
+ cout<<"error! missing beta file or genotype file."<<endl; error=true;
+ }
+ }
if (!file_epm.empty() && file_bfile.empty() && file_geno.empty() ) {cout<<"error! estimated parameter file also requires genotype file."<<endl; error=true;}
@@ -525,13 +650,16 @@ void PARAM::CheckParam (void)
void PARAM::CheckData (void) {
if(file_oxford.empty()) // WJA NOTE: I added this condition so that covariates can be added through sample, probably not exactly what is wanted
-
{
if ((file_cvt).empty() || (indicator_cvt).size()==0) {
n_cvt=1;
}
}
+ if ( (a_mode==66 || a_mode==67) && (v_pve.size()!=n_vc)) {
+ cout<<"error! the number of pve estimates does not equal to the number of categories in the cat file:"<<v_pve.size()<<" "<<n_vc<<endl; error=true;
+ }
+
if ( (indicator_cvt).size()!=0 && (indicator_cvt).size()!=(indicator_idv).size()) {
error=true;
cout<<"error! number of rows in the covariates file do not match the number of individuals. "<<endl;
@@ -610,7 +738,7 @@ void PARAM::CheckData (void) {
}
}
*/
- if (ni_test==0 && file_cor.empty() && file_mq.empty() && file_q.empty() && file_beta.empty() ) {
+ if (ni_test==0 && file_cor.empty() && file_mstudy.empty() && file_study.empty() && file_beta.empty() ) {
error=true;
cout<<"error! number of analyzed individuals equals 0. "<<endl;
return;
@@ -631,7 +759,7 @@ void PARAM::CheckData (void) {
}
//output some information
- if (file_cor.empty() && file_mq.empty() && file_q.empty() ) {
+ if (file_cor.empty() && file_mstudy.empty() && file_study.empty() && a_mode!=27 && a_mode!=28) {
cout<<"## number of total individuals = "<<ni_total<<endl;
if (a_mode==43) {
cout<<"## number of analyzed individuals = "<<ni_cvt<<endl;
@@ -709,6 +837,9 @@ void PARAM::CheckData (void) {
}
}
+ if (a_mode==62 && !file_beta.empty() && mapRS2wcat.size()==0) {cout<<"vc analysis with beta files requires -wcat file."<<endl; error=true;}
+ if (a_mode==67 && mapRS2wcat.size()==0) {cout<<"ci analysis with beta files requires -wcat file."<<endl; error=true;}
+
//file_mk needs to contain more than one line
if (n_vc==1 && !file_mk.empty()) {cout<<"error! -mk file should contain more than one line."<<endl; error=true;}
@@ -783,46 +914,52 @@ void PARAM::CalcKin (gsl_matrix *matrix_kin) {
-//from an existing n by nd G matrix, compute the d by d S matrix
-void compKtoS (const gsl_matrix *G, gsl_matrix *S) {
- size_t n_vc=S->size1, ni_test=G->size1;
- double di, dj, tr_KiKj, sum_Ki, sum_Kj, s_Ki, s_Kj, s_KiKj, si, sj, d;
+//from an existing n by nd A and K matrices, compute the d by d S matrix (which is not necessary symmetric)
+void compAKtoS (const gsl_matrix *A, const gsl_matrix *K, const size_t n_cvt, gsl_matrix *S) {
+ size_t n_vc=S->size1, ni_test=A->size1;
+ double di, dj, tr_AK, sum_A, sum_K, s_A, s_K, sum_AK, tr_A, tr_K, d;
for (size_t i=0; i<n_vc; i++) {
- for (size_t j=i; j<n_vc; j++) {
- tr_KiKj=0; sum_Ki=0; sum_Kj=0; s_KiKj=0; si=0; sj=0;
+ for (size_t j=0; j<n_vc; j++) {
+ tr_AK=0; sum_A=0; sum_K=0; sum_AK=0; tr_A=0; tr_K=0;
for (size_t l=0; l<ni_test; l++) {
- s_Ki=0; s_Kj=0;
+ s_A=0; s_K=0;
for (size_t k=0; k<ni_test; k++) {
- di=gsl_matrix_get(G, l, k+ni_test*i);
- dj=gsl_matrix_get(G, l, k+ni_test*j);
- s_Ki+=di; s_Kj+=dj;
+ di=gsl_matrix_get(A, l, k+ni_test*i);
+ dj=gsl_matrix_get(K, l, k+ni_test*j);
+ s_A+=di; s_K+=dj;
- tr_KiKj+=di*dj; sum_Ki+=di; sum_Kj+=dj;
- if (l==k) {si+=di; sj+=dj;}
+ tr_AK+=di*dj; sum_A+=di; sum_K+=dj;
+ if (l==k) {tr_A+=di; tr_K+=dj;}
}
- s_KiKj+=s_Ki*s_Kj;
+ sum_AK+=s_A*s_K;
}
- sum_Ki/=(double)ni_test;
- sum_Kj/=(double)ni_test;
- s_KiKj/=(double)ni_test;
- si-=sum_Ki;
- sj-=sum_Kj;
- d=tr_KiKj-2*s_KiKj+sum_Ki*sum_Kj;
- d=d/(si*sj)-1/(double)(ni_test-1);
+ sum_A/=(double)ni_test;
+ sum_K/=(double)ni_test;
+ sum_AK/=(double)ni_test;
+ tr_A-=sum_A;
+ tr_K-=sum_K;
+ d=tr_AK-2*sum_AK+sum_A*sum_K;
+
+ if (tr_A==0 || tr_K==0) {
+ d=0;
+ } else {
+ d=d/(tr_A*tr_K)-1/(double)(ni_test-n_cvt);
+ }
gsl_matrix_set (S, i, j, d);
- if (i!=j) {gsl_matrix_set (S, j, i, d);}
}
}
+
+ //eigenlib_invert(Si);
//cout<<tr_KiKj<<" "<<s_KiKj<<" "<<sum_Ki<<" "<<sum_Kj<<" "<<si<<" "<<sj<<" "<<d*1000000<<endl;
return;
}
-//copied from lmm.cpp; is used in the following function compKtoQ
+//copied from lmm.cpp; is used in the following function compKtoV
//map a number 1-(n_cvt+2) to an index between 0 and [(n_c+2)^2+(n_c+2)]/2-1
size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) {
if (a>n_cvt+2 || b>n_cvt+2 || a<=0 || b<=0) {cout<<"error in GetabIndex."<<endl; return 0;}
@@ -836,20 +973,19 @@ size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) {
return index;
}
-//from an existing n by nd (centered) G matrix, compute the d+1 by d*(d+1) Q matrix
-//where inside i'th d+1 by d+1 matrix, each element is tr(KiKjKiKl)-r*tr(KjKiKl)-r*tr(KlKiKj)+r^2*tr(KjKl), where r=n/(n-1)
-void compKtoQ (const gsl_matrix *G, gsl_matrix *Q) {
+//from an existing n by nd (centered) G matrix, compute the d+1 by d*(d-1)/2*(d+1) Q matrix
+//where inside i'th d+1 by d+1 matrix, each element is tr(KiKlKjKm)-r*tr(KmKiKl)-r*tr(KlKjKm)+r^2*tr(KlKm), where r=n/(n-1)
+void compKtoV (const gsl_matrix *G, gsl_matrix *V) {
size_t n_vc=G->size2/G->size1, ni_test=G->size1;
- gsl_matrix *KiKj=gsl_matrix_alloc(ni_test, n_vc*(n_vc+1)/2*ni_test);
- gsl_vector *trKiKjKi=gsl_vector_alloc ( n_vc*n_vc );
+ gsl_matrix *KiKj=gsl_matrix_alloc(ni_test, (n_vc*(n_vc+1))/2*ni_test);
gsl_vector *trKiKj=gsl_vector_alloc( n_vc*(n_vc+1)/2 );
gsl_vector *trKi=gsl_vector_alloc(n_vc);
double d, tr, r=(double)ni_test/(double)(ni_test-1);
- size_t t, t_ij, t_il, t_jl, t_ii;
+ size_t t, t_il, t_jm, t_lm, t_im, t_jl, t_ij;
- //compute KiKj for all pairs of i and j (including the identity matrix)
+ //compute KiKj for all pairs of i and j (not including the identity matrix)
t=0;
for (size_t i=0; i<n_vc; i++) {
gsl_matrix_const_view Ki=gsl_matrix_const_submatrix(G, 0, i*ni_test, ni_test, ni_test);
@@ -889,99 +1025,108 @@ void compKtoQ (const gsl_matrix *G, gsl_matrix *Q) {
gsl_vector_set (trKi, i, tr);
}
- //compute trKiKjKi (it is not symmetric w.r.t. i and j)
+ //compute V
for (size_t i=0; i<n_vc; i++) {
- for (size_t j=0; j<n_vc; j++) {
- tr=0;
- t=GetabIndex (i+1, j+1, n_vc-2);
- for (size_t k=0; k<ni_test; k++) {
- gsl_vector_const_view KiKj_row=gsl_matrix_const_subrow (KiKj, k, t*ni_test, ni_test);
- gsl_vector_const_view KiKj_col=gsl_matrix_const_column (KiKj, t*ni_test+k);
-
- gsl_vector_const_view Ki_col=gsl_matrix_const_column (G, i*ni_test+k);
-
- if (i<=j) {
- gsl_blas_ddot (&KiKj_row.vector, &Ki_col.vector, &d);
- tr+=d;
- } else {
- gsl_blas_ddot (&KiKj_col.vector, &Ki_col.vector, &d);
- tr+=d;
- }
- }
- gsl_vector_set (trKiKjKi, i*n_vc+j, tr);
- }
- }
+ for (size_t j=i; j<n_vc; j++) {
+ t_ij=GetabIndex (i+1, j+1, n_vc-2);
+ for (size_t l=0; l<n_vc+1; l++) {
+ for (size_t m=0; m<n_vc+1; m++) {
+ if (l!=n_vc && m!=n_vc) {
+ t_il=GetabIndex (i+1, l+1, n_vc-2);
+ t_jm=GetabIndex (j+1, m+1, n_vc-2);
+ t_lm=GetabIndex (l+1, m+1, n_vc-2);
+ //cout<<ni_test<<" "<<r<<t_ij<<" "<<t_il<<" "<<t_jl<<" "<<endl;
+ tr=0;
+ for (size_t k=0; k<ni_test; k++) {
+ gsl_vector_const_view KiKl_row=gsl_matrix_const_subrow (KiKj, k, t_il*ni_test, ni_test);
+ gsl_vector_const_view KiKl_col=gsl_matrix_const_column (KiKj, t_il*ni_test+k);
+ gsl_vector_const_view KjKm_row=gsl_matrix_const_subrow (KiKj, k, t_jm*ni_test, ni_test);
+ gsl_vector_const_view KjKm_col=gsl_matrix_const_column (KiKj, t_jm*ni_test+k);
+
+ gsl_vector_const_view Kl_row=gsl_matrix_const_subrow (G, k, l*ni_test, ni_test);
+ gsl_vector_const_view Km_row=gsl_matrix_const_subrow (G, k, m*ni_test, ni_test);
+
+ if (i<=l && j<=m) {
+ gsl_blas_ddot (&KiKl_row.vector, &KjKm_col.vector, &d);
+ tr+=d;
+ gsl_blas_ddot (&Km_row.vector, &KiKl_col.vector, &d);
+ tr-=r*d;
+ gsl_blas_ddot (&Kl_row.vector, &KjKm_col.vector, &d);
+ tr-=r*d;
+ } else if (i<=l && j>m) {
+ gsl_blas_ddot (&KiKl_row.vector, &KjKm_row.vector, &d);
+ tr+=d;
+ gsl_blas_ddot (&Km_row.vector, &KiKl_col.vector, &d);
+ tr-=r*d;
+ gsl_blas_ddot (&Kl_row.vector, &KjKm_row.vector, &d);
+ tr-=r*d;
+ } else if (i>l && j<=m) {
+ gsl_blas_ddot (&KiKl_col.vector, &KjKm_col.vector, &d);
+ tr+=d;
+ gsl_blas_ddot (&Km_row.vector, &KiKl_row.vector, &d);
+ tr-=r*d;
+ gsl_blas_ddot (&Kl_row.vector, &KjKm_col.vector, &d);
+ tr-=r*d;
+ } else {
+ gsl_blas_ddot (&KiKl_col.vector, &KjKm_row.vector, &d);
+ tr+=d;
+ gsl_blas_ddot (&Km_row.vector, &KiKl_row.vector, &d);
+ tr-=r*d;
+ gsl_blas_ddot (&Kl_row.vector, &KjKm_row.vector, &d);
+ tr-=r*d;
+ }
+ }
- //compute Q
- for (size_t i=0; i<n_vc; i++) {
- for (size_t j=0; j<n_vc+1; j++) {
- for (size_t l=j; l<n_vc+1; l++) {
- if (j!=n_vc && l!=n_vc) {
- t_ij=GetabIndex (i+1, j+1, n_vc-2);
- t_il=GetabIndex (i+1, l+1, n_vc-2);
- t_jl=GetabIndex (j+1, l+1, n_vc-2);
-
- //cout<<ni_test<<" "<<r<<t_ij<<" "<<t_il<<" "<<t_jl<<" "<<endl;
- tr=0;
- for (size_t k=0; k<ni_test; k++) {
- gsl_vector_const_view KiKj_row=gsl_matrix_const_subrow (KiKj, k, t_ij*ni_test, ni_test);
- gsl_vector_const_view KiKj_col=gsl_matrix_const_column (KiKj, t_ij*ni_test+k);
- gsl_vector_const_view KiKl_row=gsl_matrix_const_subrow (KiKj, k, t_il*ni_test, ni_test);
- gsl_vector_const_view KiKl_col=gsl_matrix_const_column (KiKj, t_il*ni_test+k);
-
- gsl_vector_const_view Kj_row=gsl_matrix_const_subrow (G, k, j*ni_test, ni_test);
- gsl_vector_const_view Kl_row=gsl_matrix_const_subrow (G, k, l*ni_test, ni_test);
-
- if (i<=j && i<=l) {
- gsl_blas_ddot (&KiKj_row.vector, &KiKl_col.vector, &d);
- tr+=d;
- gsl_blas_ddot (&Kj_row.vector, &KiKl_col.vector, &d);
- tr-=r*d;
- gsl_blas_ddot (&Kl_row.vector, &KiKj_col.vector, &d);
- tr-=r*d;
- } else if (i<=j && i>l) {
- gsl_blas_ddot (&KiKj_row.vector, &KiKl_row.vector, &d);
- tr+=d;
- gsl_blas_ddot (&Kj_row.vector, &KiKl_row.vector, &d);
- tr-=r*d;
- gsl_blas_ddot (&Kl_row.vector, &KiKj_col.vector, &d);
- tr-=r*d;
- } else if (i>j && i<=l) {
- gsl_blas_ddot (&KiKj_col.vector, &KiKl_col.vector, &d);
- tr+=d;
- gsl_blas_ddot (&Kj_row.vector, &KiKl_col.vector, &d);
- tr-=r*d;
- gsl_blas_ddot (&Kl_row.vector, &KiKj_row.vector, &d);
- tr-=r*d;
- } else {
- gsl_blas_ddot (&KiKj_col.vector, &KiKl_row.vector, &d);
- tr+=d;
- gsl_blas_ddot (&Kj_row.vector, &KiKl_row.vector, &d);
- tr-=r*d;
- gsl_blas_ddot (&Kl_row.vector, &KiKj_row.vector, &d);
- tr-=r*d;
+ tr+=r*r*gsl_vector_get (trKiKj, t_lm);
+ } else if (l!=n_vc && m==n_vc) {
+ t_il=GetabIndex (i+1, l+1, n_vc-2);
+ t_jl=GetabIndex (j+1, l+1, n_vc-2);
+ tr=0;
+ for (size_t k=0; k<ni_test; k++) {
+ gsl_vector_const_view KiKl_row=gsl_matrix_const_subrow (KiKj, k, t_il*ni_test, ni_test);
+ gsl_vector_const_view KiKl_col=gsl_matrix_const_column (KiKj, t_il*ni_test+k);
+ gsl_vector_const_view Kj_row=gsl_matrix_const_subrow (G, k, j*ni_test, ni_test);
+
+ if (i<=l) {
+ gsl_blas_ddot (&KiKl_row.vector, &Kj_row.vector, &d);
+ tr+=d;
+ } else {
+ gsl_blas_ddot (&KiKl_col.vector, &Kj_row.vector, &d);
+ tr+=d;
+ }
}
+ tr+=-r*gsl_vector_get (trKiKj, t_il)-r*gsl_vector_get (trKiKj, t_jl)+r*r*gsl_vector_get (trKi, l);
+ } else if (l==n_vc && m!=n_vc) {
+ t_jm=GetabIndex (j+1, m+1, n_vc-2);
+ t_im=GetabIndex (i+1, m+1, n_vc-2);
+ tr=0;
+ for (size_t k=0; k<ni_test; k++) {
+ gsl_vector_const_view KjKm_row=gsl_matrix_const_subrow (KiKj, k, t_jm*ni_test, ni_test);
+ gsl_vector_const_view KjKm_col=gsl_matrix_const_column (KiKj, t_jm*ni_test+k);
+ gsl_vector_const_view Ki_row=gsl_matrix_const_subrow (G, k, i*ni_test, ni_test);
+
+ if (j<=m) {
+ gsl_blas_ddot (&KjKm_row.vector, &Ki_row.vector, &d);
+ tr+=d;
+ } else {
+ gsl_blas_ddot (&KjKm_col.vector, &Ki_row.vector, &d);
+ tr+=d;
+ }
+ }
+ tr+=-r*gsl_vector_get (trKiKj, t_im)-r*gsl_vector_get (trKiKj, t_jm)+r*r*gsl_vector_get (trKi, m);
+ } else {
+ tr=gsl_vector_get (trKiKj, t_ij)-r*gsl_vector_get (trKi, i)-r*gsl_vector_get (trKi, j)+r*r*(double)(ni_test-1);
}
- tr+=r*r*gsl_vector_get (trKiKj, t_jl);
- } else if (j!=n_vc && l==n_vc) {
- t_ij=GetabIndex (i+1, j+1, n_vc-2);
- tr=gsl_vector_get (trKiKjKi, i*n_vc+j)-2*r*gsl_vector_get (trKiKj, t_ij)+r*r*gsl_vector_get (trKi, j);
- } else if (j==n_vc && l==n_vc) {
- t_ii=GetabIndex (i+1, i+1, n_vc-2);
- tr=gsl_vector_get (trKiKj, t_ii)-2*r*gsl_vector_get (trKi, i)+r*r*(double)(ni_test-1);
+ gsl_matrix_set (V, l, t_ij*(n_vc+1)+m, tr);
}
-
- gsl_matrix_set (Q, j, i*(n_vc+1)+l, tr);
- if (l!=j) {gsl_matrix_set (Q, l, i*(n_vc+1)+j, tr);}
}
}
}
- gsl_matrix_scale (Q, 1.0/pow((double)ni_test, 2) );
+ gsl_matrix_scale (V, 1.0/pow((double)ni_test, 2) );
gsl_matrix_free(KiKj);
- gsl_vector_free(trKiKjKi);
gsl_vector_free(trKiKj);
gsl_vector_free(trKi);
@@ -991,190 +1136,210 @@ void compKtoQ (const gsl_matrix *G, gsl_matrix *Q) {
//perform Jacknife sampling for variance of S
-void JacknifeGtoS (const gsl_matrix *G, gsl_matrix *S, gsl_matrix *Svar) {
- size_t n_vc=Svar->size1, ni_test=G->size1;
- vector<vector<vector<double> > > tr_KiKj, s_KiKj;
- vector<vector<double> > sum_Ki, s_Ki, si;
+void JackknifeAKtoS (const gsl_matrix *W, const gsl_matrix *A, const gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar) {
+ size_t n_vc=Svar->size1, ni_test=A->size1, n_cvt=W->size2;
+
+ vector<vector<vector<double> > > trAK, sumAK;
+ vector<vector<double> > sumA, sumK, trA, trK, sA, sK;
vector<double> vec_tmp;
double di, dj, d, m, v;
+ //gsl_matrix *Stmp=gsl_matrix_alloc (n_vc, ni_test*n_vc);
+ //gsl_matrix *Stmp_sub=gsl_matrix_alloc (n_vc, n_vc);
+
//initialize and set all elements to zero
for (size_t i=0; i<ni_test; i++) {
vec_tmp.push_back(0);
}
for (size_t i=0; i<n_vc; i++) {
- sum_Ki.push_back(vec_tmp);
- s_Ki.push_back(vec_tmp);
- si.push_back(vec_tmp);
+ sumA.push_back(vec_tmp);
+ sumK.push_back(vec_tmp);
+ trA.push_back(vec_tmp);
+ trK.push_back(vec_tmp);
+ sA.push_back(vec_tmp);
+ sK.push_back(vec_tmp);
}
for (size_t i=0; i<n_vc; i++) {
- tr_KiKj.push_back(sum_Ki);
- s_KiKj.push_back(sum_Ki);
+ trAK.push_back(sumK);
+ sumAK.push_back(sumK);
}
- //run jacknife
+ //run jackknife
for (size_t i=0; i<n_vc; i++) {
for (size_t l=0; l<ni_test; l++) {
for (size_t k=0; k<ni_test; k++) {
- di=gsl_matrix_get(G, l, k+ni_test*i);
+ di=gsl_matrix_get(A, l, k+ni_test*i);
+ dj=gsl_matrix_get(K, l, k+ni_test*i);
for (size_t t=0; t<ni_test; t++) {
if (t==l || t==k) {continue;}
- sum_Ki[i][t]+=di;
- if (l==k) {si[i][t]+=di;}
+ sumA[i][t]+=di;
+ sumK[i][t]+=dj;
+ if (l==k) {trA[i][t]+=di; trK[i][t]+=dj;}
}
- s_Ki[i][l]+=di;
+ sA[i][l]+=di;
+ sK[i][l]+=dj;
}
}
for (size_t t=0; t<ni_test; t++) {
- sum_Ki[i][t]/=(double)(ni_test-1);
+ sumA[i][t]/=(double)(ni_test-1);
+ sumK[i][t]/=(double)(ni_test-1);
}
}
for (size_t i=0; i<n_vc; i++) {
- for (size_t j=i; j<n_vc; j++) {
+ for (size_t j=0; j<n_vc; j++) {
for (size_t l=0; l<ni_test; l++) {
for (size_t k=0; k<ni_test; k++) {
- di=gsl_matrix_get(G, l, k+ni_test*i);
- dj=gsl_matrix_get(G, l, k+ni_test*j);
+ di=gsl_matrix_get(A, l, k+ni_test*i);
+ dj=gsl_matrix_get(K, l, k+ni_test*j);
d=di*dj;
for (size_t t=0; t<ni_test; t++) {
if (t==l || t==k) {continue;}
- tr_KiKj[i][j][t]+=d;
+ trAK[i][j][t]+=d;
}
}
for (size_t t=0; t<ni_test; t++) {
if (t==l) {continue;}
- di=gsl_matrix_get(G, l, t+ni_test*i);
- dj=gsl_matrix_get(G, l, t+ni_test*j);
+ di=gsl_matrix_get(A, l, t+ni_test*i);
+ dj=gsl_matrix_get(K, l, t+ni_test*j);
- s_KiKj[i][j][t]+=(s_Ki[i][l]-di)*(s_Ki[j][l]-dj);
+ sumAK[i][j][t]+=(sA[i][l]-di)*(sK[j][l]-dj);
}
}
for (size_t t=0; t<ni_test; t++) {
- s_KiKj[i][j][t]/=(double)(ni_test-1);
+ sumAK[i][j][t]/=(double)(ni_test-1);
}
m=0; v=0;
for (size_t t=0; t<ni_test; t++) {
- d=tr_KiKj[i][j][t]-2*s_KiKj[i][j][t]+sum_Ki[i][t]*sum_Ki[j][t];
- d/=(si[i][t]-sum_Ki[i][t])*(si[j][t]-sum_Ki[j][t]);
- d-=1/(double)(ni_test-2);
-
+ d=trAK[i][j][t]-2*sumAK[i][j][t]+sumA[i][t]*sumK[j][t];
+ if ( (trA[i][t]-sumA[i][t])==0 || (trK[j][t]-sumK[j][t])==0) {
+ d=0;
+ } else {
+ d/=(trA[i][t]-sumA[i][t])*(trK[j][t]-sumK[j][t]);
+ d-=1/(double)(ni_test-n_cvt-1);
+ }
+ //gsl_matrix_set (Stmp, i, t*n_vc+j, d);
+ //gsl_matrix_set (Stmp, j, t*n_vc+i, d);
m+=d; v+=d*d;
}
m/=(double)ni_test;
v/=(double)ni_test;
v-=m*m;
v*=(double)(ni_test-1);
+ gsl_matrix_set (Svar, i, j, v);
+ if (n_cvt==1) {
+ d=gsl_matrix_get (S, i, j);
+ d=(double)ni_test*d-(double)(ni_test-1)*m;
+ gsl_matrix_set (S, i, j, d);
+ }
+ }
+ }
+
+ /*
+ for (size_t t=0; t<ni_test; t++) {
+ gsl_matrix_view Stmp_view=gsl_matrix_submatrix(Stmp, 0, t*n_vc, n_vc, n_vc);
+ gsl_matrix_memcpy (Stmp_sub, &Stmp_view.matrix);
+ eigenlib_invert(Stmp_sub);
+ gsl_matrix_memcpy (&Stmp_view.matrix, Stmp_sub);
+ }
+
+ for (size_t i=0; i<n_vc; i++) {
+ for (size_t j=i; j<n_vc; j++) {
+ m=0; v=0;
+ for (size_t t=0; t<ni_test; t++) {
+ d=gsl_matrix_get (Stmp, i, t*n_vc+j);
+ m+=d;
+ v+=d*d;
+ }
+ m/=(double)ni_test;
+ v/=(double)ni_test;
+ v-=m*m;
+ v*=(double)(ni_test-1);
gsl_matrix_set (Svar, i, j, v);
- d=gsl_matrix_get (S, i, j);
+ d=gsl_matrix_get (Si, i, j);
d=(double)ni_test*d-(double)(ni_test-1)*m;
- gsl_matrix_set (S, i, j, d);
- if (i!=j) {gsl_matrix_set (Svar, j, i, v); gsl_matrix_set (S, j, i, d);}
+ gsl_matrix_set (Si, i, j, d);
+ if (i!=j) {gsl_matrix_set (Svar, j, i, v); gsl_matrix_set (Si, j, i, d);}
}
}
+ gsl_matrix_free (Stmp);
+ */
return;
}
//compute the d by d S matrix with its d by d variance matrix of Svar, and the d+1 by d(d+1) matrix of Q for V(q)
-void PARAM::CalcS (gsl_matrix *S, gsl_matrix *Svar, gsl_matrix *Q) {
+void PARAM::CalcS (const map<string, double> &mapRS2wA, const map<string, double> &mapRS2wK, const gsl_matrix *W, gsl_matrix *A, gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar, gsl_vector *ns) {
string file_str;
gsl_matrix_set_zero (S);
gsl_matrix_set_zero (Svar);
- gsl_matrix_set_zero (Q);
+ gsl_vector_set_zero (ns);
//compute the kinship matrix G for multiple categories; these matrices are not centered, for convienence of Jacknife sampling
- gsl_matrix *G=gsl_matrix_alloc (ni_test, n_vc*ni_test);
- gsl_matrix_set_zero (G);
-
if (!file_bfile.empty() ) {
file_str=file_bfile+".bed";
- if (PlinkKin (file_str, indicator_idv, indicator_snp, a_mode-24, d_pace, mapRS2cat, mapRS2var, snpInfo, G)==false) {error=true;}
- } else {
+ if (mapRS2wA.size()==0) {
+ if (PlinkKin (file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK, mapRS2cat, snpInfo, W, K, ns)==false) {error=true;}
+ } else {
+ if (PlinkKin (file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA, mapRS2cat, snpInfo, W, A, ns)==false) {error=true;}
+ }
+ } else if (!file_geno.empty()) {
file_str=file_geno;
- if (BimbamKin (file_str, indicator_idv, indicator_snp, a_mode-24, d_pace, mapRS2cat, mapRS2var, snpInfo, G)==false) {error=true;}
+ if (mapRS2wA.size()==0) {
+ if (BimbamKin (file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK, mapRS2cat, snpInfo, W, K, ns)==false) {error=true;}
+ } else {
+ if (BimbamKin (file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA, mapRS2cat, snpInfo, W, A, ns)==false) {error=true;}
+ }
+ } else if (!file_mbfile.empty() ){
+ if (mapRS2wA.size()==0) {
+ if (MFILEKin (1, file_mbfile, d_pace, indicator_idv, mindicator_snp, mapRS2wK, mapRS2cat, msnpInfo, W, K, ns)==false) {error=true;}
+ } else {
+ if (MFILEKin (1, file_mbfile, d_pace, indicator_idv, mindicator_snp, mapRS2wA, mapRS2cat, msnpInfo, W, A, ns)==false) {error=true;}
+ }
+ } else if (!file_mgeno.empty()) {
+ if (mapRS2wA.size()==0) {
+ if (MFILEKin (0, file_mgeno, d_pace, indicator_idv, mindicator_snp, mapRS2wK, mapRS2cat, msnpInfo, W, K, ns)==false) {error=true;}
+ } else {
+ if (MFILEKin (0, file_mgeno, d_pace, indicator_idv, mindicator_snp, mapRS2wA, mapRS2cat, msnpInfo, W, A, ns)==false) {error=true;}
+ }
}
- //center and scale every kinship matrix inside G
- double d;
- for (size_t i=0; i<n_vc; i++) {
- gsl_matrix_view K=gsl_matrix_submatrix(G, 0, i*ni_test, ni_test, ni_test);
- CenterMatrix(&K.matrix);
- d=ScaleMatrix(&K.matrix);
+ if (mapRS2wA.size()==0) {
+ gsl_matrix_memcpy (A, K);
}
- //based on G, compute S
- compKtoS (G, S);
-
- //based on G, compute a matrix Q that can be used to calculate the variance of q
- compKtoQ (G, Q);
-
- /*
- //set up random environment
- gsl_rng_env_setup();
- gsl_rng *gsl_r;
- const gsl_rng_type * gslType;
- gslType = gsl_rng_default;
- if (randseed<0) {
- time_t rawtime;
- time (&rawtime);
- tm * ptm = gmtime (&rawtime);
-
- randseed = (unsigned) (ptm->tm_hour%24*3600+ptm->tm_min*60+ptm->tm_sec);
- }
- gsl_r = gsl_rng_alloc(gslType);
- gsl_rng_set(gsl_r, randseed);
-
- //bootstrap: in each iteration, sample individuals and compute S_pmt
- size_t n_pmt=100;
- vector<size_t> idv_order, idv_remove;
- for (size_t i=0; i<ni_test; i++) {
- idv_order.push_back(i);
- }
- for (size_t i=0; i<n_pmt; i++) {
- idv_remove.push_back(0);
- }
- gsl_ran_choose (gsl_r, static_cast<void*>(&idv_remove[0]), n_pmt, static_cast<void*>(&idv_order[0]), ni_test, sizeof(size_t));
+ //center and scale every kinship matrix inside G
+ for (size_t i=0; i<n_vc; i++) {
+ gsl_matrix_view Ksub=gsl_matrix_submatrix(K, 0, i*ni_test, ni_test, ni_test);
+ CenterMatrix(&Ksub.matrix);
+ ScaleMatrix(&Ksub.matrix);
- gsl_matrix *S_pmt=gsl_matrix_alloc(n_vc, n_vc*n_pmt);
- for (size_t i=0; i<n_pmt; i++) {
- gsl_matrix_view S_sub=gsl_matrix_submatrix (S_pmt, 0, n_vc*i, n_vc, n_vc);
- compKtoS (G, idv_remove[i], &S_sub.matrix);
+ gsl_matrix_view Asub=gsl_matrix_submatrix(A, 0, i*ni_test, ni_test, ni_test);
+ CenterMatrix(&Asub.matrix);
+ ScaleMatrix(&Asub.matrix);
}
- //based on S_pmt, compute Svar
- double m, v, d;
- for (size_t i=0; i<n_vc; i++) {
- for (size_t j=i; j<n_vc; j++) {
- m=0; v=0;
- for (size_t t=0; t<n_pmt; t++) {
- d=gsl_matrix_get(S_pmt, i, j);
- m+=d; v+=d*d;
- }
- m/=(double)n_pmt; v/=(double)n_pmt;
- v=v-m*m;
- gsl_matrix_set(Svar, i, j, v);
- if (i!=j) {gsl_matrix_set(Svar, j, i, v);}
- }
- }
- */
+ //based on G, compute S
+ compAKtoS (A, K, W->size2, S);
//compute Svar and update S with Jacknife
- JacknifeGtoS (G, S, Svar);
+ JackknifeAKtoS (W, A, K, S, Svar);
+
+ //based on G, compute a matrix Q that can be used to calculate the variance of q
+ //compKtoV (G, V);
- gsl_matrix_free(G);
return;
}
@@ -1223,11 +1388,20 @@ void PARAM::WriteVar (const string suffix)
outfile.precision(10);
- for (size_t i=0; i<indicator_snp.size(); i++) {
- if (indicator_snp[i]==0) {continue;}
- rs=snpInfo[i].rs_number;
- if (mapRS2var.count(rs)!=0) {
- outfile<<rs<<"\t"<<mapRS2var.at(rs)<<endl;
+ if (mindicator_snp.size()!=0) {
+ for (size_t t=0; t<mindicator_snp.size(); t++) {
+ indicator_snp=mindicator_snp[t];
+ for (size_t i=0; i<indicator_snp.size(); i++) {
+ if (indicator_snp[i]==0) {continue;}
+ rs=snpInfo[i].rs_number;
+ outfile<<rs<<endl;
+ }
+ }
+ } else {
+ for (size_t i=0; i<indicator_snp.size(); i++) {
+ if (indicator_snp[i]==0) {continue;}
+ rs=snpInfo[i].rs_number;
+ outfile<<rs<<endl;
}
}
@@ -1564,3 +1738,219 @@ void PARAM::CopyRead (gsl_vector *log_N)
+void PARAM::ObtainWeight (const set<string> &setSnps_beta, map<string, double> &mapRS2wK)
+{
+ mapRS2wK.clear();
+
+ vector<double> wsum, wcount;
+
+ for (size_t i=0; i<n_vc; i++) {
+ wsum.push_back(0.0);
+ wcount.push_back(0.0);
+ }
+
+ string rs;
+ if (msnpInfo.size()==0) {
+ for (size_t i=0; i<snpInfo.size(); i++) {
+ if (indicator_snp[i]==0) {continue;}
+
+ rs=snpInfo[i].rs_number;
+ if ( (setSnps_beta.size()==0 || setSnps_beta.count(rs)!=0) && (mapRS2wsnp.size()==0 || mapRS2wsnp.count(rs)!=0) && (mapRS2wcat.size()==0 || mapRS2wcat.count(rs)!=0) && (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) {
+ if (mapRS2wsnp.size()!=0) {
+ mapRS2wK[rs]=mapRS2wsnp[rs];
+ if (mapRS2cat.size()==0) {
+ wsum[0]+=mapRS2wsnp[rs];
+ } else {
+ wsum[mapRS2cat[rs]]+=mapRS2wsnp[rs];
+ }
+ wcount[0]++;
+ } else {
+ mapRS2wK[rs]=1;
+ }
+ }
+
+ }
+ } else {
+ for (size_t t=0; t<msnpInfo.size(); t++) {
+ snpInfo=msnpInfo[t];
+ indicator_snp=mindicator_snp[t];
+
+ for (size_t i=0; i<snpInfo.size(); i++) {
+ if (indicator_snp[i]==0) {continue;}
+
+ rs=snpInfo[i].rs_number;
+ if ( (setSnps_beta.size()==0 || setSnps_beta.count(rs)!=0) && (mapRS2wsnp.size()==0 || mapRS2wsnp.count(rs)!=0) && (mapRS2wcat.size()==0 || mapRS2wcat.count(rs)!=0) && (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) {
+ if (mapRS2wsnp.size()!=0) {
+ mapRS2wK[rs]=mapRS2wsnp[rs];
+ if (mapRS2cat.size()==0) {
+ wsum[0]+=mapRS2wsnp[rs];
+ } else {
+ wsum[mapRS2cat[rs]]+=mapRS2wsnp[rs];
+ }
+ wcount[0]++;
+ } else {
+ mapRS2wK[rs]=1;
+ }
+ }
+ }
+ }
+ }
+
+ if (mapRS2wsnp.size()!=0) {
+ for (size_t i=0; i<n_vc; i++) {
+ wsum[i]/=wcount[i];
+ }
+
+ for (map<string, double>::iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) {
+ if (mapRS2cat.size()==0) {
+ it->second/=wsum[0];
+ } else {
+ it->second/=wsum[mapRS2cat[it->first]];
+ }
+ }
+ }
+ return;
+}
+
+
+//pve_flag=0 then do not change pve; pve_flag==1, then change pve to 0 if pve < 0 and pve to 1 if pve > 1
+void PARAM::UpdateWeight (const size_t pve_flag, const map<string, double> &mapRS2wK, const size_t ni_test, const gsl_vector *ns, map<string, double> &mapRS2wA)
+{
+ double d;
+ vector<double> wsum, wcount;
+
+ for (size_t i=0; i<n_vc; i++) {
+ wsum.push_back(0.0);
+ wcount.push_back(0.0);
+ }
+
+ for (map<string, double>::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) {
+ d=1;
+ for (size_t i=0; i<n_vc; i++) {
+ if (v_pve[i]>=1 && pve_flag==1) {
+ d+=(double)ni_test/gsl_vector_get(ns, i)*mapRS2wcat[it->first][i];
+ } else if (v_pve[i]<=0 && pve_flag==1) {
+ d+=0;
+ } else {
+ d+=(double)ni_test/gsl_vector_get(ns, i)*mapRS2wcat[it->first][i]*v_pve[i];
+ }
+ }
+ mapRS2wA[it->first]=1/(d*d);
+
+ if (mapRS2cat.size()==0) {
+ wsum[0]+=mapRS2wA[it->first];
+ wcount[0]++;
+ } else {
+ wsum[mapRS2cat[it->first]]+=mapRS2wA[it->first];
+ wcount[mapRS2cat[it->first]]++;
+ }
+ }
+
+ for (size_t i=0; i<n_vc; i++) {
+ wsum[i]/=wcount[i];
+ }
+
+ for (map<string, double>::iterator it=mapRS2wA.begin(); it!=mapRS2wA.end(); ++it) {
+ if (mapRS2cat.size()==0) {
+ it->second/=wsum[0];
+ } else {
+ it->second/=wsum[mapRS2cat[it->first]];
+ }
+ }
+ return;
+}
+
+// this function updates indicator_snp, and save z-scores and other values into vectors
+void PARAM::UpdateSNPnZ (const map<string, double> &mapRS2wA, const map<string, string> &mapRS2A1, const map<string, double> &mapRS2z, gsl_vector *w, gsl_vector *z, vector<size_t> &vec_cat)
+{
+ gsl_vector_set_zero (w);
+ gsl_vector_set_zero (z);
+ vec_cat.clear();
+
+ string rs, a1;
+ size_t c=0;
+ if (msnpInfo.size()==0) {
+ for (size_t i=0; i<snpInfo.size(); i++) {
+ if (indicator_snp[i]==0) {continue;}
+
+ rs=snpInfo[i].rs_number;
+ a1=snpInfo[i].a_minor;
+
+ if (mapRS2wA.count(rs)!=0) {
+ if (a1==mapRS2A1.at(rs)) {
+ gsl_vector_set (z, c, mapRS2z.at(rs) );
+ } else {
+ gsl_vector_set (z, c, -1*mapRS2z.at(rs) );
+ }
+ vec_cat.push_back(mapRS2cat.at(rs) );
+ gsl_vector_set (w, c, mapRS2wA.at(rs) );
+
+ c++;
+ } else {
+ indicator_snp[i]=0;
+ }
+ }
+ } else {
+ for (size_t t=0; t<msnpInfo.size(); t++) {
+ snpInfo=msnpInfo[t];
+
+ for (size_t i=0; i<snpInfo.size(); i++) {
+ if (mindicator_snp[t][i]==0) {continue;}
+
+ rs=snpInfo[i].rs_number;
+ a1=snpInfo[i].a_minor;
+
+ if (mapRS2wA.count(rs)!=0) {
+ if (a1==mapRS2A1.at(rs)) {
+ gsl_vector_set (z, c, mapRS2z.at(rs) );
+ } else {
+ gsl_vector_set (z, c, -1*mapRS2z.at(rs) );
+ }
+ vec_cat.push_back(mapRS2cat.at(rs) );
+ gsl_vector_set (w, c, mapRS2wA.at(rs) );
+
+ c++;
+ } else {
+ mindicator_snp[t][i]=0;
+ }
+ }
+ }
+ }
+
+ return;
+}
+
+
+
+// this function updates indicator_snp, and save z-scores and other values into vectors
+void PARAM::UpdateSNP (const map<string, double> &mapRS2wA)
+{
+ string rs;
+ if (msnpInfo.size()==0) {
+ for (size_t i=0; i<snpInfo.size(); i++) {
+ if (indicator_snp[i]==0) {continue;}
+
+ rs=snpInfo[i].rs_number;
+
+ if (mapRS2wA.count(rs)==0) {
+ indicator_snp[i]=0;
+ }
+ }
+ } else {
+ for (size_t t=0; t<msnpInfo.size(); t++) {
+ snpInfo=msnpInfo[t];
+
+ for (size_t i=0; i<mindicator_snp[t].size(); i++) {
+ if (mindicator_snp[t][i]==0) {continue;}
+
+ rs=snpInfo[i].rs_number;
+
+ if (mapRS2wA.count(rs)==0) {
+ mindicator_snp[t][i]=0;
+ }
+ }
+ }
+ }
+
+ return;
+}