aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorPeter Carbonetto2017-07-21 04:40:10 -0500
committerPeter Carbonetto2017-07-21 04:40:10 -0500
commita2bea614636467f17cf4d3c51118f1cd51c6ce8d (patch)
tree4f6c6b34421748a6f8c3ddd6d054194bc0e85334 /src
parent92ce6ce0f50f54c7cf4ba5488a423dcc0ded23fb (diff)
downloadpangemma-a2bea614636467f17cf4d3c51118f1cd51c6ce8d.tar.gz
Removed some more commented-out code from mvlmm.cpp.
Diffstat (limited to 'src')
-rw-r--r--src/mvlmm.cpp1965
1 files changed, 1037 insertions, 928 deletions
diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp
index 5eb7e38..78cd926 100644
--- a/src/mvlmm.cpp
+++ b/src/mvlmm.cpp
@@ -3403,7 +3403,7 @@ void MVLMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
time_start=clock();
- // 3 is before 1
+ // 3 is before 1.
if (a_mode==3 || a_mode==4) {
p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
V_g_null, V_e_null, UltVehiY, beta, Vbeta);
@@ -3565,10 +3565,15 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval,
gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
- //large matrices for NR
- gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); //each dxd block is H_k^{-1}
- gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); //each column is H_k^{-1}y_k
- gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); //each dcxdc block is x_k\otimes H_k^{-1}
+ // Large matrices for NR.
+ // Each dxd block is H_k^{-1}.
+ gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
+
+ // Each column is H_k^{-1}y_k.
+ gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
+
+ // Each dcxdc block is x_k \otimes H_k^{-1}.
+ gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
gsl_vector *x=gsl_vector_alloc (n_size);
@@ -3582,7 +3587,7 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval,
gsl_vector *beta=gsl_vector_alloc (d_size);
gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
- //null estimates for initial values
+ // Null estimates for initial values.
gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
@@ -3590,7 +3595,8 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval,
gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
- gsl_matrix_view xHi_all_sub=gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
+ gsl_matrix_view xHi_all_sub =
+ gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
gsl_matrix_transpose_memcpy (Y, UtY);
@@ -3601,30 +3607,37 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval,
gsl_vector_view B_col=gsl_matrix_column(B, c_size);
gsl_vector_set_zero(&B_col.vector);
- MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix);
- logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix);
- logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, se_B_null);
+ MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix,
+ Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix);
+ logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
+ E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+ UltVehiE, V_g, V_e, &B_sub.matrix);
+ logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+ &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian,
+ crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
+ &B_sub.matrix, se_B_null);
c=0;
Vg_remle_null.clear();
Ve_remle_null.clear();
for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_remle_null.push_back(gsl_matrix_get (Hessian, c+v_size, c+v_size) );
- c++;
- }
+ for (size_t j=i; j<d_size; j++) {
+ Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
+ Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
+ VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
+ VVe_remle_null.push_back(gsl_matrix_get (Hessian, c+v_size,
+ c+v_size) );
+ c++;
+ }
}
beta_remle_null.clear();
se_beta_remle_null.clear();
for (size_t i=0; i<se_B_null->size1; i++) {
- for (size_t j=0; j<se_B_null->size2; j++) {
- beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
- }
+ for (size_t j=0; j<se_B_null->size2; j++) {
+ beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
+ se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
+ }
}
logl_remle_H0=logl_H0;
@@ -3633,258 +3646,273 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval,
cout<<"REMLE estimate for Vg in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_g, i, j)<<"\t";
+ }
+ cout<<endl;
}
cout<<"se(Vg): "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
+ }
+ cout<<endl;
}
cout<<"REMLE estimate for Ve in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_e, i, j)<<"\t";
+ }
+ cout<<endl;
}
cout<<"se(Ve): "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
+ }
+ cout<<endl;
}
cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
- logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix);
- logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, se_B_null);
+ logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
+ E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+ UltVehiE, V_g, V_e, &B_sub.matrix);
+ logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y,
+ Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e,
+ Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
+ &B_sub.matrix, se_B_null);
c=0;
Vg_mle_null.clear();
Ve_mle_null.clear();
for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_mle_null.push_back(gsl_matrix_get (Hessian, c+v_size, c+v_size) );
- c++;
- }
+ for (size_t j=i; j<d_size; j++) {
+ Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
+ Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
+ VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
+ VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
+ c++;
+ }
}
beta_mle_null.clear();
se_beta_mle_null.clear();
for (size_t i=0; i<se_B_null->size1; i++) {
- for (size_t j=0; j<se_B_null->size2; j++) {
- beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
- }
+ for (size_t j=0; j<se_B_null->size2; j++) {
+ beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
+ se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
+ }
}
logl_mle_H0=logl_H0;
cout<<"MLE estimate for Vg in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_g, i, j)<<"\t";
+ }
+ cout<<endl;
}
cout<<"se(Vg): "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
+ }
+ cout<<endl;
}
cout<<"MLE estimate for Ve in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_e, i, j)<<"\t";
+ }
+ cout<<endl;
}
cout<<"se(Ve): "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
+ }
+ cout<<endl;
}
cout<<"MLE likelihood = "<<logl_H0<<endl;
-
vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
for (size_t i=0; i<d_size; i++) {
- v_beta.push_back(0.0);
+ v_beta.push_back(0.0);
}
for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg.push_back(0.0);
- v_Ve.push_back(0.0);
- v_Vbeta.push_back(0.0);
- }
+ for (size_t j=i; j<d_size; j++) {
+ v_Vg.push_back(0.0);
+ v_Ve.push_back(0.0);
+ v_Vbeta.push_back(0.0);
+ }
}
gsl_matrix_memcpy (V_g_null, V_g);
gsl_matrix_memcpy (V_e_null, V_e);
gsl_matrix_memcpy (B_null, B);
- //start reading genotypes and analyze
+ // Start reading genotypes and analyze.
size_t csnp=0, t_last=0;
for (size_t t=0; t<indicator_snp.size(); ++t) {
if (indicator_snp[t]==0) {continue;}
t_last++;
}
for (size_t t=0; t<indicator_snp.size(); ++t) {
- //if (t>=1) {break;}
- !safeGetline(infile, line).eof();
- if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
- if (indicator_snp[t]==0) {continue;}
-
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
- x_mean=0.0; c_phen=0; n_miss=0;
- gsl_vector_set_zero(x_miss);
- for (size_t i=0; i<ni_total; ++i) {
- ch_ptr=strtok (NULL, " , \t");
- if (indicator_idv[i]==0) {continue;}
-
- if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
- else {
- geno=atof(ch_ptr);
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean+=geno;
- }
- c_phen++;
- }
-
- x_mean/=(double)(ni_test-n_miss);
+ !safeGetline(infile, line).eof();
+ if (t%d_pace==0 || t==(ns_total-1)) {
+ ProgressBar ("Reading SNPs ", t, ns_total-1);
+ }
+ if (indicator_snp[t]==0) {continue;}
+
+ ch_ptr=strtok ((char *)line.c_str(), " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+
+ x_mean=0.0; c_phen=0; n_miss=0;
+ gsl_vector_set_zero(x_miss);
+ for (size_t i=0; i<ni_total; ++i) {
+ ch_ptr=strtok (NULL, " , \t");
+ if (indicator_idv[i]==0) {continue;}
+
+ if (strcmp(ch_ptr, "NA")==0) {
+ gsl_vector_set(x_miss, c_phen, 0.0);
+ n_miss++;
+ }
+ else {
+ geno=atof(ch_ptr);
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean+=geno;
+ }
+ c_phen++;
+ }
+
+ x_mean/=(double)(ni_test-n_miss);
+
+ for (size_t i=0; i<ni_test; ++i) {
+ if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
+ geno=gsl_vector_get(x, i);
+ }
- for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
- geno=gsl_vector_get(x, i);
- //if (x_mean>1) {
- // gsl_vector_set(x, i, 2-geno);
- //}
+ gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize);
+ gsl_vector_memcpy (&Xlarge_col.vector, x);
+ csnp++;
+
+ if (csnp%msize==0 || csnp==t_last ) {
+ size_t l=0;
+ if (csnp%msize==0) {l=msize;} else {l=csnp%msize;}
+
+ gsl_matrix_view Xlarge_sub =
+ gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+ gsl_matrix_view UtXlarge_sub =
+ gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+ time_start=clock();
+ eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+ &UtXlarge_sub.matrix);
+ time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ gsl_matrix_set_zero (Xlarge);
+
+ for (size_t i=0; i<l; i++) {
+ gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
+ gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector);
+
+ // Initial values.
+ gsl_matrix_memcpy (V_g, V_g_null);
+ gsl_matrix_memcpy (V_e, V_e_null);
+ gsl_matrix_memcpy (B, B_null);
+
+ time_start=clock();
+
+ // 3 is before 1.
+ if (a_mode==3 || a_mode==4) {
+ p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
+ V_g_null, V_e_null, UltVehiY, beta, Vbeta);
+ if (p_score<p_nr && crt==1) {
+ logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all,
+ xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a,
+ crt_b, crt_c);
+ p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
}
+ }
- /*
- //calculate statistics
- time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row.vector);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- */
-
- gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize);
- gsl_vector_memcpy (&Xlarge_col.vector, x);
- csnp++;
-
- if (csnp%msize==0 || csnp==t_last ) {
- size_t l=0;
- if (csnp%msize==0) {l=msize;} else {l=csnp%msize;}
-
- gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
- gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
- time_start=clock();
- eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- gsl_matrix_set_zero (Xlarge);
-
- for (size_t i=0; i<l; i++) {
- gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
- gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector);
-
- //initial values
- gsl_matrix_memcpy (V_g, V_g_null);
- gsl_matrix_memcpy (V_e, V_e_null);
- gsl_matrix_memcpy (B, B_null);
-
- time_start=clock();
-
- //3 is before 1
- if (a_mode==3 || a_mode==4) {
- p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g_null, V_e_null, UltVehiY, beta, Vbeta);
- if (p_score<p_nr && crt==1) {
- logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
- }
- }
-
- if (a_mode==2 || a_mode==4) {
- logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
- //calculate beta and Vbeta
- p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-
- if (p_lrt<p_nr) {
- logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- //calculate beta and Vbeta
- p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-
- if (crt==1) {
- p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
- }
- }
- }
-
- if (a_mode==1 || a_mode==4) {
- logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
- p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
-
- if (p_wald<p_nr) {
- logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
-
- if (crt==1) {
- p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
- }
- }
- }
-
- //if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
-
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //store summary data
- //SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
- for (size_t i=0; i<d_size; i++) {
- v_beta[i]=gsl_vector_get (beta, i);
- }
-
- c=0;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg[c]=gsl_matrix_get (V_g, i, j);
- v_Ve[c]=gsl_matrix_get (V_e, i, j);
- v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
- c++;
- }
- }
+ if (a_mode==2 || a_mode==4) {
+ logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y,
+ U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
+ UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+
+ // Calculate beta and Vbeta.
+ p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
+ V_g, V_e, UltVehiY, beta, Vbeta);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+
+ if (p_lrt<p_nr) {
+ logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y,
+ Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
+ crt_a, crt_b, crt_c);
+
+ // Calculate beta and Vbeta.
+ p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
+ V_g, V_e, UltVehiY, beta, Vbeta);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0),
+ (double)d_size );
- MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
- sumStat.push_back(SNPs);
+ if (crt==1) {
+ p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
}
}
- }
- cout<<endl;
+ }
+
+ if (a_mode==1 || a_mode==4) {
+ logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y,
+ U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
+ UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+ p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
+ V_e, UltVehiY, beta, Vbeta);
+
+ if (p_wald<p_nr) {
+ logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y,
+ Hi_all, xHi_all, Hiy_all, V_g, V_e,
+ Hessian, crt_a, crt_b, crt_c);
+ p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
+ V_g, V_e, UltVehiY, beta, Vbeta);
+
+ if (crt==1) {
+ p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+ // Store summary data.
+ for (size_t i=0; i<d_size; i++) {
+ v_beta[i]=gsl_vector_get (beta, i);
+ }
+
+ c=0;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=i; j<d_size; j++) {
+ v_Vg[c]=gsl_matrix_get (V_g, i, j);
+ v_Ve[c]=gsl_matrix_get (V_e, i, j);
+ v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
+ c++;
+ }
+ }
+
+ MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg,
+ v_Ve, v_Vbeta};
+ sumStat.push_back(SNPs);
+ }
+ }
+ }
+ cout<<endl;
infile.close();
infile.clear();
@@ -3925,14 +3953,8 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval,
return;
}
-
-
-
-
-
-
-void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY)
-{
+void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_matrix *UtY) {
string file_bed=file_bfile+".bed";
ifstream infile (file_bed.c_str(), ios::binary);
if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
@@ -3943,23 +3965,21 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
char ch[1];
bitset<8> b;
- // double lambda_mle=0, lambda_remle=0, beta=0, se=0, ;
double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
double crt_a, crt_b, crt_c;
int n_bit, n_miss, ci_total, ci_test;
double geno, x_mean;
size_t c=0;
- // double s=0.0;
size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
- //create a large matrix
+ // Create a large matrix.
size_t msize=10000;
gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
gsl_matrix_set_zero(Xlarge);
- //large matrices for EM
+ // Large matrices for EM.
gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
@@ -3969,10 +3989,16 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
- //large matrices for NR
- gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); //each dxd block is H_k^{-1}
- gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); //each column is H_k^{-1}y_k
- gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); //each dcxdc block is x_k\otimes H_k^{-1}
+ // Large matrices for NR.
+ // Each dxd block is H_k^{-1}.
+ gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
+
+ // Each column is H_k^{-1}y_k.
+ gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
+
+ // Each dcxdc block is x_k\otimes H_k^{-1}.
+ gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
+
gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
gsl_vector *x=gsl_vector_alloc (n_size);
@@ -3985,7 +4011,7 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
gsl_vector *beta=gsl_vector_alloc (d_size);
gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
- //null estimates for initial values
+ // Null estimates for initial values.
gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
@@ -3993,7 +4019,8 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
- gsl_matrix_view xHi_all_sub=gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
+ gsl_matrix_view xHi_all_sub =
+ gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
gsl_matrix_transpose_memcpy (Y, UtY);
gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW);
@@ -4003,33 +4030,38 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
gsl_vector_view B_col=gsl_matrix_column(B, c_size);
gsl_vector_set_zero(&B_col.vector);
- //time_start=clock();
- MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix);
+ MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix,
+ Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix);
- logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix);
- logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, se_B_null);
- //cout<<"time for REML in the null = "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
+ logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
+ E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+ UltVehiE, V_g, V_e, &B_sub.matrix);
+ logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+ &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian,
+ crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
+ &B_sub.matrix, se_B_null);
c=0;
Vg_remle_null.clear();
Ve_remle_null.clear();
for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_remle_null.push_back(gsl_matrix_get (Hessian, c+v_size, c+v_size) );
- c++;
- }
+ for (size_t j=i; j<d_size; j++) {
+ Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
+ Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
+ VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
+ VVe_remle_null.push_back(gsl_matrix_get(Hessian,c+v_size,
+ c+v_size));
+ c++;
+ }
}
beta_remle_null.clear();
se_beta_remle_null.clear();
for (size_t i=0; i<se_B_null->size1; i++) {
- for (size_t j=0; j<se_B_null->size2; j++) {
- beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
- }
+ for (size_t j=0; j<se_B_null->size2; j++) {
+ beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
+ se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
+ }
}
logl_remle_H0=logl_H0;
@@ -4037,123 +4069,124 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
cout.precision(4);
cout<<"REMLE estimate for Vg in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_g, i, j)<<"\t";
+ }
+ cout<<endl;
}
cout<<"se(Vg): "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
+ }
+ cout<<endl;
}
cout<<"REMLE estimate for Ve in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_e, i, j)<<"\t";
+ }
+ cout<<endl;
}
cout<<"se(Ve): "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
+ }
cout<<endl;
}
cout<<"REMLE likelihood = "<<logl_H0<<endl;
- //time_start=clock();
- logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix);
- logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, se_B_null);
- //cout<<"time for MLE in the null = "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
+ logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y,
+ U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+ UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix);
+ logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y,
+ Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e,
+ Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
+ &B_sub.matrix, se_B_null);
c=0;
Vg_mle_null.clear();
Ve_mle_null.clear();
for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_mle_null.push_back(gsl_matrix_get (Hessian, c+v_size, c+v_size) );
- c++;
- }
+ for (size_t j=i; j<d_size; j++) {
+ Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
+ Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
+ VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
+ VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
+ c++;
+ }
}
beta_mle_null.clear();
se_beta_mle_null.clear();
for (size_t i=0; i<se_B_null->size1; i++) {
- for (size_t j=0; j<se_B_null->size2; j++) {
- beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
- }
+ for (size_t j=0; j<se_B_null->size2; j++) {
+ beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
+ se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
+ }
}
logl_mle_H0=logl_H0;
cout<<"MLE estimate for Vg in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_g, i, j)<<"\t";
+ }
+ cout<<endl;
}
cout<<"se(Vg): "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
+ }
+ cout<<endl;
}
cout<<"MLE estimate for Ve in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_e, i, j)<<"\t";
+ }
+ cout<<endl;
}
cout<<"se(Ve): "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
+ }
+ cout<<endl;
}
cout<<"MLE likelihood = "<<logl_H0<<endl;
-
+
vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
for (size_t i=0; i<d_size; i++) {
- v_beta.push_back(0.0);
+ v_beta.push_back(0.0);
}
for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg.push_back(0.0);
- v_Ve.push_back(0.0);
- v_Vbeta.push_back(0.0);
- }
+ for (size_t j=i; j<d_size; j++) {
+ v_Vg.push_back(0.0);
+ v_Ve.push_back(0.0);
+ v_Vbeta.push_back(0.0);
+ }
}
-
+
gsl_matrix_memcpy (V_g_null, V_g);
gsl_matrix_memcpy (V_e_null, V_e);
gsl_matrix_memcpy (B_null, B);
-
-
- //start reading genotypes and analyze
-
- //calculate n_bit and c, the number of bit for each snp
+
+ // Start reading genotypes and analyze.
+ // Calculate n_bit and c, the number of bit for each snp.
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1; }
- //print the first three majic numbers
+ // Print the first three magic numbers.
for (int i=0; i<3; ++i) {
- infile.read(ch,1);
- b=ch[0];
+ infile.read(ch,1);
+ b=ch[0];
}
size_t csnp=0, t_last=0;
@@ -4162,168 +4195,161 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
t_last++;
}
for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
- if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);}
- if (indicator_snp[t]==0) {continue;}
-
- //if (t>=0) {break;}
- //if (snpInfo[t].rs_number!="MAG18140902") {continue;}
- //cout<<t<<endl;
-
- infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
-
- //read genotypes
- x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
- for (int i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
- for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
- if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
- if (indicator_idv[ci_total]==0) {ci_total++; continue;}
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
- else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
- }
- else {
- if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
- else {gsl_vector_set(x, ci_test, -9); n_miss++; }
- }
-
- ci_total++;
- ci_test++;
- }
- }
-
- x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
- geno=gsl_vector_get(x,i);
- if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
- //if (x_mean>1) {
- // gsl_vector_set(x, i, 2-geno);
- //}
- }
-
- /*
- if (t==0) {
- ofstream outfile ("./snp1.txt", ofstream::out);
- if (!outfile) {cout<<"error writing file: "<<endl; return;}
- for (size_t i=0; i<x->size; i++) {
- outfile<<gsl_vector_get(x, i)<<endl;
- }
- outfile.clear();
- outfile.close();
- }
- */
-
- /*
- //calculate statistics
- time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row.vector);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- */
-
- gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize);
- gsl_vector_memcpy (&Xlarge_col.vector, x);
- csnp++;
-
- if (csnp%msize==0 || csnp==t_last ) {
- size_t l=0;
- if (csnp%msize==0) {l=msize;} else {l=csnp%msize;}
-
- gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
- gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
- time_start=clock();
- eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- gsl_matrix_set_zero (Xlarge);
-
- for (size_t i=0; i<l; i++) {
- gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
- gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector);
-
- //initial values
- gsl_matrix_memcpy (V_g, V_g_null);
- gsl_matrix_memcpy (V_e, V_e_null);
- gsl_matrix_memcpy (B, B_null);
-
- time_start=clock();
-
- //3 is before 1
- if (a_mode==3 || a_mode==4) {
- p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-
- if (p_score<p_nr && crt==1) {
- logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
- }
- }
-
- if (a_mode==2 || a_mode==4) {
- logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
- //calculate beta and Vbeta
- p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-
- if (p_lrt<p_nr) {
- logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-
- //calculate beta and Vbeta
- p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
- if (crt==1) {
- p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
- }
- }
- }
+ if (t%d_pace==0 || t==snpInfo.size()-1) {
+ ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);
+ }
+ if (indicator_snp[t]==0) {continue;}
- if (a_mode==1 || a_mode==4) {
- logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
- p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+ // n_bit, and 3 is the number of magic numbers.
+ infile.seekg(t*n_bit+3);
- if (p_wald<p_nr) {
- logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+ //read genotypes
+ x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
+ for (int i=0; i<n_bit; ++i) {
+ infile.read(ch,1);
+ b=ch[0];
- if (crt==1) {
- p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
+ // Minor allele homozygous: 2.0; major: 0.0;
+ for (size_t j=0; j<4; ++j) {
+ if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
+ if (indicator_idv[ci_total]==0) {ci_total++; continue;}
+
+ if (b[2*j]==0) {
+ if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
+ else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
+ }
+ else {
+ if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
+ else {gsl_vector_set(x, ci_test, -9); n_miss++; }
+ }
+
+ ci_total++;
+ ci_test++;
}
- }
- }
-
- //cout<<setprecision(10)<<p_wald<<"\t"<<p_lrt<<"\t"<<p_score<<endl;
-
- //if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
-
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //store summary data
- //SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
- for (size_t i=0; i<d_size; i++) {
- v_beta[i]=gsl_vector_get (beta, i);
- }
-
- c=0;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg[c]=gsl_matrix_get (V_g, i, j);
- v_Ve[c]=gsl_matrix_get (V_e, i, j);
- v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
- c++;
- }
- }
+ }
+
+ x_mean/=(double)(ni_test-n_miss);
+
+ for (size_t i=0; i<ni_test; ++i) {
+ geno=gsl_vector_get(x,i);
+ if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
+ }
- MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
- sumStat.push_back(SNPs);
+ gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize);
+ gsl_vector_memcpy (&Xlarge_col.vector, x);
+ csnp++;
+
+ if (csnp%msize==0 || csnp==t_last ) {
+ size_t l=0;
+ if (csnp%msize==0) {l=msize;} else {l=csnp%msize;}
+
+ gsl_matrix_view Xlarge_sub =
+ gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+ gsl_matrix_view UtXlarge_sub =
+ gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+ time_start=clock();
+ eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+ &UtXlarge_sub.matrix);
+ time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ gsl_matrix_set_zero (Xlarge);
+
+ for (size_t i=0; i<l; i++) {
+ gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
+ gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector);
+
+ // Initial values.
+ gsl_matrix_memcpy (V_g, V_g_null);
+ gsl_matrix_memcpy (V_e, V_e_null);
+ gsl_matrix_memcpy (B, B_null);
+
+ time_start=clock();
+
+ // 3 is before 1.
+ if (a_mode==3 || a_mode==4) {
+ p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
+ V_g_null, V_e_null, UltVehiY, beta, Vbeta);
+
+ if (p_score<p_nr && crt==1) {
+ logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all,
+ xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a,
+ crt_b, crt_c);
+ p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
+ }
+ }
+
+ if (a_mode==2 || a_mode==4) {
+ logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y,
+ U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
+ UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+
+ // Calculate beta and Vbeta.
+ p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
+ V_e, UltVehiY, beta, Vbeta);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+
+ if (p_lrt<p_nr) {
+ logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y,
+ Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
+ crt_a, crt_b, crt_c);
+
+ // Calculate beta and Vbeta.
+ p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
+ V_e, UltVehiY, beta, Vbeta);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0),
+ (double)d_size );
+ if (crt==1) {
+ p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ if (a_mode==1 || a_mode==4) {
+ logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y,
+ U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
+ UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+ p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
+ V_e, UltVehiY, beta, Vbeta);
+
+ if (p_wald<p_nr) {
+ logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y,
+ Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
+ crt_a, crt_b, crt_c);
+ p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
+ V_g, V_e, UltVehiY, beta, Vbeta);
+
+ if (crt==1) {
+ p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
}
}
+ }
+
+ time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ // Store summary data.
+ for (size_t i=0; i<d_size; i++) {
+ v_beta[i]=gsl_vector_get (beta, i);
+ }
+
+ c=0;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=i; j<d_size; j++) {
+ v_Vg[c]=gsl_matrix_get (V_g, i, j);
+ v_Ve[c]=gsl_matrix_get (V_e, i, j);
+ v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
+ c++;
+ }
+ }
+
+ MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg,
+ v_Ve, v_Vbeta};
+ sumStat.push_back(SNPs);
+ }
+ }
}
cout<<endl;
- //cout<<"time_opt = "<<time_opt<<endl;
-
infile.close();
infile.clear();
@@ -4362,19 +4388,21 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
return;
}
-
-
-
-//calculate Vg, Ve, B, se(B) in the null mvLMM model
-//both B and se_B are d by c matrices
-void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY, const size_t em_iter, const size_t nr_iter, const double em_prec, const double nr_prec, const double l_min, const double l_max, const size_t n_region, gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B, gsl_matrix *se_B)
-{
+// Calculate Vg, Ve, B, se(B) in the null mvLMM model.
+// Both B and se_B are d by c matrices.
+void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW,
+ const gsl_matrix *UtY, const size_t em_iter,
+ const size_t nr_iter, const double em_prec,
+ const double nr_prec, const double l_min,
+ const double l_max, const size_t n_region,
+ gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B,
+ gsl_matrix *se_B) {
size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
size_t dc_size=d_size*c_size, v_size=d_size*(d_size+1)/2;
double logl, crt_a, crt_b, crt_c;
- //large matrices for EM
+ // Large matrices for EM.
gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
@@ -4384,25 +4412,34 @@ void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl
gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
- //large matrices for NR
- gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); //each dxd block is H_k^{-1}
- gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); //each column is H_k^{-1}y_k
- gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); //each dcxdc block is x_k\otimes H_k^{-1}
+ // Large matrices for NR.
+ // Each dxd block is H_k^{-1}.
+ gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
+
+ // Each column is H_k^{-1}y_k.
+ gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
+
+ // Each dcxdc block is x_k\otimes H_k^{-1}.
+ gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
- //transpose matrices
+ // Transpose matrices.
gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *W=gsl_matrix_alloc (c_size, n_size);
gsl_matrix_transpose_memcpy (Y, UtY);
gsl_matrix_transpose_memcpy (W, UtW);
- //initial, EM, NR, and calculate B
- MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, W, Y, l_min, l_max, n_region, V_g, V_e, B);
- logl=MphEM ('R', em_iter, em_prec, eval, W, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
- logl=MphNR ('R', nr_iter, nr_prec, eval, W, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ // Initial, EM, NR, and calculate B.
+ MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, W, Y,
+ l_min, l_max, n_region, V_g, V_e, B);
+ logl=MphEM ('R', em_iter, em_prec, eval, W, Y, U_hat, E_hat,
+ OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+ UltVehiE, V_g, V_e, B);
+ logl=MphNR ('R', nr_iter, nr_prec, eval, W, Y, Hi_all, xHi_all,
+ Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
MphCalcBeta (eval, W, Y, V_g, V_e, UltVehiY, B, se_B);
- //free matrices
+ // Free matrices.
gsl_matrix_free(U_hat);
gsl_matrix_free(E_hat);
gsl_matrix_free(OmegaU);
@@ -4423,15 +4460,14 @@ void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl
return;
}
-
-
-
-
-void MVLMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY, const gsl_vector *env)
-{
+void MVLMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_matrix *UtY,
+ const gsl_vector *env) {
igzstream infile (file_geno.c_str(), igzstream::in);
-// ifstream infile (file_geno.c_str(), ifstream::in);
- if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;}
+ if (!infile) {
+ cout<<"error reading genotype file:"<<file_geno<<endl;
+ return;
+ }
clock_t time_start=clock();
time_UtX=0; time_opt=0;
@@ -4439,17 +4475,15 @@ void MVLMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const
string line;
char *ch_ptr;
- // double lambda_mle=0, lambda_remle=0, beta=0, se=0, ;
double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
double crt_a, crt_b, crt_c;
int n_miss, c_phen;
double geno, x_mean;
size_t c=0;
- // double s=0.0;
size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2+2;
size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
- //large matrices for EM
+ // Large matrices for EM.
gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
@@ -4459,10 +4493,15 @@ void MVLMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const
gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
- //large matrices for NR
- gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); //each dxd block is H_k^{-1}
- gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); //each column is H_k^{-1}y_k
- gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); //each dcxdc block is x_k\otimes H_k^{-1}
+ // Large matrices for NR.
+ // Each dxd block is H_k^{-1}.
+ gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
+
+ // Each column is H_k^{-1}y_k.
+ gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
+
+ // Each dcxdc block is x_k\otimes H_k^{-1}.
+ gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
gsl_vector *x=gsl_vector_alloc (n_size);
@@ -4476,24 +4515,27 @@ void MVLMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const
gsl_vector *beta=gsl_vector_alloc (d_size);
gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
- //null estimates for initial values; including env but not including x
+ // Null estimates for initial values; including env but not
+ // including x.
gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
gsl_matrix *se_B_null1=gsl_matrix_alloc (d_size, c_size-1);
gsl_matrix *se_B_null2=gsl_matrix_alloc (d_size, c_size);
- gsl_matrix_view X_sub1=gsl_matrix_submatrix (X, 0, 0, c_size-1, n_size);
- gsl_matrix_view B_sub1=gsl_matrix_submatrix (B, 0, 0, d_size, c_size-1);
- gsl_matrix_view xHi_all_sub1=gsl_matrix_submatrix (xHi_all, 0, 0, d_size*(c_size-1), d_size*n_size);
+ gsl_matrix_view X_sub1=gsl_matrix_submatrix(X,0,0,c_size-1,n_size);
+ gsl_matrix_view B_sub1=gsl_matrix_submatrix(B,0,0,d_size,c_size-1);
+ gsl_matrix_view xHi_all_sub1=
+ gsl_matrix_submatrix(xHi_all,0,0,d_size*(c_size-1),d_size*n_size);
gsl_matrix_view X_sub2=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
gsl_matrix_view B_sub2=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
- gsl_matrix_view xHi_all_sub2=gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
+ gsl_matrix_view xHi_all_sub2=
+ gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
gsl_matrix_transpose_memcpy (Y, UtY);
- gsl_matrix_view X_sub0=gsl_matrix_submatrix (X, 0, 0, c_size-2, n_size);
+ gsl_matrix_view X_sub0=gsl_matrix_submatrix(X,0,0,c_size-2,n_size);
gsl_matrix_transpose_memcpy (&X_sub0.matrix, UtW);
gsl_vector_view X_row0=gsl_matrix_row(X, c_size-2);
gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
@@ -4508,30 +4550,37 @@ void MVLMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const
gsl_vector_view B_col2=gsl_matrix_column(B, c_size);
gsl_vector_set_zero(&B_col2.vector);
- MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, Y, l_min, l_max, n_region, V_g, V_e, &B_sub1.matrix);
- logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix);
- logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix, se_B_null1);
+ MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix,
+ Y, l_min, l_max, n_region, V_g, V_e, &B_sub1.matrix);
+ logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub1.matrix, Y,
+ U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+ UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix);
+ logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y,
+ Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e,
+ Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY,
+ &B_sub1.matrix, se_B_null1);
c=0;
Vg_remle_null.clear();
Ve_remle_null.clear();
for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_remle_null.push_back(gsl_matrix_get (Hessian, c+v_size, c+v_size) );
- c++;
- }
+ for (size_t j=i; j<d_size; j++) {
+ Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
+ Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
+ VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
+ VVe_remle_null.push_back(gsl_matrix_get(Hessian,c+v_size,
+ c+v_size));
+ c++;
+ }
}
beta_remle_null.clear();
se_beta_remle_null.clear();
for (size_t i=0; i<se_B_null1->size1; i++) {
- for (size_t j=0; j<se_B_null1->size2; j++) {
- beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
- }
+ for (size_t j=0; j<se_B_null1->size2; j++) {
+ beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
+ se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
+ }
}
logl_remle_H0=logl_H0;
@@ -4540,157 +4589,164 @@ void MVLMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const
cout<<"REMLE estimate for Vg in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_g, i, j)<<"\t";
+ }
+ cout<<endl;
}
cout<<"se(Vg): "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
+ }
+ cout<<endl;
}
cout<<"REMLE estimate for Ve in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_e, i, j)<<"\t";
+ }
+ cout<<endl;
}
cout<<"se(Ve): "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
+ }
+ cout<<endl;
}
cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
-
- logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix);
- logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix, se_B_null1);
+
+ logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat,
+ E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+ UltVehiE, V_g, V_e, &B_sub1.matrix);
+ logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y,
+ Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e,
+ Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY,
+ &B_sub1.matrix, se_B_null1);
c=0;
Vg_mle_null.clear();
Ve_mle_null.clear();
for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_mle_null.push_back(gsl_matrix_get (Hessian, c+v_size, c+v_size) );
- c++;
- }
+ for (size_t j=i; j<d_size; j++) {
+ Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
+ Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
+ VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
+ VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
+ c++;
+ }
}
beta_mle_null.clear();
se_beta_mle_null.clear();
for (size_t i=0; i<se_B_null1->size1; i++) {
- for (size_t j=0; j<se_B_null1->size2; j++) {
- beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
- }
+ for (size_t j=0; j<se_B_null1->size2; j++) {
+ beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
+ se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
+ }
}
logl_mle_H0=logl_H0;
cout<<"MLE estimate for Vg in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_g, i, j)<<"\t";
+ }
+ cout<<endl;
}
cout<<"se(Vg): "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
+ }
+ cout<<endl;
}
cout<<"MLE estimate for Ve in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_e, i, j)<<"\t";
+ }
+ cout<<endl;
}
cout<<"se(Ve): "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
+ }
+ cout<<endl;
}
cout<<"MLE likelihood = "<<logl_H0<<endl;
-
-
+
vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
for (size_t i=0; i<d_size; i++) {
- v_beta.push_back(0.0);
+ v_beta.push_back(0.0);
}
for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg.push_back(0.0);
- v_Ve.push_back(0.0);
- v_Vbeta.push_back(0.0);
- }
+ for (size_t j=i; j<d_size; j++) {
+ v_Vg.push_back(0.0);
+ v_Ve.push_back(0.0);
+ v_Vbeta.push_back(0.0);
+ }
}
gsl_matrix_memcpy (V_g_null, V_g);
gsl_matrix_memcpy (V_e_null, V_e);
gsl_matrix_memcpy (B_null, B);
- //start reading genotypes and analyze
+ // Start reading genotypes and analyze.
for (size_t t=0; t<indicator_snp.size(); ++t) {
- //if (t>=1) {break;}
- !safeGetline(infile, line).eof();
- if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
- if (indicator_snp[t]==0) {continue;}
-
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
- x_mean=0.0; c_phen=0; n_miss=0;
- gsl_vector_set_zero(x_miss);
- for (size_t i=0; i<ni_total; ++i) {
- ch_ptr=strtok (NULL, " , \t");
- if (indicator_idv[i]==0) {continue;}
-
- if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
- else {
- geno=atof(ch_ptr);
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean+=geno;
- }
- c_phen++;
- }
-
- x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
- geno=gsl_vector_get(x, i);
- if (x_mean>1) {
- gsl_vector_set(x, i, 2-geno);
- }
- }
-
- //calculate statistics
- time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
- gsl_vector_mul (x, env);
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+ !safeGetline(infile, line).eof();
+ if (t%d_pace==0 || t==(ns_total-1)) {
+ ProgressBar ("Reading SNPs ", t, ns_total-1);
+ }
+ if (indicator_snp[t]==0) {continue;}
+
+ ch_ptr=strtok ((char *)line.c_str(), " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+
+ x_mean=0.0; c_phen=0; n_miss=0;
+ gsl_vector_set_zero(x_miss);
+ for (size_t i=0; i<ni_total; ++i) {
+ ch_ptr=strtok (NULL, " , \t");
+ if (indicator_idv[i]==0) {continue;}
+
+ if (strcmp(ch_ptr, "NA")==0) {
+ gsl_vector_set(x_miss, c_phen, 0.0);
+ n_miss++;
+ }
+ else {
+ geno=atof(ch_ptr);
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean+=geno;
+ }
+ c_phen++;
+ }
+
+ x_mean/=(double)(ni_test-n_miss);
+
+ for (size_t i=0; i<ni_test; ++i) {
+ if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
+ geno=gsl_vector_get(x, i);
+ if (x_mean>1) {
+ gsl_vector_set(x, i, 2-geno);
+ }
+ }
+
+ // Calculate statistics.
+ time_start=clock();
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
+ gsl_vector_mul (x, env);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
+ time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
//initial values
gsl_matrix_memcpy (V_g, V_g_null);
gsl_matrix_memcpy (V_e, V_e_null);
@@ -4698,85 +4754,116 @@ void MVLMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const
if (a_mode==2 || a_mode==3 || a_mode==4) {
if (a_mode==3 || a_mode==4) {
- logl_H0=MphEM ('R', em_iter/10, em_prec*10, eval, &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
- logl_H0=MphNR ('R', nr_iter/10, nr_prec*10, eval, &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix, se_B_null2);
+ logl_H0=MphEM ('R', em_iter/10, em_prec*10, eval,
+ &X_sub2.matrix, Y, U_hat, E_hat, OmegaU,
+ OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+ UltVehiE, V_g, V_e, &B_sub2.matrix);
+ logl_H0=MphNR ('R', nr_iter/10, nr_prec*10, eval,
+ &X_sub2.matrix, Y, Hi_all,
+ &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
+ Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY,
+ &B_sub2.matrix, se_B_null2);
}
if (a_mode==2 || a_mode==4) {
- logl_H0=MphEM ('L', em_iter/10, em_prec*10, eval, &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
- logl_H0=MphNR ('L', nr_iter/10, nr_prec*10, eval, &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix, se_B_null2);
+ logl_H0=MphEM ('L', em_iter/10, em_prec*10, eval,
+ &X_sub2.matrix, Y, U_hat, E_hat, OmegaU,
+ OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+ UltVehiE, V_g, V_e, &B_sub2.matrix);
+ logl_H0=MphNR ('L', nr_iter/10, nr_prec*10, eval,
+ &X_sub2.matrix, Y, Hi_all,
+ &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
+ Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY,
+ &B_sub2.matrix, se_B_null2);
}
}
-
time_start=clock();
- //3 is before 1
+ // 3 is before 1.
if (a_mode==3 || a_mode==4) {
- p_score=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g_null, V_e_null, UltVehiY, beta, Vbeta);
- if (p_score<p_nr && crt==1) {
- logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
- }
+ p_score=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y,
+ V_g_null, V_e_null, UltVehiY, beta, Vbeta);
+ if (p_score<p_nr && crt==1) {
+ logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all,
+ xHi_all, Hiy_all, V_g, V_e, Hessian,
+ crt_a, crt_b, crt_c);
+ p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
+ }
}
-
+
if (a_mode==2 || a_mode==4) {
- logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
- //calculate beta and Vbeta
- p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-
- if (p_lrt<p_nr) {
- logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- //calculate beta and Vbeta
- p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-
- if (crt==1) {
- p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
- }
- }
- }
+ logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y,
+ U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
+ UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+ // Calculate beta and Vbeta.
+ p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y,
+ V_g, V_e, UltVehiY, beta, Vbeta);
+ p_lrt=gsl_cdf_chisq_Q(2.0*(logl_H1-logl_H0),(double)d_size);
+
+ if (p_lrt<p_nr) {
+ logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y,
+ Hi_all, xHi_all, Hiy_all, V_g, V_e,
+ Hessian, crt_a, crt_b, crt_c);
+
+ // Calculate beta and Vbeta.
+ p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y,
+ V_g, V_e, UltVehiY, beta, Vbeta);
+ p_lrt=gsl_cdf_chisq_Q(2.0*(logl_H1-logl_H0),
+ (double)d_size );
+
+ if (crt==1) {
+ p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
if (a_mode==1 || a_mode==4) {
- logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
- p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
-
- if (p_wald<p_nr) {
- logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
-
- if (crt==1) {
- p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
- }
- }
+ logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y,
+ U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
+ UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+ p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y,
+ V_g, V_e, UltVehiY, beta, Vbeta);
+
+ if (p_wald<p_nr) {
+ logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y,
+ Hi_all, xHi_all, Hiy_all, V_g, V_e,
+ Hessian, crt_a, crt_b, crt_c);
+ p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y,
+ V_g, V_e, UltVehiY, beta, Vbeta);
+
+ if (crt==1) {
+ p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
+ }
+ }
}
if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- //store summary data
- //SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ // Store summary data.
for (size_t i=0; i<d_size; i++) {
- v_beta[i]=gsl_vector_get (beta, i);
+ v_beta[i]=gsl_vector_get (beta, i);
}
c=0;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg[c]=gsl_matrix_get (V_g, i, j);
- v_Ve[c]=gsl_matrix_get (V_e, i, j);
- v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
- c++;
- }
+ for (size_t j=i; j<d_size; j++) {
+ v_Vg[c]=gsl_matrix_get (V_g, i, j);
+ v_Ve[c]=gsl_matrix_get (V_e, i, j);
+ v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
+ c++;
+ }
}
- MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+ MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg,
+ v_Ve, v_Vbeta};
sumStat.push_back(SNPs);
- }
+ }
cout<<endl;
@@ -4817,17 +4904,15 @@ void MVLMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const
return;
}
-
-
-
-
-
-
-void MVLMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY, const gsl_vector *env)
-{
+void MVLMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_matrix *UtY,
+ const gsl_vector *env) {
string file_bed=file_bfile+".bed";
ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
+ if (!infile) {
+ cout<<"error reading bed file:"<<file_bed<<endl;
+ return;
+ }
clock_t time_start=clock();
time_UtX=0; time_opt=0;
@@ -4835,17 +4920,15 @@ void MVLMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const
char ch[1];
bitset<8> b;
- // double lambda_mle=0, lambda_remle=0, beta=0, se=0, ;
double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
double crt_a, crt_b, crt_c;
int n_bit, n_miss, ci_total, ci_test;
double geno, x_mean;
size_t c=0;
- // double s=0.0;
size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2+2;
size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
- //large matrices for EM
+ // Large matrices for EM.
gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
@@ -4855,10 +4938,15 @@ void MVLMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const
gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
- //large matrices for NR
- gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); //each dxd block is H_k^{-1}
- gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); //each column is H_k^{-1}y_k
- gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); //each dcxdc block is x_k\otimes H_k^{-1}
+ // Large matrices for NR.
+ // Each dxd block is H_k^{-1}.
+ gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
+
+ // Each column is H_k^{-1}y_k
+ gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
+
+ // Each dcxdc block is x_k\otimes H_k^{-1}.
+ gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
gsl_vector *x=gsl_vector_alloc (n_size);
@@ -4871,24 +4959,26 @@ void MVLMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const
gsl_vector *beta=gsl_vector_alloc (d_size);
gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
- //null estimates for initial values
+ // Null estimates for initial values.
gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
gsl_matrix *se_B_null1=gsl_matrix_alloc (d_size, c_size-1);
gsl_matrix *se_B_null2=gsl_matrix_alloc (d_size, c_size);
- gsl_matrix_view X_sub1=gsl_matrix_submatrix (X, 0, 0, c_size-1, n_size);
- gsl_matrix_view B_sub1=gsl_matrix_submatrix (B, 0, 0, d_size, c_size-1);
- gsl_matrix_view xHi_all_sub1=gsl_matrix_submatrix (xHi_all, 0, 0, d_size*(c_size-1), d_size*n_size);
+ gsl_matrix_view X_sub1=gsl_matrix_submatrix(X,0,0,c_size-1,n_size);
+ gsl_matrix_view B_sub1=gsl_matrix_submatrix(B,0,0,d_size,c_size-1);
+ gsl_matrix_view xHi_all_sub1=
+ gsl_matrix_submatrix(xHi_all,0,0,d_size*(c_size-1),d_size*n_size);
gsl_matrix_view X_sub2=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
gsl_matrix_view B_sub2=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
- gsl_matrix_view xHi_all_sub2=gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
+ gsl_matrix_view xHi_all_sub2=
+ gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
gsl_matrix_transpose_memcpy (Y, UtY);
- gsl_matrix_view X_sub0=gsl_matrix_submatrix (X, 0, 0, c_size-2, n_size);
+ gsl_matrix_view X_sub0=gsl_matrix_submatrix(X,0,0,c_size-2,n_size);
gsl_matrix_transpose_memcpy (&X_sub0.matrix, UtW);
gsl_vector_view X_row0=gsl_matrix_row(X, c_size-2);
gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
@@ -4903,33 +4993,38 @@ void MVLMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const
gsl_vector_view B_col2=gsl_matrix_column(B, c_size);
gsl_vector_set_zero(&B_col2.vector);
- //time_start=clock();
- MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, Y, l_min, l_max, n_region, V_g, V_e, &B_sub1.matrix);
+ MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix,
+ Y, l_min, l_max, n_region, V_g, V_e, &B_sub1.matrix);
- logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix);
- logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix, se_B_null1);
- //cout<<"time for REML in the null = "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
+ logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat,
+ E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+ UltVehiE, V_g, V_e, &B_sub1.matrix);
+ logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y,
+ Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e,
+ Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY,
+ &B_sub1.matrix, se_B_null1);
c=0;
Vg_remle_null.clear();
Ve_remle_null.clear();
for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_remle_null.push_back(gsl_matrix_get (Hessian, c+v_size, c+v_size) );
- c++;
- }
+ for (size_t j=i; j<d_size; j++) {
+ Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
+ Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
+ VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
+ VVe_remle_null.push_back(gsl_matrix_get(Hessian,c+v_size,
+ c+v_size));
+ c++;
+ }
}
beta_remle_null.clear();
se_beta_remle_null.clear();
for (size_t i=0; i<se_B_null1->size1; i++) {
- for (size_t j=0; j<se_B_null1->size2; j++) {
- beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
- }
+ for (size_t j=0; j<se_B_null1->size2; j++) {
+ beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
+ se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
+ }
}
logl_remle_H0=logl_H0;
@@ -4937,279 +5032,293 @@ void MVLMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const
cout.precision(4);
cout<<"REMLE estimate for Vg in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_g, i, j)<<"\t";
+ }
+ cout<<endl;
}
cout<<"se(Vg): "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
+ }
+ cout<<endl;
}
cout<<"REMLE estimate for Ve in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_e, i, j)<<"\t";
+ }
+ cout<<endl;
}
cout<<"se(Ve): "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
+ }
+ cout<<endl;
}
cout<<"REMLE likelihood = "<<logl_H0<<endl;
- //time_start=clock();
- logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix);
- logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix, se_B_null1);
- //cout<<"time for MLE in the null = "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
+ logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub1.matrix, Y,
+ U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+ UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix);
+ logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y,
+ Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e,
+ Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY,
+ &B_sub1.matrix, se_B_null1);
c=0;
Vg_mle_null.clear();
Ve_mle_null.clear();
for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_mle_null.push_back(gsl_matrix_get (Hessian, c+v_size, c+v_size) );
- c++;
- }
+ for (size_t j=i; j<d_size; j++) {
+ Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
+ Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
+ VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
+ VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
+ c++;
+ }
}
beta_mle_null.clear();
se_beta_mle_null.clear();
for (size_t i=0; i<se_B_null1->size1; i++) {
- for (size_t j=0; j<se_B_null1->size2; j++) {
- beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
- }
+ for (size_t j=0; j<se_B_null1->size2; j++) {
+ beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
+ se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
+ }
}
logl_mle_H0=logl_H0;
cout<<"MLE estimate for Vg in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_g, i, j)<<"\t";
+ }
+ cout<<endl;
}
cout<<"se(Vg): "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
+ }
+ cout<<endl;
}
cout<<"MLE estimate for Ve in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_e, i, j)<<"\t";
+ }
+ cout<<endl;
}
cout<<"se(Ve): "<<endl;
for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
+ }
+ cout<<endl;
}
cout<<"MLE likelihood = "<<logl_H0<<endl;
vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
for (size_t i=0; i<d_size; i++) {
- v_beta.push_back(0.0);
+ v_beta.push_back(0.0);
}
for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg.push_back(0.0);
- v_Ve.push_back(0.0);
- v_Vbeta.push_back(0.0);
- }
+ for (size_t j=i; j<d_size; j++) {
+ v_Vg.push_back(0.0);
+ v_Ve.push_back(0.0);
+ v_Vbeta.push_back(0.0);
+ }
}
gsl_matrix_memcpy (V_g_null, V_g);
gsl_matrix_memcpy (V_e_null, V_e);
gsl_matrix_memcpy (B_null, B);
-
- //start reading genotypes and analyze
-
- //calculate n_bit and c, the number of bit for each snp
+ // Start reading genotypes and analyze.
+ // Calculate n_bit and c, the number of bit for each SNP.
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1; }
- //print the first three majic numbers
+ // Print the first three magic numbers.
for (int i=0; i<3; ++i) {
- infile.read(ch,1);
- b=ch[0];
+ infile.read(ch,1);
+ b=ch[0];
}
for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
- if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);}
- if (indicator_snp[t]==0) {continue;}
-
- //if (t>=0) {break;}
- //if (snpInfo[t].rs_number!="MAG18140902") {continue;}
- //cout<<t<<endl;
-
- infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
-
- //read genotypes
- x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
- for (int i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
- for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
- if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
- if (indicator_idv[ci_total]==0) {ci_total++; continue;}
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
- else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
- }
- else {
- if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
- else {gsl_vector_set(x, ci_test, -9); n_miss++; }
- }
-
- ci_total++;
- ci_test++;
- }
- }
-
- x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
- geno=gsl_vector_get(x,i);
- if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
- if (x_mean>1) {
- gsl_vector_set(x, i, 2-geno);
- }
- }
-
- /*
- if (t==0) {
- ofstream outfile ("./snp1.txt", ofstream::out);
- if (!outfile) {cout<<"error writing file: "<<endl; return;}
- for (size_t i=0; i<x->size; i++) {
- outfile<<gsl_vector_get(x, i)<<endl;
- }
- outfile.clear();
- outfile.close();
- }
- */
-
- //calculate statistics
- time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
- gsl_vector_mul (x, env);
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //initial values
- gsl_matrix_memcpy (V_g, V_g_null);
- gsl_matrix_memcpy (V_e, V_e_null);
- gsl_matrix_memcpy (B, B_null);
-
- if (a_mode==2 || a_mode==3 || a_mode==4) {
- if (a_mode==3 || a_mode==4) {
- logl_H0=MphEM ('R', em_iter/10, em_prec*10, eval, &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
- logl_H0=MphNR ('R', nr_iter/10, nr_prec*10, eval, &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix, se_B_null2);
- }
-
- if (a_mode==2 || a_mode==4) {
- logl_H0=MphEM ('L', em_iter/10, em_prec*10, eval, &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
- logl_H0=MphNR ('L', nr_iter/10, nr_prec*10, eval, &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix, se_B_null2);
- }
- }
-
- time_start=clock();
-
- //3 is before 1
- if (a_mode==3 || a_mode==4) {
- p_score=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-
- if (p_score<p_nr && crt==1) {
- logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
- }
- }
-
- if (a_mode==2 || a_mode==4) {
- logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
- //calculate beta and Vbeta
- p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-
- if (p_lrt<p_nr) {
- logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-
- //calculate beta and Vbeta
- p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
- if (crt==1) {
- p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
- }
- }
- }
-
- if (a_mode==1 || a_mode==4) {
- logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
- p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
-
- if (p_wald<p_nr) {
- logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
-
- if (crt==1) {
- p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
- }
- }
- }
-
- //cout<<setprecision(10)<<p_wald<<"\t"<<p_lrt<<"\t"<<p_score<<endl;
-
- if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
+ if (t%d_pace==0 || t==snpInfo.size()-1) {
+ ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);
+ }
+ if (indicator_snp[t]==0) {continue;}
+
+ // n_bit, and 3 is the number of magic numbers.
+ infile.seekg(t*n_bit+3);
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+ // Read genotypes.
+ x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
+ for (int i=0; i<n_bit; ++i) {
+ infile.read(ch,1);
+ b=ch[0];
- //store summary data
- //SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
- for (size_t i=0; i<d_size; i++) {
- v_beta[i]=gsl_vector_get (beta, i);
- }
+ // Minor allele homozygous: 2.0; major: 0.0.
+ for (size_t j=0; j<4; ++j) {
+
+ if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
+ if (indicator_idv[ci_total]==0) {ci_total++; continue;}
+
+ if (b[2*j]==0) {
+ if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
+ else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
+ }
+ else {
+ if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
+ else {gsl_vector_set(x, ci_test, -9); n_miss++; }
+ }
+
+ ci_total++;
+ ci_test++;
+ }
+ }
+
+ x_mean/=(double)(ni_test-n_miss);
+
+ for (size_t i=0; i<ni_test; ++i) {
+ geno=gsl_vector_get(x,i);
+ if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
+ if (x_mean>1) {
+ gsl_vector_set(x, i, 2-geno);
+ }
+ }
+
+ // Calculate statistics.
+ time_start=clock();
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
+ gsl_vector_mul (x, env);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
+ time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ // Initial values.
+ gsl_matrix_memcpy (V_g, V_g_null);
+ gsl_matrix_memcpy (V_e, V_e_null);
+ gsl_matrix_memcpy (B, B_null);
+
+ if (a_mode==2 || a_mode==3 || a_mode==4) {
+ if (a_mode==3 || a_mode==4) {
+ logl_H0=MphEM ('R', em_iter/10, em_prec*10, eval,
+ &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE,
+ UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+ V_e, &B_sub2.matrix);
+ logl_H0=MphNR ('R', nr_iter/10, nr_prec*10, eval,
+ &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix,
+ Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY,
+ &B_sub2.matrix, se_B_null2);
+ }
+
+ if (a_mode==2 || a_mode==4) {
+ logl_H0=MphEM ('L', em_iter/10, em_prec*10, eval,
+ &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE,
+ UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+ V_e, &B_sub2.matrix);
+ logl_H0=MphNR ('L', nr_iter/10, nr_prec*10, eval,
+ &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix,
+ Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY,
+ &B_sub2.matrix, se_B_null2);
+ }
+ }
+
+ time_start=clock();
+
+ // 3 is before 1.
+ if (a_mode==3 || a_mode==4) {
+ p_score=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y,
+ V_g_null, V_e_null, UltVehiY, beta, Vbeta);
+
+ if (p_score<p_nr && crt==1) {
+ logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all,
+ Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
+ }
+ }
+
+ if (a_mode==2 || a_mode==4) {
+ logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat,
+ E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+ UltVehiU, UltVehiE, V_g, V_e, B);
+
+ // Calculate beta and Vbeta.
+ p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g,
+ V_e, UltVehiY, beta, Vbeta);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+
+ if (p_lrt<p_nr) {
+ logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all,
+ xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a,
+ crt_b, crt_c);
+
+ // Calculate beta and Vbeta.
+ p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g,
+ V_e, UltVehiY, beta, Vbeta);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+ if (crt==1) {
+ p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ if (a_mode==1 || a_mode==4) {
+ logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat,
+ E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+ UltVehiU, UltVehiE, V_g, V_e, B);
+ p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g,
+ V_e, UltVehiY, beta, Vbeta);
+
+ if (p_wald<p_nr) {
+ logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all,
+ xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a,
+ crt_b, crt_c);
+ p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g,
+ V_e, UltVehiY, beta, Vbeta);
+
+ if (crt==1) {
+ p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
- c=0;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg[c]=gsl_matrix_get (V_g, i, j);
- v_Ve[c]=gsl_matrix_get (V_e, i, j);
- v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
- c++;
- }
- }
+ time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
- sumStat.push_back(SNPs);
- }
+ // Store summary data.
+ for (size_t i=0; i<d_size; i++) {
+ v_beta[i]=gsl_vector_get (beta, i);
+ }
+
+ c=0;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=i; j<d_size; j++) {
+ v_Vg[c]=gsl_matrix_get (V_g, i, j);
+ v_Ve[c]=gsl_matrix_get (V_e, i, j);
+ v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
+ c++;
+ }
+ }
+
+ MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score,
+ v_Vg, v_Ve, v_Vbeta};
+ sumStat.push_back(SNPs);
+ }
cout<<endl;
- //cout<<"time_opt = "<<time_opt<<endl;
-
infile.close();
infile.clear();