aboutsummaryrefslogtreecommitdiff
path: root/src/lmm.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/lmm.cpp')
-rw-r--r--src/lmm.cpp1517
1 files changed, 1030 insertions, 487 deletions
diff --git a/src/lmm.cpp b/src/lmm.cpp
index e0b4160..7bcf89a 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -26,7 +26,7 @@
#include <cmath>
#include <iostream>
#include <stdio.h>
-#include <stdlib.h>
+#include <stdlib.h>
#include <bitset>
#include <cstring>
@@ -58,56 +58,58 @@ using namespace std;
-void LMM::CopyFromParam (PARAM &cPar)
+void LMM::CopyFromParam (PARAM &cPar)
{
a_mode=cPar.a_mode;
d_pace=cPar.d_pace;
-
+
file_bfile=cPar.file_bfile;
file_geno=cPar.file_geno;
file_out=cPar.file_out;
path_out=cPar.path_out;
file_gene=cPar.file_gene;
-
+ // WJA added
+ file_oxford=cPar.file_oxford;
+
l_min=cPar.l_min;
l_max=cPar.l_max;
- n_region=cPar.n_region;
+ n_region=cPar.n_region;
l_mle_null=cPar.l_mle_null;
logl_mle_H0=cPar.logl_mle_H0;
-
+
time_UtX=0.0;
time_opt=0.0;
-
+
ni_total=cPar.ni_total;
ns_total=cPar.ns_total;
ni_test=cPar.ni_test;
ns_test=cPar.ns_test;
n_cvt=cPar.n_cvt;
-
+
ng_total=cPar.ng_total;
ng_test=0;
-
- indicator_idv=cPar.indicator_idv;
- indicator_snp=cPar.indicator_snp;
+
+ indicator_idv=cPar.indicator_idv;
+ indicator_snp=cPar.indicator_snp;
snpInfo=cPar.snpInfo;
-
+
return;
}
-void LMM::CopyToParam (PARAM &cPar)
+void LMM::CopyToParam (PARAM &cPar)
{
cPar.time_UtX=time_UtX;
- cPar.time_opt=time_opt;
-
+ cPar.time_opt=time_opt;
+
cPar.ng_test=ng_test;
-
+
return;
}
-void LMM::WriteFiles ()
+void LMM::WriteFiles ()
{
string file_str;
file_str=path_out+"/"+file_out;
@@ -118,7 +120,7 @@ void LMM::WriteFiles ()
if (!file_gene.empty()) {
outfile<<"geneID"<<"\t";
-
+
if (a_mode==1) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"<<"p_wald"<<endl;
} else if (a_mode==2) {
@@ -128,10 +130,10 @@ void LMM::WriteFiles ()
} else if (a_mode==4) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"<<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
} else {}
-
- for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) {
+
+ for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) {
outfile<<snpInfo[t].rs_number<<"\t";
-
+
if (a_mode==1) {
outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].lambda_remle<<"\t"<<sumStat[t].p_wald <<endl;
} else if (a_mode==2) {
@@ -141,10 +143,10 @@ void LMM::WriteFiles ()
} else if (a_mode==4) {
outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].lambda_remle<<"\t"<<sumStat[t].lambda_mle<<"\t"<<sumStat[t].p_wald <<"\t"<<sumStat[t].p_lrt<<"\t"<<sumStat[t].p_score<<endl;
} else {}
- }
+ }
} else {
outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"<<"\t"<<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t";
-
+
if (a_mode==1) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"<<"p_wald"<<endl;
} else if (a_mode==2) {
@@ -154,13 +156,13 @@ void LMM::WriteFiles ()
} else if (a_mode==4) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"<<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
} else {}
-
+
size_t t=0;
for (size_t i=0; i<snpInfo.size(); ++i) {
if (indicator_snp[i]==0) {continue;}
-
+
outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"<<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<"\t"<<fixed<<setprecision(3)<<snpInfo[i].maf<<"\t";
-
+
if (a_mode==1) {
outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].lambda_remle<<"\t"<<sumStat[t].p_wald <<endl;
} else if (a_mode==2) {
@@ -173,8 +175,8 @@ void LMM::WriteFiles ()
t++;
}
}
-
-
+
+
outfile.close();
outfile.clear();
return;
@@ -196,10 +198,10 @@ size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) {
size_t index;
size_t l, h;
if (b>a) {l=a; h=b;} else {l=b; h=a;}
-
+
size_t n=n_cvt+2;
- index=(2*n-l+2)*(l-1)/2+h-l;
-
+ index=(2*n-l+2)*(l-1)/2+h-l;
+
return index;
}
@@ -209,12 +211,12 @@ void CalcPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval
size_t index_ab, index_aw, index_bw, index_ww;
double p_ab;
double ps_ab, ps_aw, ps_bw, ps_ww;
-
+
for (size_t p=0; p<=n_cvt+1; ++p) {
for (size_t a=p+1; a<=n_cvt+2; ++a) {
for (size_t b=a; b<=n_cvt+2; ++b) {
index_ab=GetabIndex (a, b, n_cvt);
- if (p==0) {
+ if (p==0) {
gsl_vector_const_view Uab_col=gsl_matrix_const_column (Uab, index_ab);
gsl_blas_ddot (Hi_eval, &Uab_col.vector, &p_ab);
if (e_mode!=0) {p_ab=gsl_vector_get (ab, index_ab)-p_ab;}
@@ -224,12 +226,12 @@ void CalcPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval
index_aw=GetabIndex (a, p, n_cvt);
index_bw=GetabIndex (b, p, n_cvt);
index_ww=GetabIndex (p, p, n_cvt);
-
+
ps_ab=gsl_matrix_get (Pab, p-1, index_ab);
ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
ps_ww=gsl_matrix_get (Pab, p-1, index_ww);
-
+
p_ab=ps_ab-ps_aw*ps_bw/ps_ww;
gsl_matrix_set (Pab, p, index_ab, p_ab);
}
@@ -245,12 +247,12 @@ void CalcPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHi_e
size_t index_ab, index_aw, index_bw, index_ww;
double p2_ab;
double ps2_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww;
-
+
for (size_t p=0; p<=n_cvt+1; ++p) {
for (size_t a=p+1; a<=n_cvt+2; ++a) {
for (size_t b=a; b<=n_cvt+2; ++b) {
index_ab=GetabIndex (a, b, n_cvt);
- if (p==0) {
+ if (p==0) {
gsl_vector_const_view Uab_col=gsl_matrix_const_column (Uab, index_ab);
gsl_blas_ddot (HiHi_eval, &Uab_col.vector, &p2_ab);
if (e_mode!=0) {p2_ab=p2_ab-gsl_vector_get (ab, index_ab)+2.0*gsl_matrix_get (Pab, 0, index_ab);}
@@ -260,7 +262,7 @@ void CalcPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHi_e
index_aw=GetabIndex (a, p, n_cvt);
index_bw=GetabIndex (b, p, n_cvt);
index_ww=GetabIndex (p, p, n_cvt);
-
+
ps2_ab=gsl_matrix_get (PPab, p-1, index_ab);
ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
@@ -268,11 +270,11 @@ void CalcPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHi_e
ps2_aw=gsl_matrix_get (PPab, p-1, index_aw);
ps2_bw=gsl_matrix_get (PPab, p-1, index_bw);
ps2_ww=gsl_matrix_get (PPab, p-1, index_ww);
-
+
p2_ab=ps2_ab+ps_aw*ps_bw*ps2_ww/(ps_ww*ps_ww);
p2_ab-=(ps_aw*ps2_bw+ps_bw*ps2_aw)/ps_ww;
gsl_matrix_set (PPab, p, index_ab, p2_ab);
-
+
}
}
}
@@ -286,12 +288,12 @@ void CalcPPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHiH
size_t index_ab, index_aw, index_bw, index_ww;
double p3_ab;
double ps3_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww, ps3_aw, ps3_bw, ps3_ww;
-
+
for (size_t p=0; p<=n_cvt+1; ++p) {
for (size_t a=p+1; a<=n_cvt+2; ++a) {
for (size_t b=a; b<=n_cvt+2; ++b) {
index_ab=GetabIndex (a, b, n_cvt);
- if (p==0) {
+ if (p==0) {
gsl_vector_const_view Uab_col=gsl_matrix_const_column (Uab, index_ab);
gsl_blas_ddot (HiHiHi_eval, &Uab_col.vector, &p3_ab);
if (e_mode!=0) {p3_ab=gsl_vector_get (ab, index_ab)-p3_ab+3.0*gsl_matrix_get (PPab, 0, index_ab)-3.0*gsl_matrix_get (Pab, 0, index_ab);}
@@ -301,7 +303,7 @@ void CalcPPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHiH
index_aw=GetabIndex (a, p, n_cvt);
index_bw=GetabIndex (b, p, n_cvt);
index_ww=GetabIndex (p, p, n_cvt);
-
+
ps3_ab=gsl_matrix_get (PPPab, p-1, index_ab);
ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
@@ -312,11 +314,11 @@ void CalcPPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHiH
ps3_aw=gsl_matrix_get (PPPab, p-1, index_aw);
ps3_bw=gsl_matrix_get (PPPab, p-1, index_bw);
ps3_ww=gsl_matrix_get (PPPab, p-1, index_ww);
-
+
p3_ab=ps3_ab-ps_aw*ps_bw*ps2_ww*ps2_ww/(ps_ww*ps_ww*ps_ww);
p3_ab-=(ps_aw*ps3_bw+ps_bw*ps3_aw+ps2_aw*ps2_bw)/ps_ww;
p3_ab+=(ps_aw*ps2_bw*ps2_ww+ps_bw*ps2_aw*ps2_ww+ps_aw*ps_bw*ps3_ww)/(ps_ww*ps_ww);
-
+
gsl_matrix_set (PPPab, p, index_ab, p3_ab);
}
}
@@ -331,119 +333,119 @@ double LogL_f (double l, void *params)
{
FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
+ size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
size_t nc_total;
if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;}
-
+
double f=0.0, logdet_h=0.0, d;
size_t index_yy;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
+
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
+ gsl_vector_div (Hi_eval, v_temp);
+
for (size_t i=0; i<(p->eval)->size; ++i) {
d=gsl_vector_get (v_temp, i);
logdet_h+=log(fabs(d));
- }
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
-
+ }
+
+ CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+
double c=0.5*(double)ni_test*(log((double)ni_test)-log(2*M_PI)-1.0);
-
- index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
+
+ index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
double P_yy=gsl_matrix_get (Pab, nc_total, index_yy);
f=c-0.5*logdet_h-0.5*(double)ni_test*log(P_yy);
-
+
gsl_matrix_free (Pab);
gsl_vector_free (Hi_eval);
gsl_vector_free (v_temp);
return f;
}
-
-
+
+
double LogL_dev1 (double l, void *params)
{
- FUNC_PARAM *p=(FUNC_PARAM *) params;
+ FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
+ size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
size_t nc_total;
if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;}
-
+
double dev1=0.0, trace_Hi=0.0;
size_t index_yy;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
+
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
-
+
gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
-
+ gsl_vector_mul (HiHi_eval, Hi_eval);
+
gsl_vector_set_all (v_temp, 1.0);
gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
-
+
if (p->e_mode!=0) {trace_Hi=(double)ni_test-trace_Hi;}
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
-
- double trace_HiK=((double)ni_test-trace_Hi)/l;
-
+
+ CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+ CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+
+ double trace_HiK=((double)ni_test-trace_Hi)/l;
+
index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
-
+
double P_yy=gsl_matrix_get (Pab, nc_total, index_yy);
double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy);
- double yPKPy=(P_yy-PP_yy)/l;
+ double yPKPy=(P_yy-PP_yy)/l;
dev1=-0.5*trace_HiK+0.5*(double)ni_test*yPKPy/P_yy;
-
+
gsl_matrix_free (Pab);
gsl_matrix_free (PPab);
gsl_vector_free (Hi_eval);
gsl_vector_free (HiHi_eval);
- gsl_vector_free (v_temp);
-
+ gsl_vector_free (v_temp);
+
return dev1;
}
-
-
+
+
double LogL_dev2 (double l, void *params)
{
- FUNC_PARAM *p=(FUNC_PARAM *) params;
+ FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
+ size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
size_t nc_total;
if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;}
-
+
double dev2=0.0, trace_Hi=0.0, trace_HiHi=0.0;
size_t index_yy;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index);
@@ -451,71 +453,71 @@ double LogL_dev2 (double l, void *params)
gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
+
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
-
+
gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
+ gsl_vector_mul (HiHi_eval, Hi_eval);
gsl_vector_memcpy (HiHiHi_eval, HiHi_eval);
gsl_vector_mul (HiHiHi_eval, Hi_eval);
-
+
gsl_vector_set_all (v_temp, 1.0);
gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi);
-
- if (p->e_mode!=0) {
+
+ if (p->e_mode!=0) {
trace_Hi=(double)ni_test-trace_Hi;
trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test;
}
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
-
+
+ CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+ CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+ CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+
double trace_HiKHiK=((double)ni_test+trace_HiHi-2*trace_Hi)/(l*l);
-
+
index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
double P_yy=gsl_matrix_get (Pab, nc_total, index_yy);
double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy);
- double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_yy);
-
+ double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_yy);
+
double yPKPy=(P_yy-PP_yy)/l;
double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
-
+
dev2=0.5*trace_HiKHiK-0.5*(double)ni_test*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
-
+
gsl_matrix_free (Pab);
gsl_matrix_free (PPab);
gsl_matrix_free (PPPab);
gsl_vector_free (Hi_eval);
gsl_vector_free (HiHi_eval);
gsl_vector_free (HiHiHi_eval);
- gsl_vector_free (v_temp);
-
+ gsl_vector_free (v_temp);
+
return dev2;
}
-
-
-
-
-
+
+
+
+
+
void LogL_dev12 (double l, void *params, double *dev1, double *dev2)
{
FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
+ size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
size_t nc_total;
if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;}
-
+
double trace_Hi=0.0, trace_HiHi=0.0;
size_t index_yy;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index);
@@ -523,54 +525,54 @@ void LogL_dev12 (double l, void *params, double *dev1, double *dev2)
gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
+
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
-
+
gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
+ gsl_vector_mul (HiHi_eval, Hi_eval);
gsl_vector_memcpy (HiHiHi_eval, HiHi_eval);
gsl_vector_mul (HiHiHi_eval, Hi_eval);
-
+
gsl_vector_set_all (v_temp, 1.0);
gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi);
-
- if (p->e_mode!=0) {
+
+ if (p->e_mode!=0) {
trace_Hi=(double)ni_test-trace_Hi;
trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test;
}
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
-
+
+ CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+ CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+ CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+
double trace_HiK=((double)ni_test-trace_Hi)/l;
double trace_HiKHiK=((double)ni_test+trace_HiHi-2*trace_Hi)/(l*l);
-
+
index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
-
+
double P_yy=gsl_matrix_get (Pab, nc_total, index_yy);
double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy);
- double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_yy);
-
- double yPKPy=(P_yy-PP_yy)/l;
+ double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_yy);
+
+ double yPKPy=(P_yy-PP_yy)/l;
double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
-
+
*dev1=-0.5*trace_HiK+0.5*(double)ni_test*yPKPy/P_yy;
*dev2=0.5*trace_HiKHiK-0.5*(double)ni_test*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
-
+
gsl_matrix_free (Pab);
gsl_matrix_free (PPab);
gsl_matrix_free (PPPab);
gsl_vector_free (Hi_eval);
gsl_vector_free (HiHi_eval);
gsl_vector_free (HiHiHi_eval);
- gsl_vector_free (v_temp);
-
+ gsl_vector_free (v_temp);
+
return;
}
@@ -578,39 +580,39 @@ void LogL_dev12 (double l, void *params, double *dev1, double *dev2)
double LogRL_f (double l, void *params)
{
- FUNC_PARAM *p=(FUNC_PARAM *) params;
+ FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
+ size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
double df;
size_t nc_total;
if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; }
else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;}
-
+
double f=0.0, logdet_h=0.0, logdet_hiw=0.0, d;
size_t index_ww;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *Iab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
+
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
+ gsl_vector_div (Hi_eval, v_temp);
+
for (size_t i=0; i<(p->eval)->size; ++i) {
d=gsl_vector_get (v_temp, i);
logdet_h+=log(fabs(d));
}
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+
+ CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
gsl_vector_set_all (v_temp, 1.0);
- CalcPab (n_cvt, p->e_mode, v_temp, p->Uab, p->ab, Iab);
-
+ CalcPab (n_cvt, p->e_mode, v_temp, p->Uab, p->ab, Iab);
+
//calculate |WHiW|-|WW|
logdet_hiw=0.0;
for (size_t i=0; i<nc_total; ++i) {
@@ -620,12 +622,12 @@ double LogRL_f (double l, void *params)
d=gsl_matrix_get (Iab, i, index_ww);
logdet_hiw-=log(d);
}
- index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
+ index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
-
- double c=0.5*df*(log(df)-log(2*M_PI)-1.0);
+
+ double c=0.5*df*(log(df)-log(2*M_PI)-1.0);
f=c-0.5*logdet_h-0.5*logdet_hiw-0.5*df*log(P_yy);
-
+
gsl_matrix_free (Pab);
gsl_matrix_free (Iab);
gsl_vector_free (Hi_eval);
@@ -637,44 +639,44 @@ double LogRL_f (double l, void *params)
double LogRL_dev1 (double l, void *params)
{
- FUNC_PARAM *p=(FUNC_PARAM *) params;
+ FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
+ size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
double df;
size_t nc_total;
if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; }
else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;}
-
+
double dev1=0.0, trace_Hi=0.0;
size_t index_ww;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
+
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
-
+
gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
-
+ gsl_vector_mul (HiHi_eval, Hi_eval);
+
gsl_vector_set_all (v_temp, 1.0);
gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
-
- if (p->e_mode!=0) {
+
+ if (p->e_mode!=0) {
trace_Hi=(double)ni_test-trace_Hi;
}
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
-
+
+ CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+ CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+
//calculate tracePK and trace PKPK
double trace_P=trace_Hi;
double ps_ww, ps2_ww;
@@ -685,21 +687,21 @@ double LogRL_dev1 (double l, void *params)
trace_P-=ps2_ww/ps_ww;
}
double trace_PK=(df-trace_P)/l;
-
+
//calculate yPKPy, yPKPKPy
index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
- double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
- double yPKPy=(P_yy-PP_yy)/l;
-
- dev1=-0.5*trace_PK+0.5*df*yPKPy/P_yy;
-
+ double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
+ double yPKPy=(P_yy-PP_yy)/l;
+
+ dev1=-0.5*trace_PK+0.5*df*yPKPy/P_yy;
+
gsl_matrix_free (Pab);
gsl_matrix_free (PPab);
gsl_vector_free (Hi_eval);
gsl_vector_free (HiHi_eval);
- gsl_vector_free (v_temp);
-
+ gsl_vector_free (v_temp);
+
return dev1;
}
@@ -708,19 +710,19 @@ double LogRL_dev1 (double l, void *params)
double LogRL_dev2 (double l, void *params)
{
- FUNC_PARAM *p=(FUNC_PARAM *) params;
+ FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
+ size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
double df;
size_t nc_total;
if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; }
else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;}
-
+
double dev2=0.0, trace_Hi=0.0, trace_HiHi=0.0;
size_t index_ww;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index);
@@ -728,31 +730,31 @@ double LogRL_dev2 (double l, void *params)
gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
+
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
-
+
gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
+ gsl_vector_mul (HiHi_eval, Hi_eval);
gsl_vector_memcpy (HiHiHi_eval, HiHi_eval);
gsl_vector_mul (HiHiHi_eval, Hi_eval);
-
+
gsl_vector_set_all (v_temp, 1.0);
gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi);
-
- if (p->e_mode!=0) {
+
+ if (p->e_mode!=0) {
trace_Hi=(double)ni_test-trace_Hi;
trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test;
}
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
-
+
+ CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+ CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+ CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+
//calculate tracePK and trace PKPK
double trace_P=trace_Hi, trace_PP=trace_HiHi;
double ps_ww, ps2_ww, ps3_ww;
@@ -765,46 +767,46 @@ double LogRL_dev2 (double l, void *params)
trace_PP+=ps2_ww*ps2_ww/(ps_ww*ps_ww)-2.0*ps3_ww/ps_ww;
}
double trace_PKPK=(df+trace_PP-2.0*trace_P)/(l*l);
-
+
//calculate yPKPy, yPKPKPy
index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
- double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_ww);
- double yPKPy=(P_yy-PP_yy)/l;
+ double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_ww);
+ double yPKPy=(P_yy-PP_yy)/l;
double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
-
+
dev2=0.5*trace_PKPK-0.5*df*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
-
+
gsl_matrix_free (Pab);
gsl_matrix_free (PPab);
gsl_matrix_free (PPPab);
gsl_vector_free (Hi_eval);
gsl_vector_free (HiHi_eval);
gsl_vector_free (HiHiHi_eval);
- gsl_vector_free (v_temp);
-
+ gsl_vector_free (v_temp);
+
return dev2;
}
-
+
void LogRL_dev12 (double l, void *params, double *dev1, double *dev2)
{
- FUNC_PARAM *p=(FUNC_PARAM *) params;
+ FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
+ size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
double df;
size_t nc_total;
if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; }
else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;}
-
+
double trace_Hi=0.0, trace_HiHi=0.0;
size_t index_ww;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index);
@@ -812,31 +814,31 @@ void LogRL_dev12 (double l, void *params, double *dev1, double *dev2)
gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
+
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
-
+
gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
+ gsl_vector_mul (HiHi_eval, Hi_eval);
gsl_vector_memcpy (HiHiHi_eval, HiHi_eval);
gsl_vector_mul (HiHiHi_eval, Hi_eval);
-
+
gsl_vector_set_all (v_temp, 1.0);
gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi);
-
- if (p->e_mode!=0) {
+
+ if (p->e_mode!=0) {
trace_Hi=(double)ni_test-trace_Hi;
trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test;
}
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
-
+
+ CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+ CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+ CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+
//calculate tracePK and trace PKPK
double trace_P=trace_Hi, trace_PP=trace_HiHi;
double ps_ww, ps2_ww, ps3_ww;
@@ -850,29 +852,29 @@ void LogRL_dev12 (double l, void *params, double *dev1, double *dev2)
}
double trace_PK=(df-trace_P)/l;
double trace_PKPK=(df+trace_PP-2.0*trace_P)/(l*l);
-
+
//calculate yPKPy, yPKPKPy
index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
- double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_ww);
- double yPKPy=(P_yy-PP_yy)/l;
+ double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_ww);
+ double yPKPy=(P_yy-PP_yy)/l;
double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
-
+
*dev1=-0.5*trace_PK+0.5*df*yPKPy/P_yy;
*dev2=0.5*trace_PKPK-0.5*df*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
-
+
gsl_matrix_free (Pab);
gsl_matrix_free (PPab);
gsl_matrix_free (PPPab);
gsl_vector_free (Hi_eval);
gsl_vector_free (HiHi_eval);
gsl_vector_free (HiHiHi_eval);
- gsl_vector_free (v_temp);
-
+ gsl_vector_free (v_temp);
+
return ;
}
-
+
@@ -884,35 +886,35 @@ void LMM::CalcRLWald (const double &l, const FUNC_PARAM &params, double &beta, d
{
size_t n_cvt=params.n_cvt;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
int df=(int)ni_test-(int)n_cvt-1;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_vector *Hi_eval=gsl_vector_alloc(params.eval->size);
gsl_vector *v_temp=gsl_vector_alloc(params.eval->size);
-
+
gsl_vector_memcpy (v_temp, params.eval);
gsl_vector_scale (v_temp, l);
if (params.e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
- CalcPab (n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab);
-
- size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
+ gsl_vector_div (Hi_eval, v_temp);
+
+ CalcPab (n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab);
+
+ size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
size_t index_xx=GetabIndex (n_cvt+1, n_cvt+1, n_cvt);
size_t index_xy=GetabIndex (n_cvt+2, n_cvt+1, n_cvt);
double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy);
double P_xx=gsl_matrix_get (Pab, n_cvt, index_xx);
- double P_xy=gsl_matrix_get (Pab, n_cvt, index_xy);
- double Px_yy=gsl_matrix_get (Pab, n_cvt+1, index_yy);
-
+ double P_xy=gsl_matrix_get (Pab, n_cvt, index_xy);
+ double Px_yy=gsl_matrix_get (Pab, n_cvt+1, index_yy);
+
beta=P_xy/P_xx;
double tau=(double)df/Px_yy;
- se=sqrt(1.0/(tau*P_xx));
- p_wald=gsl_cdf_fdist_Q ((P_yy-Px_yy)*tau, 1.0, df);
-// p_wald=gsl_cdf_chisq_Q ((P_yy-Px_yy)*tau, 1);
-
+ se=sqrt(1.0/(tau*P_xx));
+ p_wald=gsl_cdf_fdist_Q ((P_yy-Px_yy)*tau, 1.0, df);
+// p_wald=gsl_cdf_chisq_Q ((P_yy-Px_yy)*tau, 1);
+
gsl_matrix_free (Pab);
gsl_vector_free (Hi_eval);
gsl_vector_free (v_temp);
@@ -924,36 +926,36 @@ void LMM::CalcRLScore (const double &l, const FUNC_PARAM &params, double &beta,
{
size_t n_cvt=params.n_cvt;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
int df=(int)ni_test-(int)n_cvt-1;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_vector *Hi_eval=gsl_vector_alloc(params.eval->size);
gsl_vector *v_temp=gsl_vector_alloc(params.eval->size);
-
+
gsl_vector_memcpy (v_temp, params.eval);
gsl_vector_scale (v_temp, l);
if (params.e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
- CalcPab (n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab);
-
- size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
+ gsl_vector_div (Hi_eval, v_temp);
+
+ CalcPab (n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab);
+
+ size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
size_t index_xx=GetabIndex (n_cvt+1, n_cvt+1, n_cvt);
size_t index_xy=GetabIndex (n_cvt+2, n_cvt+1, n_cvt);
double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy);
double P_xx=gsl_matrix_get (Pab, n_cvt, index_xx);
- double P_xy=gsl_matrix_get (Pab, n_cvt, index_xy);
- double Px_yy=gsl_matrix_get (Pab, n_cvt+1, index_yy);
-
+ double P_xy=gsl_matrix_get (Pab, n_cvt, index_xy);
+ double Px_yy=gsl_matrix_get (Pab, n_cvt+1, index_yy);
+
beta=P_xy/P_xx;
double tau=(double)df/Px_yy;
- se=sqrt(1.0/(tau*P_xx));
-
+ se=sqrt(1.0/(tau*P_xx));
+
p_score=gsl_cdf_fdist_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx), 1.0, df);
-// p_score=gsl_cdf_chisq_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx), 1);
-
+// p_score=gsl_cdf_chisq_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx), 1);
+
gsl_matrix_free (Pab);
gsl_vector_free (Hi_eval);
gsl_vector_free (v_temp);
@@ -967,131 +969,131 @@ void LMM::CalcRLScore (const double &l, const FUNC_PARAM &params, double &beta,
-void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab)
+void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab)
{
size_t index_ab;
size_t n_cvt=UtW->size2;
-
+
gsl_vector *u_a=gsl_vector_alloc (Uty->size);
-
+
for (size_t a=1; a<=n_cvt+2; ++a) {
if (a==n_cvt+1) {continue;}
-
+
if (a==n_cvt+2) {gsl_vector_memcpy (u_a, Uty);}
else {
gsl_vector_const_view UtW_col=gsl_matrix_const_column (UtW, a-1);
gsl_vector_memcpy (u_a, &UtW_col.vector);
}
-
- for (size_t b=a; b>=1; --b) {
+
+ for (size_t b=a; b>=1; --b) {
if (b==n_cvt+1) {continue;}
-
+
index_ab=GetabIndex (a, b, n_cvt);
gsl_vector_view Uab_col=gsl_matrix_column (Uab, index_ab);
-
+
if (b==n_cvt+2) {gsl_vector_memcpy (&Uab_col.vector, Uty);}
else {
gsl_vector_const_view UtW_col=gsl_matrix_const_column (UtW, b-1);
gsl_vector_memcpy (&Uab_col.vector, &UtW_col.vector);
- }
-
+ }
+
gsl_vector_mul(&Uab_col.vector, u_a);
}
}
-
+
gsl_vector_free (u_a);
return;
}
-void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_vector *Utx, gsl_matrix *Uab)
-{
+void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_vector *Utx, gsl_matrix *Uab)
+{
size_t index_ab;
size_t n_cvt=UtW->size2;
-
- for (size_t b=1; b<=n_cvt+2; ++b) {
+
+ for (size_t b=1; b<=n_cvt+2; ++b) {
index_ab=GetabIndex (n_cvt+1, b, n_cvt);
gsl_vector_view Uab_col=gsl_matrix_column (Uab, index_ab);
-
+
if (b==n_cvt+2) {gsl_vector_memcpy (&Uab_col.vector, Uty);}
else if (b==n_cvt+1) {gsl_vector_memcpy (&Uab_col.vector, Utx);}
else {
gsl_vector_const_view UtW_col=gsl_matrix_const_column (UtW, b-1);
gsl_vector_memcpy (&Uab_col.vector, &UtW_col.vector);
}
-
+
gsl_vector_mul(&Uab_col.vector, Utx);
}
-
+
return;
}
-void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab)
+void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab)
{
size_t index_ab;
size_t n_cvt=W->size2;
-
+
double d;
gsl_vector *v_a=gsl_vector_alloc (y->size);
gsl_vector *v_b=gsl_vector_alloc (y->size);
-
+
for (size_t a=1; a<=n_cvt+2; ++a) {
if (a==n_cvt+1) {continue;}
-
+
if (a==n_cvt+2) {gsl_vector_memcpy (v_a, y);}
else {
gsl_vector_const_view W_col=gsl_matrix_const_column (W, a-1);
gsl_vector_memcpy (v_a, &W_col.vector);
}
-
- for (size_t b=a; b>=1; --b) {
+
+ for (size_t b=a; b>=1; --b) {
if (b==n_cvt+1) {continue;}
-
+
index_ab=GetabIndex (a, b, n_cvt);
-
+
if (b==n_cvt+2) {gsl_vector_memcpy (v_b, y);}
else {
gsl_vector_const_view W_col=gsl_matrix_const_column (W, b-1);
gsl_vector_memcpy (v_b, &W_col.vector);
- }
-
+ }
+
gsl_blas_ddot (v_a, v_b, &d);
gsl_vector_set(ab, index_ab, d);
}
}
-
+
gsl_vector_free (v_a);
gsl_vector_free (v_b);
return;
}
-void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, gsl_vector *ab)
-{
+void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, gsl_vector *ab)
+{
size_t index_ab;
size_t n_cvt=W->size2;
-
+
double d;
gsl_vector *v_b=gsl_vector_alloc (y->size);
-
- for (size_t b=1; b<=n_cvt+2; ++b) {
+
+ for (size_t b=1; b<=n_cvt+2; ++b) {
index_ab=GetabIndex (n_cvt+1, b, n_cvt);
-
+
if (b==n_cvt+2) {gsl_vector_memcpy (v_b, y);}
else if (b==n_cvt+1) {gsl_vector_memcpy (v_b, x);}
else {
gsl_vector_const_view W_col=gsl_matrix_const_column (W, b-1);
gsl_vector_memcpy (v_b, &W_col.vector);
}
-
+
gsl_blas_ddot (x, v_b, &d);
gsl_vector_set(ab, index_ab, d);
}
-
+
gsl_vector_free (v_b);
-
+
return;
}
@@ -1099,101 +1101,101 @@ void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, gsl_
-void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Utx, const gsl_matrix *W, const gsl_vector *x)
+void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Utx, const gsl_matrix *W, const gsl_vector *x)
{
- ifstream infile (file_gene.c_str(), ifstream::in);
+ igzstream infile (file_gene.c_str(), igzstream::in);
if (!infile) {cout<<"error reading gene expression file:"<<file_gene<<endl; return;}
-
+
clock_t time_start=clock();
-
+
string line;
char *ch_ptr;
-
+
double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
double logl_H1=0.0, logl_H0=0.0, l_H0;
int c_phen;
string rs; //gene id
double d;
-
+
//Calculate basic quantities
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
gsl_vector *y=gsl_vector_alloc (U->size1);
gsl_vector *Uty=gsl_vector_alloc (U->size2);
gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
-
+ gsl_vector *ab=gsl_vector_alloc (n_index);
+
//header
getline(infile, line);
-
+
for (size_t t=0; t<ng_total; t++) {
!safeGetline(infile, line).eof();
if (t%d_pace==0 || t==ng_total-1) {ProgressBar ("Performing Analysis ", t, ng_total-1);}
ch_ptr=strtok ((char *)line.c_str(), " , \t");
rs=ch_ptr;
-
- c_phen=0;
+
+ c_phen=0;
for (size_t i=0; i<indicator_idv.size(); ++i) {
ch_ptr=strtok (NULL, " , \t");
if (indicator_idv[i]==0) {continue;}
-
- d=atof(ch_ptr);
+
+ d=atof(ch_ptr);
gsl_vector_set(y, c_phen, d);
-
+
c_phen++;
}
-
+
time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, y, 0.0, Uty);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, y, 0.0, Uty);
time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
//calculate null
time_start=clock();
-
+
gsl_matrix_set_zero (Uab);
-
+
CalcUab (UtW, Uty, Uab);
FUNC_PARAM param0={false, ni_test, n_cvt, eval, Uab, ab, 0};
-
+
if (a_mode==2 || a_mode==3 || a_mode==4) {
CalcLambda('L', param0, l_min, l_max, n_region, l_H0, logl_H0);
}
-
+
//calculate alternative
CalcUab(UtW, Uty, Utx, Uab);
FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
-
+
//3 is before 1
if (a_mode==3 || a_mode==4) {
CalcRLScore (l_H0, param1, beta, se, p_score);
}
-
+
if (a_mode==1 || a_mode==4) {
CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
CalcRLWald (lambda_remle, param1, beta, se, p_wald);
}
-
+
if (a_mode==2 || a_mode==4) {
CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
}
-
+
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
//store summary data
SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
}
cout<<endl;
-
+
gsl_vector_free (y);
gsl_vector_free (Uty);
gsl_matrix_free (Uab);
gsl_vector_free (ab);
-
+
infile.close();
infile.clear();
-
+
return;
}
@@ -1201,22 +1203,22 @@ void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
-void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y)
+void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y)
{
igzstream infile (file_geno.c_str(), igzstream::in);
// ifstream infile (file_geno.c_str(), ifstream::in);
if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;}
clock_t time_start=clock();
-
+
string line;
char *ch_ptr;
-
+
double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
double logl_H1=0.0;
int n_miss, c_phen;
double geno, x_mean;
-
+
//Calculate basic quantities
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
@@ -1224,45 +1226,45 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
gsl_vector *x_miss=gsl_vector_alloc (U->size1);
gsl_vector *Utx=gsl_vector_alloc (U->size2);
gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
-
+ gsl_vector *ab=gsl_vector_alloc (n_index);
+
gsl_matrix_set_zero (Uab);
CalcUab (UtW, Uty, Uab);
// if (e_mode!=0) {
// gsl_vector_set_zero (ab);
// Calcab (W, y, ab);
-// }
-
- //start reading genotypes and analyze
+// }
+
+ //start reading genotypes and analyze
for (size_t t=0; t<indicator_snp.size(); ++t) {
// if (t>1) {break;}
!safeGetline(infile, line).eof();
if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
if (indicator_snp[t]==0) {continue;}
-
+
ch_ptr=strtok ((char *)line.c_str(), " , \t");
ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
+ ch_ptr=strtok (NULL, " , \t");
+
x_mean=0.0; c_phen=0; n_miss=0;
gsl_vector_set_zero(x_miss);
for (size_t i=0; i<ni_total; ++i) {
ch_ptr=strtok (NULL, " , \t");
if (indicator_idv[i]==0) {continue;}
-
+
if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
else {
- geno=atof(ch_ptr);
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
+ geno=atof(ch_ptr);
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
x_mean+=geno;
}
c_phen++;
- }
-
+ }
+
x_mean/=(double)(ni_test-n_miss);
-
+
for (size_t i=0; i<ni_test; ++i) {
if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
geno=gsl_vector_get(x, i);
@@ -1270,55 +1272,55 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
gsl_vector_set(x, i, 2-geno);
}
}
-
-
+
+
//calculate statistics
time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
CalcUab(UtW, Uty, Utx, Uab);
// if (e_mode!=0) {
// Calcab (W, y, x, ab);
// }
-
+
time_start=clock();
FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
-
+
//3 is before 1
if (a_mode==3 || a_mode==4) {
CalcRLScore (l_mle_null, param1, beta, se, p_score);
}
-
+
if (a_mode==1 || a_mode==4) {
- CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
CalcRLWald (lambda_remle, param1, beta, se, p_wald);
}
-
+
if (a_mode==2 || a_mode==4) {
CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
- }
-
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
+ }
+
if (x_mean>1) {beta*=-1;}
-
+
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
//store summary data
SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
- }
+ }
cout<<endl;
-
+
gsl_vector_free (x);
gsl_vector_free (x_miss);
gsl_vector_free (Utx);
gsl_matrix_free (Uab);
gsl_vector_free (ab);
-
+
infile.close();
infile.clear();
-
+
return;
}
@@ -1328,37 +1330,37 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
-void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y)
+void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y)
{
string file_bed=file_bfile+".bed";
ifstream infile (file_bed.c_str(), ios::binary);
if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
-
+
clock_t time_start=clock();
-
+
char ch[1];
- bitset<8> b;
-
+ bitset<8> b;
+
double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
double logl_H1=0.0;
int n_bit, n_miss, ci_total, ci_test;
double geno, x_mean;
-
+
//Calculate basic quantities
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
gsl_vector *x=gsl_vector_alloc (U->size1);
gsl_vector *Utx=gsl_vector_alloc (U->size2);
- gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
-
+ gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
+ gsl_vector *ab=gsl_vector_alloc (n_index);
+
gsl_matrix_set_zero (Uab);
CalcUab (UtW, Uty, Uab);
// if (e_mode!=0) {
// gsl_vector_set_zero (ab);
// Calcab (W, y, ab);
// }
-
+
//calculate n_bit and c, the number of bit for each snp
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1; }
@@ -1368,16 +1370,16 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
infile.read(ch,1);
b=ch[0];
}
-
-
+
+
for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);}
if (indicator_snp[t]==0) {continue;}
-
+
infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
-
+
//read genotypes
- x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
+ x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
for (int i=0; i<n_bit; ++i) {
infile.read(ch,1);
b=ch[0];
@@ -1390,7 +1392,7 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
}
else {
- if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
+ if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
else {gsl_vector_set(x, ci_test, -9); n_miss++; }
}
@@ -1398,105 +1400,345 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
ci_test++;
}
}
-
+
x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
+
+ for (size_t i=0; i<ni_test; ++i) {
geno=gsl_vector_get(x,i);
if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
if (x_mean>1) {
gsl_vector_set(x, i, 2-geno);
}
}
-
+
//calculate statistics
time_start=clock();
gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
CalcUab(UtW, Uty, Utx, Uab);
// if (e_mode!=0) {
// Calcab (W, y, x, ab);
// }
-
+
time_start=clock();
FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
-
+
//3 is before 1, for beta
if (a_mode==3 || a_mode==4) {
CalcRLScore (l_mle_null, param1, beta, se, p_score);
}
-
+
if (a_mode==1 || a_mode==4) {
- CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
CalcRLWald (lambda_remle, param1, beta, se, p_wald);
}
-
+
if (a_mode==2 || a_mode==4) {
CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
- }
-
- if (x_mean>1) {beta*=-1;}
-
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
+ }
+
+ if (x_mean>1) {beta*=-1;}
+
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
//store summary data
SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
- }
+ }
cout<<endl;
-
+
gsl_vector_free (x);
gsl_vector_free (Utx);
gsl_matrix_free (Uab);
gsl_vector_free (ab);
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return;
}
+// WJA added
+#include <assert.h>
+void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y)
+{
+ string file_bgen=file_oxford+".bgen";
+ ifstream infile (file_bgen.c_str(), ios::binary);
+ if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return;}
+
+
+ clock_t time_start=clock();
+ double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
+ double logl_H1=0.0;
+ int n_miss, c_phen;
+ double geno, x_mean;
+
+ //Calculate basic quantities
+ size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
+
+ gsl_vector *x=gsl_vector_alloc (U->size1);
+ gsl_vector *x_miss=gsl_vector_alloc (U->size1);
+ gsl_vector *Utx=gsl_vector_alloc (U->size2);
+ gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
+ gsl_vector *ab=gsl_vector_alloc (n_index);
+
+ gsl_matrix_set_zero (Uab);
+ CalcUab (UtW, Uty, Uab);
+// if (e_mode!=0) {
+// gsl_vector_set_zero (ab);
+// Calcab (W, y, ab);
+// }
+
+ // read in header
+ uint32_t bgen_snp_block_offset;
+ uint32_t bgen_header_length;
+ uint32_t bgen_nsamples;
+ uint32_t bgen_nsnps;
+ uint32_t bgen_flags;
+ infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
+ infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
+ bgen_snp_block_offset-=4;
+ infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
+ bgen_snp_block_offset-=4;
+ infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
+ bgen_snp_block_offset-=4;
+ infile.ignore(4+bgen_header_length-20);
+ bgen_snp_block_offset-=4+bgen_header_length-20;
+ infile.read(reinterpret_cast<char*>(&bgen_flags),4);
+ bgen_snp_block_offset-=4;
+ bool CompressedSNPBlocks=bgen_flags&0x1;
+// bool LongIds=bgen_flags&0x4;
+
+ infile.ignore(bgen_snp_block_offset);
+
+ double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+
+ uint32_t bgen_N;
+ uint16_t bgen_LS;
+ uint16_t bgen_LR;
+ uint16_t bgen_LC;
+ uint32_t bgen_SNP_pos;
+ uint32_t bgen_LA;
+ std::string bgen_A_allele;
+ uint32_t bgen_LB;
+ std::string bgen_B_allele;
+ uint32_t bgen_P;
+ size_t unzipped_data_size;
+ string id;
+ string rs;
+ string chr;
+ std::cout<<"Warning: WJA hard coded SNP missingness threshold of 10%"<<std::endl;
+
+
+
+ //start reading genotypes and analyze
+ for (size_t t=0; t<indicator_snp.size(); ++t)
+ {
+
+// if (t>1) {break;}
+ if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
+ // read SNP header
+ id.clear();
+ rs.clear();
+ chr.clear();
+ bgen_A_allele.clear();
+ bgen_B_allele.clear();
-void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const double l_min, const double l_max, const size_t n_region, vector<pair<size_t, double> > &pos_loglr)
+ infile.read(reinterpret_cast<char*>(&bgen_N),4);
+ infile.read(reinterpret_cast<char*>(&bgen_LS),2);
+
+ id.resize(bgen_LS);
+ infile.read(&id[0], bgen_LS);
+
+ infile.read(reinterpret_cast<char*>(&bgen_LR),2);
+ rs.resize(bgen_LR);
+ infile.read(&rs[0], bgen_LR);
+
+ infile.read(reinterpret_cast<char*>(&bgen_LC),2);
+ chr.resize(bgen_LC);
+ infile.read(&chr[0], bgen_LC);
+
+ infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
+
+ infile.read(reinterpret_cast<char*>(&bgen_LA),4);
+ bgen_A_allele.resize(bgen_LA);
+ infile.read(&bgen_A_allele[0], bgen_LA);
+
+
+ infile.read(reinterpret_cast<char*>(&bgen_LB),4);
+ bgen_B_allele.resize(bgen_LB);
+ infile.read(&bgen_B_allele[0], bgen_LB);
+
+
+
+
+ uint16_t unzipped_data[3*bgen_N];
+
+ if (indicator_snp[t]==0) {
+ if(CompressedSNPBlocks)
+ infile.read(reinterpret_cast<char*>(&bgen_P),4);
+ else
+ bgen_P=6*bgen_N;
+
+ infile.ignore(static_cast<size_t>(bgen_P));
+
+ continue;
+ }
+
+
+ if(CompressedSNPBlocks)
+ {
+
+
+ infile.read(reinterpret_cast<char*>(&bgen_P),4);
+ uint8_t zipped_data[bgen_P];
+
+ unzipped_data_size=6*bgen_N;
+
+ infile.read(reinterpret_cast<char*>(zipped_data),bgen_P);
+
+ int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data), reinterpret_cast<uLongf*>(&unzipped_data_size), reinterpret_cast<Bytef*>(zipped_data), static_cast<uLong> (bgen_P));
+ assert(result == Z_OK);
+
+ }
+ else
+ {
+
+ bgen_P=6*bgen_N;
+ infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
+ }
+
+ x_mean=0.0; c_phen=0; n_miss=0;
+ gsl_vector_set_zero(x_miss);
+ for (size_t i=0; i<bgen_N; ++i) {
+ if (indicator_idv[i]==0) {continue;}
+
+
+ bgen_geno_prob_AA=static_cast<double>(unzipped_data[i*3])/32768.0;
+ bgen_geno_prob_AB=static_cast<double>(unzipped_data[i*3+1])/32768.0;
+ bgen_geno_prob_BB=static_cast<double>(unzipped_data[i*3+2])/32768.0;
+ // WJA
+ bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
+ if (bgen_geno_prob_non_miss<0.9) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
+ else {
+
+ bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
+ bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
+ bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
+
+ geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean+=geno;
+ }
+ c_phen++;
+ }
+
+ x_mean/=static_cast<double>(ni_test-n_miss);
+
+ for (size_t i=0; i<ni_test; ++i) {
+ if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
+ geno=gsl_vector_get(x, i);
+ if (x_mean>1) {
+ gsl_vector_set(x, i, 2-geno);
+ }
+ }
+
+
+ //calculate statistics
+ time_start=clock();
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
+ time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ CalcUab(UtW, Uty, Utx, Uab);
+// if (e_mode!=0) {
+// Calcab (W, y, x, ab);
+// }
+
+ time_start=clock();
+ FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+ //3 is before 1
+ if (a_mode==3 || a_mode==4) {
+ CalcRLScore (l_mle_null, param1, beta, se, p_score);
+ }
+
+ if (a_mode==1 || a_mode==4) {
+ CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcRLWald (lambda_remle, param1, beta, se, p_wald);
+ }
+
+ if (a_mode==2 || a_mode==4) {
+ CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
+ }
+
+ if (x_mean>1) {beta*=-1;}
+
+ time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ //store summary data
+ SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ sumStat.push_back(SNPs);
+ }
+ cout<<endl;
+
+ gsl_vector_free (x);
+ gsl_vector_free (x_miss);
+ gsl_vector_free (Utx);
+ gsl_matrix_free (Uab);
+ gsl_vector_free (ab);
+
+ infile.close();
+ infile.clear();
+
+ return;
+
+}
+
+
+
+void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const double l_min, const double l_max, const size_t n_region, vector<pair<size_t, double> > &pos_loglr)
{
double logl_H0, logl_H1, log_lr, lambda0, lambda1;
-
+
gsl_vector *w=gsl_vector_alloc (Uty->size);
- gsl_matrix *Utw=gsl_matrix_alloc (Uty->size, 1);
+ gsl_matrix *Utw=gsl_matrix_alloc (Uty->size, 1);
gsl_matrix *Uab=gsl_matrix_alloc (Uty->size, 6);
- gsl_vector *ab=gsl_vector_alloc (6);
-
+ gsl_vector *ab=gsl_vector_alloc (6);
+
gsl_vector_set_zero(ab);
gsl_vector_set_all (w, 1.0);
- gsl_vector_view Utw_col=gsl_matrix_column (Utw, 0);
- gsl_blas_dgemv (CblasTrans, 1.0, U, w, 0.0, &Utw_col.vector);
-
- CalcUab (Utw, Uty, Uab) ;
- FUNC_PARAM param0={true, Uty->size, 1, K_eval, Uab, ab, 0};
-
+ gsl_vector_view Utw_col=gsl_matrix_column (Utw, 0);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, w, 0.0, &Utw_col.vector);
+
+ CalcUab (Utw, Uty, Uab) ;
+ FUNC_PARAM param0={true, Uty->size, 1, K_eval, Uab, ab, 0};
+
CalcLambda('L', param0, l_min, l_max, n_region, lambda0, logl_H0);
-
+
for (size_t i=0; i<UtX->size2; ++i) {
gsl_vector_const_view UtX_col=gsl_matrix_const_column (UtX, i);
CalcUab(Utw, Uty, &UtX_col.vector, Uab);
FUNC_PARAM param1={false, UtX->size1, 1, K_eval, Uab, ab, 0};
-
+
CalcLambda ('L', param1, l_min, l_max, n_region, lambda1, logl_H1);
- log_lr=logl_H1-logl_H0;
-
+ log_lr=logl_H1-logl_H0;
+
pos_loglr.push_back(make_pair(i,log_lr) );
}
-
+
gsl_vector_free (w);
gsl_matrix_free (Utw);
gsl_matrix_free (Uab);
gsl_vector_free (ab);
-
+
return;
}
@@ -1506,17 +1748,17 @@ void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector
void CalcLambda (const char func_name, FUNC_PARAM &params, const double l_min, const double l_max, const size_t n_region, double &lambda, double &logf)
{
if (func_name!='R' && func_name!='L' && func_name!='r' && func_name!='l') {cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted likelihood, 'L' for log-likelihood."<<endl; return;}
-
+
vector<pair<double, double> > lambda_lh;
-
+
//evaluate first order derivates in different intervals
double lambda_l, lambda_h, lambda_interval=log(l_max/l_min)/(double)n_region;
double dev1_l, dev1_h, logf_l, logf_h;
-
+
for (size_t i=0; i<n_region; ++i) {
lambda_l=l_min*exp(lambda_interval*i);
lambda_h=l_min*exp(lambda_interval*(i+1.0));
-
+
if (func_name=='R' || func_name=='r') {
dev1_l=LogRL_dev1 (lambda_l, &params);
dev1_h=LogRL_dev1 (lambda_h, &params);
@@ -1525,12 +1767,12 @@ void CalcLambda (const char func_name, FUNC_PARAM &params, const double l_min, c
dev1_l=LogL_dev1 (lambda_l, &params);
dev1_h=LogL_dev1 (lambda_h, &params);
}
-
+
if (dev1_l*dev1_h<=0) {
lambda_lh.push_back(make_pair(lambda_l, lambda_h));
}
}
-
+
//if derivates do not change signs in any interval
if (lambda_lh.empty()) {
if (func_name=='R' || func_name=='r') {
@@ -1541,21 +1783,21 @@ void CalcLambda (const char func_name, FUNC_PARAM &params, const double l_min, c
logf_l=LogL_f (l_min, &params);
logf_h=LogL_f (l_max, &params);
}
-
+
if (logf_l>=logf_h) {lambda=l_min; logf=logf_l;} else {lambda=l_max; logf=logf_h;}
}
else {
//if derivates change signs
int status;
int iter=0, max_iter=100;
- double l, l_temp;
-
+ double l, l_temp;
+
gsl_function F;
gsl_function_fdf FDF;
-
+
F.params=&params;
FDF.params=&params;
-
+
if (func_name=='R' || func_name=='r') {
F.function=&LogRL_dev1;
FDF.f=&LogRL_dev1;
@@ -1568,57 +1810,57 @@ void CalcLambda (const char func_name, FUNC_PARAM &params, const double l_min, c
FDF.df=&LogL_dev2;
FDF.fdf=&LogL_dev12;
}
-
+
const gsl_root_fsolver_type *T_f;
gsl_root_fsolver *s_f;
T_f=gsl_root_fsolver_brent;
s_f=gsl_root_fsolver_alloc (T_f);
-
+
const gsl_root_fdfsolver_type *T_fdf;
gsl_root_fdfsolver *s_fdf;
T_fdf=gsl_root_fdfsolver_newton;
- s_fdf=gsl_root_fdfsolver_alloc(T_fdf);
-
+ s_fdf=gsl_root_fdfsolver_alloc(T_fdf);
+
for (vector<double>::size_type i=0; i<lambda_lh.size(); ++i) {
lambda_l=lambda_lh[i].first; lambda_h=lambda_lh[i].second;
-
+
gsl_root_fsolver_set (s_f, &F, lambda_l, lambda_h);
-
+
do {
iter++;
status=gsl_root_fsolver_iterate (s_f);
l=gsl_root_fsolver_root (s_f);
lambda_l=gsl_root_fsolver_x_lower (s_f);
lambda_h=gsl_root_fsolver_x_upper (s_f);
- status=gsl_root_test_interval (lambda_l, lambda_h, 0, 1e-1);
+ status=gsl_root_test_interval (lambda_l, lambda_h, 0, 1e-1);
}
- while (status==GSL_CONTINUE && iter<max_iter);
-
+ while (status==GSL_CONTINUE && iter<max_iter);
+
iter=0;
-
- gsl_root_fdfsolver_set (s_fdf, &FDF, l);
-
+
+ gsl_root_fdfsolver_set (s_fdf, &FDF, l);
+
do {
iter++;
status=gsl_root_fdfsolver_iterate (s_fdf);
l_temp=l;
l=gsl_root_fdfsolver_root (s_fdf);
- status=gsl_root_test_delta (l, l_temp, 0, 1e-5);
+ status=gsl_root_test_delta (l, l_temp, 0, 1e-5);
}
- while (status==GSL_CONTINUE && iter<max_iter && l>l_min && l<l_max);
-
+ while (status==GSL_CONTINUE && iter<max_iter && l>l_min && l<l_max);
+
l=l_temp;
if (l<l_min) {l=l_min;}
if (l>l_max) {l=l_max;}
- if (func_name=='R' || func_name=='r') {logf_l=LogRL_f (l, &params);} else {logf_l=LogL_f (l, &params);}
-
+ if (func_name=='R' || func_name=='r') {logf_l=LogRL_f (l, &params);} else {logf_l=LogL_f (l, &params);}
+
if (i==0) {logf=logf_l; lambda=l;}
else if (logf<logf_l) {logf=logf_l; lambda=l;}
else {}
}
- gsl_root_fsolver_free (s_f);
- gsl_root_fdfsolver_free (s_fdf);
-
+ gsl_root_fsolver_free (s_f);
+ gsl_root_fdfsolver_free (s_fdf);
+
if (func_name=='R' || func_name=='r') {
logf_l=LogRL_f (l_min, &params);
logf_h=LogRL_f (l_max, &params);
@@ -1627,11 +1869,11 @@ void CalcLambda (const char func_name, FUNC_PARAM &params, const double l_min, c
logf_l=LogL_f (l_min, &params);
logf_h=LogL_f (l_max, &params);
}
-
- if (logf_l>logf) {lambda=l_min; logf=logf_l;}
+
+ if (logf_l>logf) {lambda=l_min; logf=logf_l;}
if (logf_h>logf) {lambda=l_max; logf=logf_h;}
}
-
+
return;
}
@@ -1646,53 +1888,53 @@ void CalcLambda (const char func_name, const gsl_vector *eval, const gsl_matrix
size_t n_cvt=UtW->size2, ni_test=UtW->size1;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
-
+
+ gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
+ gsl_vector *ab=gsl_vector_alloc (n_index);
+
gsl_matrix_set_zero (Uab);
CalcUab (UtW, Uty, Uab);
// if (e_mode!=0) {
// gsl_vector_set_zero (ab);
// Calcab (W, y, ab);
// }
-
+
FUNC_PARAM param0={true, ni_test, n_cvt, eval, Uab, ab, 0};
-
+
CalcLambda(func_name, param0, l_min, l_max, n_region, lambda, logl_H0);
-
- gsl_matrix_free(Uab);
- gsl_vector_free(ab);
-
+
+ gsl_matrix_free(Uab);
+ gsl_vector_free(ab);
+
return;
}
-
-
+
+
//obtain REMLE estimate for PVE using lambda_remle
void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const double lambda, const double trace_G, double &pve, double &pve_se)
{
size_t n_cvt=UtW->size2, ni_test=UtW->size1;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
-
+
+ gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
+ gsl_vector *ab=gsl_vector_alloc (n_index);
+
gsl_matrix_set_zero (Uab);
CalcUab (UtW, Uty, Uab);
// if (e_mode!=0) {
// gsl_vector_set_zero (ab);
// Calcab (W, y, ab);
// }
-
+
FUNC_PARAM param0={true, ni_test, n_cvt, eval, Uab, ab, 0};
-
+
double se=sqrt(-1.0/LogRL_dev2 (lambda, &param0));
-
+
pve=trace_G*lambda/(trace_G*lambda+1.0);
pve_se=trace_G/((trace_G*lambda+1.0)*(trace_G*lambda+1.0))*se;
-
+
gsl_matrix_free (Uab);
- gsl_vector_free (ab);
+ gsl_vector_free (ab);
return;
}
@@ -1703,9 +1945,9 @@ void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_v
{
size_t n_cvt=UtW->size2, ni_test=UtW->size1;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
+
+ gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
+ gsl_vector *ab=gsl_vector_alloc (n_index);
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_vector *Hi_eval=gsl_vector_alloc(eval->size);
gsl_vector *v_temp=gsl_vector_alloc(eval->size);
@@ -1713,16 +1955,16 @@ void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_v
gsl_matrix *WHiW=gsl_matrix_alloc(UtW->size2, UtW->size2);
gsl_vector *WHiy=gsl_vector_alloc(UtW->size2);
gsl_matrix *Vbeta=gsl_matrix_alloc(UtW->size2, UtW->size2);
-
+
gsl_matrix_set_zero (Uab);
- CalcUab (UtW, Uty, Uab);
-
+ CalcUab (UtW, Uty, Uab);
+
gsl_vector_memcpy (v_temp, eval);
gsl_vector_scale (v_temp, lambda);
gsl_vector_set_all (Hi_eval, 1.0);
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
-
+
//calculate beta
gsl_matrix_memcpy (HiW, UtW);
for (size_t i=0; i<UtW->size2; i++) {
@@ -1731,30 +1973,30 @@ void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_v
}
gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, HiW, UtW, 0.0, WHiW);
gsl_blas_dgemv (CblasTrans, 1.0, HiW, Uty, 0.0, WHiy);
-
+
int sig;
gsl_permutation * pmt=gsl_permutation_alloc (UtW->size2);
LUDecomp (WHiW, pmt, &sig);
LUSolve (WHiW, pmt, WHiy, beta);
LUInvert (WHiW, pmt, Vbeta);
-
+
//calculate vg and ve
- CalcPab (n_cvt, 0, Hi_eval, Uab, ab, Pab);
-
- size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
- double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy);
-
+ CalcPab (n_cvt, 0, Hi_eval, Uab, ab, Pab);
+
+ size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
+ double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy);
+
ve=P_yy/(double)(ni_test-n_cvt);
vg=ve*lambda;
-
+
//with ve, calculate se(beta)
gsl_matrix_scale(Vbeta, ve);
-
+
//obtain se_beta
for (size_t i=0; i<Vbeta->size1; i++) {
gsl_vector_set (se_beta, i, sqrt(gsl_matrix_get(Vbeta, i, i) ) );
}
-
+
gsl_matrix_free(Uab);
gsl_matrix_free(Pab);
gsl_vector_free(ab);
@@ -1764,8 +2006,309 @@ void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_v
gsl_matrix_free(WHiW);
gsl_vector_free(WHiy);
gsl_matrix_free(Vbeta);
-
+
gsl_permutation_free(pmt);
return;
}
+
+
+
+
+
+
+void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const gsl_vector *env)
+{
+ igzstream infile (file_geno.c_str(), igzstream::in);
+// ifstream infile (file_geno.c_str(), ifstream::in);
+ if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;}
+
+ clock_t time_start=clock();
+
+ string line;
+ char *ch_ptr;
+
+ double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
+ double logl_H1=0.0, logl_H0=0.0;
+ int n_miss, c_phen;
+ double geno, x_mean;
+
+ //Calculate basic quantities
+ size_t n_index=(n_cvt+2+2+1)*(n_cvt+2+2)/2;
+
+ gsl_vector *x=gsl_vector_alloc (U->size1);
+ gsl_vector *x_miss=gsl_vector_alloc (U->size1);
+ gsl_vector *Utx=gsl_vector_alloc (U->size2);
+ gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
+ gsl_vector *ab=gsl_vector_alloc (n_index);
+
+ gsl_matrix *UtW_expand=gsl_matrix_alloc (U->size1, UtW->size2+2);
+ gsl_matrix_view UtW_expand_mat=gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
+ gsl_matrix_memcpy (&UtW_expand_mat.matrix, UtW);
+ gsl_vector_view UtW_expand_env=gsl_matrix_column(UtW_expand, UtW->size2);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
+ gsl_vector_view UtW_expand_x=gsl_matrix_column(UtW_expand, UtW->size2+1);
+
+ //gsl_matrix_set_zero (Uab);
+ // CalcUab (UtW, Uty, Uab);
+// if (e_mode!=0) {
+// gsl_vector_set_zero (ab);
+// Calcab (W, y, ab);
+// }
+
+ //start reading genotypes and analyze
+ for (size_t t=0; t<indicator_snp.size(); ++t) {
+// if (t>1) {break;}
+ !safeGetline(infile, line).eof();
+ if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
+ if (indicator_snp[t]==0) {continue;}
+
+ ch_ptr=strtok ((char *)line.c_str(), " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+
+ x_mean=0.0; c_phen=0; n_miss=0;
+ gsl_vector_set_zero(x_miss);
+ for (size_t i=0; i<ni_total; ++i) {
+ ch_ptr=strtok (NULL, " , \t");
+ if (indicator_idv[i]==0) {continue;}
+
+ if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
+ else {
+ geno=atof(ch_ptr);
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean+=geno;
+ }
+ c_phen++;
+ }
+
+ x_mean/=(double)(ni_test-n_miss);
+
+ for (size_t i=0; i<ni_test; ++i) {
+ if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
+ geno=gsl_vector_get(x, i);
+ if (x_mean>1) {
+ gsl_vector_set(x, i, 2-geno);
+ }
+ }
+
+
+ //calculate statistics
+ time_start=clock();
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector);
+ gsl_vector_mul (x, env);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
+ time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ gsl_matrix_set_zero (Uab);
+ CalcUab (UtW_expand, Uty, Uab);
+
+ if (a_mode==2 || a_mode==4) {
+ FUNC_PARAM param0={true, ni_test, n_cvt+2, eval, Uab, ab, 0};
+ CalcLambda ('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0);
+ }
+
+ CalcUab(UtW_expand, Uty, Utx, Uab);
+// if (e_mode!=0) {
+// Calcab (W, y, x, ab);
+// }
+
+ time_start=clock();
+ FUNC_PARAM param1={false, ni_test, n_cvt+2, eval, Uab, ab, 0};
+
+ //3 is before 1
+ if (a_mode==3 || a_mode==4) {
+ CalcRLScore (l_mle_null, param1, beta, se, p_score);
+ }
+
+ if (a_mode==1 || a_mode==4) {
+ CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcRLWald (lambda_remle, param1, beta, se, p_wald);
+ }
+
+ if (a_mode==2 || a_mode==4) {
+ CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
+ }
+
+ if (x_mean>1) {beta*=-1;}
+
+ time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ //store summary data
+ SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ sumStat.push_back(SNPs);
+ }
+ cout<<endl;
+
+ gsl_vector_free (x);
+ gsl_vector_free (x_miss);
+ gsl_vector_free (Utx);
+ gsl_matrix_free (Uab);
+ gsl_vector_free (ab);
+
+ gsl_matrix_free (UtW_expand);
+
+ infile.close();
+ infile.clear();
+
+ return;
+}
+
+
+
+
+
+
+
+void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const gsl_vector *env)
+{
+ string file_bed=file_bfile+".bed";
+ ifstream infile (file_bed.c_str(), ios::binary);
+ if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
+
+ clock_t time_start=clock();
+
+ char ch[1];
+ bitset<8> b;
+
+ double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
+ double logl_H1=0.0, logl_H0=0.0;
+ int n_bit, n_miss, ci_total, ci_test;
+ double geno, x_mean;
+
+ //Calculate basic quantities
+ size_t n_index=(n_cvt+2+2+1)*(n_cvt+2+2)/2;
+
+ gsl_vector *x=gsl_vector_alloc (U->size1);
+ gsl_vector *Utx=gsl_vector_alloc (U->size2);
+ gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
+ gsl_vector *ab=gsl_vector_alloc (n_index);
+
+ gsl_matrix *UtW_expand=gsl_matrix_alloc (U->size1, UtW->size2+2);
+ gsl_matrix_view UtW_expand_mat=gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
+ gsl_matrix_memcpy (&UtW_expand_mat.matrix, UtW);
+ gsl_vector_view UtW_expand_env=gsl_matrix_column(UtW_expand, UtW->size2);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
+ gsl_vector_view UtW_expand_x=gsl_matrix_column(UtW_expand, UtW->size2+1);
+
+ //gsl_matrix_set_zero (Uab);
+ //CalcUab (UtW, Uty, Uab);
+// if (e_mode!=0) {
+// gsl_vector_set_zero (ab);
+// Calcab (W, y, ab);
+// }
+
+ //calculate n_bit and c, the number of bit for each snp
+ if (ni_total%4==0) {n_bit=ni_total/4;}
+ else {n_bit=ni_total/4+1; }
+
+ //print the first three majic numbers
+ for (int i=0; i<3; ++i) {
+ infile.read(ch,1);
+ b=ch[0];
+ }
+
+
+ for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
+ if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);}
+ if (indicator_snp[t]==0) {continue;}
+
+ infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
+
+ //read genotypes
+ x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
+ for (int i=0; i<n_bit; ++i) {
+ infile.read(ch,1);
+ b=ch[0];
+ for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
+ if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
+ if (indicator_idv[ci_total]==0) {ci_total++; continue;}
+
+ if (b[2*j]==0) {
+ if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
+ else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
+ }
+ else {
+ if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
+ else {gsl_vector_set(x, ci_test, -9); n_miss++; }
+ }
+
+ ci_total++;
+ ci_test++;
+ }
+ }
+
+ x_mean/=(double)(ni_test-n_miss);
+
+ for (size_t i=0; i<ni_test; ++i) {
+ geno=gsl_vector_get(x,i);
+ if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
+ if (x_mean>1) {
+ gsl_vector_set(x, i, 2-geno);
+ }
+ }
+
+ //calculate statistics
+ time_start=clock();
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector);
+ gsl_vector_mul (x, env);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
+ time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ gsl_matrix_set_zero (Uab);
+ CalcUab (UtW_expand, Uty, Uab);
+
+ if (a_mode==2 || a_mode==4) {
+ FUNC_PARAM param0={true, ni_test, n_cvt+2, eval, Uab, ab, 0};
+ CalcLambda ('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0);
+ }
+
+ CalcUab(UtW_expand, Uty, Utx, Uab);
+
+// if (e_mode!=0) {
+// Calcab (W, y, x, ab);
+// }
+
+ time_start=clock();
+ FUNC_PARAM param1={false, ni_test, n_cvt+2, eval, Uab, ab, 0};
+
+ //3 is before 1, for beta
+ if (a_mode==3 || a_mode==4) {
+ CalcRLScore (l_mle_null, param1, beta, se, p_score);
+ }
+
+ if (a_mode==1 || a_mode==4) {
+ CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcRLWald (lambda_remle, param1, beta, se, p_wald);
+ }
+
+ if (a_mode==2 || a_mode==4) {
+ CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
+ }
+
+ if (x_mean>1) {beta*=-1;}
+
+ time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ //store summary data
+ SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ sumStat.push_back(SNPs);
+ }
+ cout<<endl;
+
+ gsl_vector_free (x);
+ gsl_vector_free (Utx);
+ gsl_matrix_free (Uab);
+ gsl_vector_free (ab);
+
+ gsl_matrix_free (UtW_expand);
+
+ infile.close();
+ infile.clear();
+
+ return;
+}