/* Genome-wide Efficient Mixed Model Association (GEMMA) Copyright (C) 2011 Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include #include #include #include #include #include #include #include #include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" #include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" #include "gsl/gsl_cdf.h" #include "gsl/gsl_roots.h" #include "gsl/gsl_min.h" #include "gsl/gsl_integration.h" #include "io.h" #include "eigenlib.h" #include "lapack.h" #include "gzstream.h" #ifdef FORCE_FLOAT #include "lmm_float.h" #else #include "lmm.h" #endif using namespace std; void LMM::CopyFromParam (PARAM &cPar) { a_mode=cPar.a_mode; d_pace=cPar.d_pace; file_bfile=cPar.file_bfile; file_geno=cPar.file_geno; file_out=cPar.file_out; path_out=cPar.path_out; file_gene=cPar.file_gene; // WJA added file_oxford=cPar.file_oxford; l_min=cPar.l_min; l_max=cPar.l_max; n_region=cPar.n_region; l_mle_null=cPar.l_mle_null; logl_mle_H0=cPar.logl_mle_H0; time_UtX=0.0; time_opt=0.0; ni_total=cPar.ni_total; ns_total=cPar.ns_total; ni_test=cPar.ni_test; ns_test=cPar.ns_test; n_cvt=cPar.n_cvt; ng_total=cPar.ng_total; ng_test=0; indicator_idv=cPar.indicator_idv; indicator_snp=cPar.indicator_snp; snpInfo=cPar.snpInfo; return; } void LMM::CopyToParam (PARAM &cPar) { cPar.time_UtX=time_UtX; cPar.time_opt=time_opt; cPar.ng_test=ng_test; return; } void LMM::WriteFiles () { string file_str; file_str=path_out+"/"+file_out; file_str+=".assoc.txt"; ofstream outfile (file_str.c_str(), ofstream::out); if (!outfile) {cout<<"error writing file: "<::size_type t=0; tn_cvt; size_t ni_test=p->ni_test; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; size_t nc_total; if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;} double f=0.0, logdet_h=0.0, d; size_t index_yy; gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size); gsl_vector_memcpy (v_temp, p->eval); gsl_vector_scale (v_temp, l); if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); for (size_t i=0; i<(p->eval)->size; ++i) { d=gsl_vector_get (v_temp, i); logdet_h+=log(fabs(d)); } CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); double c=0.5*(double)ni_test*(log((double)ni_test)-log(2*M_PI)-1.0); index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); double P_yy=gsl_matrix_get (Pab, nc_total, index_yy); f=c-0.5*logdet_h-0.5*(double)ni_test*log(P_yy); gsl_matrix_free (Pab); gsl_vector_free (Hi_eval); gsl_vector_free (v_temp); return f; } double LogL_dev1 (double l, void *params) { FUNC_PARAM *p=(FUNC_PARAM *) params; size_t n_cvt=p->n_cvt; size_t ni_test=p->ni_test; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; size_t nc_total; if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;} double dev1=0.0, trace_Hi=0.0; size_t index_yy; gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size); gsl_vector_memcpy (v_temp, p->eval); gsl_vector_scale (v_temp, l); if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); gsl_vector_memcpy (HiHi_eval, Hi_eval); gsl_vector_mul (HiHi_eval, Hi_eval); gsl_vector_set_all (v_temp, 1.0); gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi); if (p->e_mode!=0) {trace_Hi=(double)ni_test-trace_Hi;} CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); double trace_HiK=((double)ni_test-trace_Hi)/l; index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); double P_yy=gsl_matrix_get (Pab, nc_total, index_yy); double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy); double yPKPy=(P_yy-PP_yy)/l; dev1=-0.5*trace_HiK+0.5*(double)ni_test*yPKPy/P_yy; gsl_matrix_free (Pab); gsl_matrix_free (PPab); gsl_vector_free (Hi_eval); gsl_vector_free (HiHi_eval); gsl_vector_free (v_temp); return dev1; } double LogL_dev2 (double l, void *params) { FUNC_PARAM *p=(FUNC_PARAM *) params; size_t n_cvt=p->n_cvt; size_t ni_test=p->ni_test; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; size_t nc_total; if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;} double dev2=0.0, trace_Hi=0.0, trace_HiHi=0.0; size_t index_yy; gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size); gsl_vector_memcpy (v_temp, p->eval); gsl_vector_scale (v_temp, l); if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); gsl_vector_memcpy (HiHi_eval, Hi_eval); gsl_vector_mul (HiHi_eval, Hi_eval); gsl_vector_memcpy (HiHiHi_eval, HiHi_eval); gsl_vector_mul (HiHiHi_eval, Hi_eval); gsl_vector_set_all (v_temp, 1.0); gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi); gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi); if (p->e_mode!=0) { trace_Hi=(double)ni_test-trace_Hi; trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test; } CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab); double trace_HiKHiK=((double)ni_test+trace_HiHi-2*trace_Hi)/(l*l); index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); double P_yy=gsl_matrix_get (Pab, nc_total, index_yy); double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy); double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_yy); double yPKPy=(P_yy-PP_yy)/l; double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l); dev2=0.5*trace_HiKHiK-0.5*(double)ni_test*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy); gsl_matrix_free (Pab); gsl_matrix_free (PPab); gsl_matrix_free (PPPab); gsl_vector_free (Hi_eval); gsl_vector_free (HiHi_eval); gsl_vector_free (HiHiHi_eval); gsl_vector_free (v_temp); return dev2; } void LogL_dev12 (double l, void *params, double *dev1, double *dev2) { FUNC_PARAM *p=(FUNC_PARAM *) params; size_t n_cvt=p->n_cvt; size_t ni_test=p->ni_test; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; size_t nc_total; if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;} double trace_Hi=0.0, trace_HiHi=0.0; size_t index_yy; gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size); gsl_vector_memcpy (v_temp, p->eval); gsl_vector_scale (v_temp, l); if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); gsl_vector_memcpy (HiHi_eval, Hi_eval); gsl_vector_mul (HiHi_eval, Hi_eval); gsl_vector_memcpy (HiHiHi_eval, HiHi_eval); gsl_vector_mul (HiHiHi_eval, Hi_eval); gsl_vector_set_all (v_temp, 1.0); gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi); gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi); if (p->e_mode!=0) { trace_Hi=(double)ni_test-trace_Hi; trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test; } CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab); double trace_HiK=((double)ni_test-trace_Hi)/l; double trace_HiKHiK=((double)ni_test+trace_HiHi-2*trace_Hi)/(l*l); index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); double P_yy=gsl_matrix_get (Pab, nc_total, index_yy); double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy); double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_yy); double yPKPy=(P_yy-PP_yy)/l; double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l); *dev1=-0.5*trace_HiK+0.5*(double)ni_test*yPKPy/P_yy; *dev2=0.5*trace_HiKHiK-0.5*(double)ni_test*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy); gsl_matrix_free (Pab); gsl_matrix_free (PPab); gsl_matrix_free (PPPab); gsl_vector_free (Hi_eval); gsl_vector_free (HiHi_eval); gsl_vector_free (HiHiHi_eval); gsl_vector_free (v_temp); return; } double LogRL_f (double l, void *params) { FUNC_PARAM *p=(FUNC_PARAM *) params; size_t n_cvt=p->n_cvt; size_t ni_test=p->ni_test; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; double df; size_t nc_total; if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; } else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;} double f=0.0, logdet_h=0.0, logdet_hiw=0.0, d; size_t index_ww; gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_matrix *Iab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size); gsl_vector_memcpy (v_temp, p->eval); gsl_vector_scale (v_temp, l); if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); for (size_t i=0; i<(p->eval)->size; ++i) { d=gsl_vector_get (v_temp, i); logdet_h+=log(fabs(d)); } CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); gsl_vector_set_all (v_temp, 1.0); CalcPab (n_cvt, p->e_mode, v_temp, p->Uab, p->ab, Iab); //calculate |WHiW|-|WW| logdet_hiw=0.0; for (size_t i=0; in_cvt; size_t ni_test=p->ni_test; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; double df; size_t nc_total; if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; } else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;} double dev1=0.0, trace_Hi=0.0; size_t index_ww; gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size); gsl_vector_memcpy (v_temp, p->eval); gsl_vector_scale (v_temp, l); if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); gsl_vector_memcpy (HiHi_eval, Hi_eval); gsl_vector_mul (HiHi_eval, Hi_eval); gsl_vector_set_all (v_temp, 1.0); gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi); if (p->e_mode!=0) { trace_Hi=(double)ni_test-trace_Hi; } CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); //calculate tracePK and trace PKPK double trace_P=trace_Hi; double ps_ww, ps2_ww; for (size_t i=0; in_cvt; size_t ni_test=p->ni_test; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; double df; size_t nc_total; if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; } else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;} double dev2=0.0, trace_Hi=0.0, trace_HiHi=0.0; size_t index_ww; gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size); gsl_vector_memcpy (v_temp, p->eval); gsl_vector_scale (v_temp, l); if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); gsl_vector_memcpy (HiHi_eval, Hi_eval); gsl_vector_mul (HiHi_eval, Hi_eval); gsl_vector_memcpy (HiHiHi_eval, HiHi_eval); gsl_vector_mul (HiHiHi_eval, Hi_eval); gsl_vector_set_all (v_temp, 1.0); gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi); gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi); if (p->e_mode!=0) { trace_Hi=(double)ni_test-trace_Hi; trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test; } CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab); //calculate tracePK and trace PKPK double trace_P=trace_Hi, trace_PP=trace_HiHi; double ps_ww, ps2_ww, ps3_ww; for (size_t i=0; in_cvt; size_t ni_test=p->ni_test; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; double df; size_t nc_total; if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; } else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;} double trace_Hi=0.0, trace_HiHi=0.0; size_t index_ww; gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size); gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size); gsl_vector_memcpy (v_temp, p->eval); gsl_vector_scale (v_temp, l); if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); gsl_vector_memcpy (HiHi_eval, Hi_eval); gsl_vector_mul (HiHi_eval, Hi_eval); gsl_vector_memcpy (HiHiHi_eval, HiHi_eval); gsl_vector_mul (HiHiHi_eval, Hi_eval); gsl_vector_set_all (v_temp, 1.0); gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi); gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi); if (p->e_mode!=0) { trace_Hi=(double)ni_test-trace_Hi; trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test; } CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab); //calculate tracePK and trace PKPK double trace_P=trace_Hi, trace_PP=trace_HiHi; double ps_ww, ps2_ww, ps3_ww; for (size_t i=0; isize); gsl_vector *v_temp=gsl_vector_alloc(params.eval->size); gsl_vector_memcpy (v_temp, params.eval); gsl_vector_scale (v_temp, l); if (params.e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); CalcPab (n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab); size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); size_t index_xx=GetabIndex (n_cvt+1, n_cvt+1, n_cvt); size_t index_xy=GetabIndex (n_cvt+2, n_cvt+1, n_cvt); double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy); double P_xx=gsl_matrix_get (Pab, n_cvt, index_xx); double P_xy=gsl_matrix_get (Pab, n_cvt, index_xy); double Px_yy=gsl_matrix_get (Pab, n_cvt+1, index_yy); beta=P_xy/P_xx; double tau=(double)df/Px_yy; se=sqrt(1.0/(tau*P_xx)); p_wald=gsl_cdf_fdist_Q ((P_yy-Px_yy)*tau, 1.0, df); // p_wald=gsl_cdf_chisq_Q ((P_yy-Px_yy)*tau, 1); gsl_matrix_free (Pab); gsl_vector_free (Hi_eval); gsl_vector_free (v_temp); return ; } void LMM::CalcRLScore (const double &l, const FUNC_PARAM ¶ms, double &beta, double &se, double &p_score) { size_t n_cvt=params.n_cvt; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; int df=(int)ni_test-(int)n_cvt-1; gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_vector *Hi_eval=gsl_vector_alloc(params.eval->size); gsl_vector *v_temp=gsl_vector_alloc(params.eval->size); gsl_vector_memcpy (v_temp, params.eval); gsl_vector_scale (v_temp, l); if (params.e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); CalcPab (n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab); size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); size_t index_xx=GetabIndex (n_cvt+1, n_cvt+1, n_cvt); size_t index_xy=GetabIndex (n_cvt+2, n_cvt+1, n_cvt); double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy); double P_xx=gsl_matrix_get (Pab, n_cvt, index_xx); double P_xy=gsl_matrix_get (Pab, n_cvt, index_xy); double Px_yy=gsl_matrix_get (Pab, n_cvt+1, index_yy); beta=P_xy/P_xx; double tau=(double)df/Px_yy; se=sqrt(1.0/(tau*P_xx)); p_score=gsl_cdf_fdist_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx), 1.0, df); // p_score=gsl_cdf_chisq_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx), 1); gsl_matrix_free (Pab); gsl_vector_free (Hi_eval); gsl_vector_free (v_temp); return ; } void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) { size_t index_ab; size_t n_cvt=UtW->size2; gsl_vector *u_a=gsl_vector_alloc (Uty->size); for (size_t a=1; a<=n_cvt+2; ++a) { if (a==n_cvt+1) {continue;} if (a==n_cvt+2) {gsl_vector_memcpy (u_a, Uty);} else { gsl_vector_const_view UtW_col=gsl_matrix_const_column (UtW, a-1); gsl_vector_memcpy (u_a, &UtW_col.vector); } for (size_t b=a; b>=1; --b) { if (b==n_cvt+1) {continue;} index_ab=GetabIndex (a, b, n_cvt); gsl_vector_view Uab_col=gsl_matrix_column (Uab, index_ab); if (b==n_cvt+2) {gsl_vector_memcpy (&Uab_col.vector, Uty);} else { gsl_vector_const_view UtW_col=gsl_matrix_const_column (UtW, b-1); gsl_vector_memcpy (&Uab_col.vector, &UtW_col.vector); } gsl_vector_mul(&Uab_col.vector, u_a); } } gsl_vector_free (u_a); return; } void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_vector *Utx, gsl_matrix *Uab) { size_t index_ab; size_t n_cvt=UtW->size2; for (size_t b=1; b<=n_cvt+2; ++b) { index_ab=GetabIndex (n_cvt+1, b, n_cvt); gsl_vector_view Uab_col=gsl_matrix_column (Uab, index_ab); if (b==n_cvt+2) {gsl_vector_memcpy (&Uab_col.vector, Uty);} else if (b==n_cvt+1) {gsl_vector_memcpy (&Uab_col.vector, Utx);} else { gsl_vector_const_view UtW_col=gsl_matrix_const_column (UtW, b-1); gsl_vector_memcpy (&Uab_col.vector, &UtW_col.vector); } gsl_vector_mul(&Uab_col.vector, Utx); } return; } void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) { size_t index_ab; size_t n_cvt=W->size2; double d; gsl_vector *v_a=gsl_vector_alloc (y->size); gsl_vector *v_b=gsl_vector_alloc (y->size); for (size_t a=1; a<=n_cvt+2; ++a) { if (a==n_cvt+1) {continue;} if (a==n_cvt+2) {gsl_vector_memcpy (v_a, y);} else { gsl_vector_const_view W_col=gsl_matrix_const_column (W, a-1); gsl_vector_memcpy (v_a, &W_col.vector); } for (size_t b=a; b>=1; --b) { if (b==n_cvt+1) {continue;} index_ab=GetabIndex (a, b, n_cvt); if (b==n_cvt+2) {gsl_vector_memcpy (v_b, y);} else { gsl_vector_const_view W_col=gsl_matrix_const_column (W, b-1); gsl_vector_memcpy (v_b, &W_col.vector); } gsl_blas_ddot (v_a, v_b, &d); gsl_vector_set(ab, index_ab, d); } } gsl_vector_free (v_a); gsl_vector_free (v_b); return; } void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, gsl_vector *ab) { size_t index_ab; size_t n_cvt=W->size2; double d; gsl_vector *v_b=gsl_vector_alloc (y->size); for (size_t b=1; b<=n_cvt+2; ++b) { index_ab=GetabIndex (n_cvt+1, b, n_cvt); if (b==n_cvt+2) {gsl_vector_memcpy (v_b, y);} else if (b==n_cvt+1) {gsl_vector_memcpy (v_b, x);} else { gsl_vector_const_view W_col=gsl_matrix_const_column (W, b-1); gsl_vector_memcpy (v_b, &W_col.vector); } gsl_blas_ddot (x, v_b, &d); gsl_vector_set(ab, index_ab, d); } gsl_vector_free (v_b); return; } void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Utx, const gsl_matrix *W, const gsl_vector *x) { igzstream infile (file_gene.c_str(), igzstream::in); if (!infile) {cout<<"error reading gene expression file:"<size1); gsl_vector *Uty=gsl_vector_alloc (U->size2); gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index); gsl_vector *ab=gsl_vector_alloc (n_index); //header getline(infile, line); for (size_t t=0; tsize1); gsl_vector *x_miss=gsl_vector_alloc (U->size1); gsl_vector *Utx=gsl_vector_alloc (U->size2); gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index); gsl_vector *ab=gsl_vector_alloc (n_index); //create a large matrix size_t msize=10000; gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize); gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize); gsl_matrix_set_zero(Xlarge); gsl_matrix_set_zero (Uab); CalcUab (UtW, Uty, Uab); // if (e_mode!=0) { // gsl_vector_set_zero (ab); // Calcab (W, y, ab); // } //start reading genotypes and analyze size_t c=0, t_last=0; for (size_t t=0; t1) {break;} !safeGetline(infile, line).eof(); if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);} if (indicator_snp[t]==0) {continue;} ch_ptr=strtok ((char *)line.c_str(), " , \t"); ch_ptr=strtok (NULL, " , \t"); ch_ptr=strtok (NULL, " , \t"); x_mean=0.0; c_phen=0; n_miss=0; gsl_vector_set_zero(x_miss); for (size_t i=0; i1) { // gsl_vector_set(x, i, 2-geno); //} } /* //calculate statistics time_start=clock(); gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx); time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); */ gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize); gsl_vector_memcpy (&Xlarge_col.vector, x); c++; if (c%msize==0 || c==t_last) { size_t l=0; if (c%msize==0) {l=msize;} else {l=c%msize;} gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); time_start=clock(); eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix); time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); gsl_matrix_set_zero (Xlarge); for (size_t i=0; i1) {beta*=-1;} time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); //store summary data SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; sumStat.push_back(SNPs); } } } cout< b; double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0; double logl_H1=0.0; int n_bit, n_miss, ci_total, ci_test; double geno, x_mean; //Calculate basic quantities size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; gsl_vector *x=gsl_vector_alloc (U->size1); gsl_vector *Utx=gsl_vector_alloc (U->size2); gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index); gsl_vector *ab=gsl_vector_alloc (n_index); //create a large matrix size_t msize=10000; gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize); gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize); gsl_matrix_set_zero(Xlarge); gsl_matrix_set_zero (Uab); CalcUab (UtW, Uty, Uab); // if (e_mode!=0) { // gsl_vector_set_zero (ab); // Calcab (W, y, ab); // } //calculate n_bit and c, the number of bit for each snp if (ni_total%4==0) {n_bit=ni_total/4;} else {n_bit=ni_total/4+1; } //print the first three majic numbers for (int i=0; i<3; ++i) { infile.read(ch,1); b=ch[0]; } size_t c=0, t_last=0; for (size_t t=0; t::size_type t=0; t1) { //gsl_vector_set(x, i, 2-geno); //} } /* //calculate statistics time_start=clock(); gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx); time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); */ gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize); gsl_vector_memcpy (&Xlarge_col.vector, x); c++; if (c%msize==0 || c==t_last) { size_t l=0; if (c%msize==0) {l=msize;} else {l=c%msize;} gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); time_start=clock(); eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix); time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); gsl_matrix_set_zero (Xlarge); for (size_t i=0; i1) {beta*=-1;} time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); //store summary data SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; sumStat.push_back(SNPs); } } } cout< void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y) { string file_bgen=file_oxford+".bgen"; ifstream infile (file_bgen.c_str(), ios::binary); if (!infile) {cout<<"error reading bgen file:"<size1); gsl_vector *x_miss=gsl_vector_alloc (U->size1); gsl_vector *Utx=gsl_vector_alloc (U->size2); gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index); gsl_vector *ab=gsl_vector_alloc (n_index); //create a large matrix size_t msize=10000; gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize); gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize); gsl_matrix_set_zero(Xlarge); gsl_matrix_set_zero (Uab); CalcUab (UtW, Uty, Uab); // if (e_mode!=0) { // gsl_vector_set_zero (ab); // Calcab (W, y, ab); // } // read in header uint32_t bgen_snp_block_offset; uint32_t bgen_header_length; uint32_t bgen_nsamples; uint32_t bgen_nsnps; uint32_t bgen_flags; infile.read(reinterpret_cast(&bgen_snp_block_offset),4); infile.read(reinterpret_cast(&bgen_header_length),4); bgen_snp_block_offset-=4; infile.read(reinterpret_cast(&bgen_nsnps),4); bgen_snp_block_offset-=4; infile.read(reinterpret_cast(&bgen_nsamples),4); bgen_snp_block_offset-=4; infile.ignore(4+bgen_header_length-20); bgen_snp_block_offset-=4+bgen_header_length-20; infile.read(reinterpret_cast(&bgen_flags),4); bgen_snp_block_offset-=4; bool CompressedSNPBlocks=bgen_flags&0x1; // bool LongIds=bgen_flags&0x4; infile.ignore(bgen_snp_block_offset); double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss; uint32_t bgen_N; uint16_t bgen_LS; uint16_t bgen_LR; uint16_t bgen_LC; uint32_t bgen_SNP_pos; uint32_t bgen_LA; std::string bgen_A_allele; uint32_t bgen_LB; std::string bgen_B_allele; uint32_t bgen_P; size_t unzipped_data_size; string id; string rs; string chr; std::cout<<"Warning: WJA hard coded SNP missingness threshold of 10%"<1) {break;} if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);} if (indicator_snp[t]==0) {continue;} // read SNP header id.clear(); rs.clear(); chr.clear(); bgen_A_allele.clear(); bgen_B_allele.clear(); infile.read(reinterpret_cast(&bgen_N),4); infile.read(reinterpret_cast(&bgen_LS),2); id.resize(bgen_LS); infile.read(&id[0], bgen_LS); infile.read(reinterpret_cast(&bgen_LR),2); rs.resize(bgen_LR); infile.read(&rs[0], bgen_LR); infile.read(reinterpret_cast(&bgen_LC),2); chr.resize(bgen_LC); infile.read(&chr[0], bgen_LC); infile.read(reinterpret_cast(&bgen_SNP_pos),4); infile.read(reinterpret_cast(&bgen_LA),4); bgen_A_allele.resize(bgen_LA); infile.read(&bgen_A_allele[0], bgen_LA); infile.read(reinterpret_cast(&bgen_LB),4); bgen_B_allele.resize(bgen_LB); infile.read(&bgen_B_allele[0], bgen_LB); uint16_t unzipped_data[3*bgen_N]; if (indicator_snp[t]==0) { if(CompressedSNPBlocks) infile.read(reinterpret_cast(&bgen_P),4); else bgen_P=6*bgen_N; infile.ignore(static_cast(bgen_P)); continue; } if(CompressedSNPBlocks) { infile.read(reinterpret_cast(&bgen_P),4); uint8_t zipped_data[bgen_P]; unzipped_data_size=6*bgen_N; infile.read(reinterpret_cast(zipped_data),bgen_P); int result=uncompress(reinterpret_cast(unzipped_data), reinterpret_cast(&unzipped_data_size), reinterpret_cast(zipped_data), static_cast (bgen_P)); assert(result == Z_OK); } else { bgen_P=6*bgen_N; infile.read(reinterpret_cast(unzipped_data),bgen_P); } x_mean=0.0; c_phen=0; n_miss=0; gsl_vector_set_zero(x_miss); for (size_t i=0; i(unzipped_data[i*3])/32768.0; bgen_geno_prob_AB=static_cast(unzipped_data[i*3+1])/32768.0; bgen_geno_prob_BB=static_cast(unzipped_data[i*3+2])/32768.0; // WJA bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB; if (bgen_geno_prob_non_miss<0.9) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;} else { bgen_geno_prob_AA/=bgen_geno_prob_non_miss; bgen_geno_prob_AB/=bgen_geno_prob_non_miss; bgen_geno_prob_BB/=bgen_geno_prob_non_miss; geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB; gsl_vector_set(x, c_phen, geno); gsl_vector_set(x_miss, c_phen, 1.0); x_mean+=geno; } c_phen++; } x_mean/=static_cast(ni_test-n_miss); for (size_t i=0; i1) { //gsl_vector_set(x, i, 2-geno); //} } /* //calculate statistics time_start=clock(); gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx); time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); */ gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize); gsl_vector_memcpy (&Xlarge_col.vector, x); c++; if (c%msize==0 || c==t_last ) { size_t l=0; if (c%msize==0) {l=msize;} else {l=c%msize;} gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); time_start=clock(); eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix); time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); gsl_matrix_set_zero (Xlarge); for (size_t i=0; i1) {beta*=-1;} time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); //store summary data SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; sumStat.push_back(SNPs); } } } cout< > &pos_loglr) { double logl_H0, logl_H1, log_lr, lambda0, lambda1; gsl_vector *w=gsl_vector_alloc (Uty->size); gsl_matrix *Utw=gsl_matrix_alloc (Uty->size, 1); gsl_matrix *Uab=gsl_matrix_alloc (Uty->size, 6); gsl_vector *ab=gsl_vector_alloc (6); gsl_vector_set_zero(ab); gsl_vector_set_all (w, 1.0); gsl_vector_view Utw_col=gsl_matrix_column (Utw, 0); gsl_blas_dgemv (CblasTrans, 1.0, U, w, 0.0, &Utw_col.vector); CalcUab (Utw, Uty, Uab) ; FUNC_PARAM param0={true, Uty->size, 1, K_eval, Uab, ab, 0}; CalcLambda('L', param0, l_min, l_max, n_region, lambda0, logl_H0); for (size_t i=0; isize2; ++i) { gsl_vector_const_view UtX_col=gsl_matrix_const_column (UtX, i); CalcUab(Utw, Uty, &UtX_col.vector, Uab); FUNC_PARAM param1={false, UtX->size1, 1, K_eval, Uab, ab, 0}; CalcLambda ('L', param1, l_min, l_max, n_region, lambda1, logl_H1); log_lr=logl_H1-logl_H0; pos_loglr.push_back(make_pair(i,log_lr) ); } gsl_vector_free (w); gsl_matrix_free (Utw); gsl_matrix_free (Uab); gsl_vector_free (ab); return; } void CalcLambda (const char func_name, FUNC_PARAM ¶ms, const double l_min, const double l_max, const size_t n_region, double &lambda, double &logf) { if (func_name!='R' && func_name!='L' && func_name!='r' && func_name!='l') {cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted likelihood, 'L' for log-likelihood."< > lambda_lh; //evaluate first order derivates in different intervals double lambda_l, lambda_h, lambda_interval=log(l_max/l_min)/(double)n_region; double dev1_l, dev1_h, logf_l, logf_h; for (size_t i=0; i=logf_h) {lambda=l_min; logf=logf_l;} else {lambda=l_max; logf=logf_h;} } else { //if derivates change signs int status; int iter=0, max_iter=100; double l, l_temp; gsl_function F; gsl_function_fdf FDF; F.params=¶ms; FDF.params=¶ms; if (func_name=='R' || func_name=='r') { F.function=&LogRL_dev1; FDF.f=&LogRL_dev1; FDF.df=&LogRL_dev2; FDF.fdf=&LogRL_dev12; } else { F.function=&LogL_dev1; FDF.f=&LogL_dev1; FDF.df=&LogL_dev2; FDF.fdf=&LogL_dev12; } const gsl_root_fsolver_type *T_f; gsl_root_fsolver *s_f; T_f=gsl_root_fsolver_brent; s_f=gsl_root_fsolver_alloc (T_f); const gsl_root_fdfsolver_type *T_fdf; gsl_root_fdfsolver *s_fdf; T_fdf=gsl_root_fdfsolver_newton; s_fdf=gsl_root_fdfsolver_alloc(T_fdf); for (vector::size_type i=0; il_min && ll_max) {l=l_max;} if (func_name=='R' || func_name=='r') {logf_l=LogRL_f (l, ¶ms);} else {logf_l=LogL_f (l, ¶ms);} if (i==0) {logf=logf_l; lambda=l;} else if (logflogf) {lambda=l_min; logf=logf_l;} if (logf_h>logf) {lambda=l_max; logf=logf_h;} } return; } //calculate lambda in the null model void CalcLambda (const char func_name, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const double l_min, const double l_max, const size_t n_region, double &lambda, double &logl_H0) { if (func_name!='R' && func_name!='L' && func_name!='r' && func_name!='l') {cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted likelihood, 'L' for log-likelihood."<size2, ni_test=UtW->size1; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index); gsl_vector *ab=gsl_vector_alloc (n_index); gsl_matrix_set_zero (Uab); CalcUab (UtW, Uty, Uab); // if (e_mode!=0) { // gsl_vector_set_zero (ab); // Calcab (W, y, ab); // } FUNC_PARAM param0={true, ni_test, n_cvt, eval, Uab, ab, 0}; CalcLambda(func_name, param0, l_min, l_max, n_region, lambda, logl_H0); gsl_matrix_free(Uab); gsl_vector_free(ab); return; } //obtain REMLE estimate for PVE using lambda_remle void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const double lambda, const double trace_G, double &pve, double &pve_se) { size_t n_cvt=UtW->size2, ni_test=UtW->size1; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index); gsl_vector *ab=gsl_vector_alloc (n_index); gsl_matrix_set_zero (Uab); CalcUab (UtW, Uty, Uab); // if (e_mode!=0) { // gsl_vector_set_zero (ab); // Calcab (W, y, ab); // } FUNC_PARAM param0={true, ni_test, n_cvt, eval, Uab, ab, 0}; double se=sqrt(-1.0/LogRL_dev2 (lambda, ¶m0)); pve=trace_G*lambda/(trace_G*lambda+1.0); pve_se=trace_G/((trace_G*lambda+1.0)*(trace_G*lambda+1.0))*se; gsl_matrix_free (Uab); gsl_vector_free (ab); return; } //obtain REML estimate for Vg and Ve using lambda_remle //obtain beta and se(beta) for coefficients //ab is not used when e_mode==0 void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const double lambda, double &vg, double &ve, gsl_vector *beta, gsl_vector *se_beta) { size_t n_cvt=UtW->size2, ni_test=UtW->size1; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index); gsl_vector *ab=gsl_vector_alloc (n_index); gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); gsl_vector *Hi_eval=gsl_vector_alloc(eval->size); gsl_vector *v_temp=gsl_vector_alloc(eval->size); gsl_matrix *HiW=gsl_matrix_alloc(eval->size, UtW->size2); gsl_matrix *WHiW=gsl_matrix_alloc(UtW->size2, UtW->size2); gsl_vector *WHiy=gsl_vector_alloc(UtW->size2); gsl_matrix *Vbeta=gsl_matrix_alloc(UtW->size2, UtW->size2); gsl_matrix_set_zero (Uab); CalcUab (UtW, Uty, Uab); gsl_vector_memcpy (v_temp, eval); gsl_vector_scale (v_temp, lambda); gsl_vector_set_all (Hi_eval, 1.0); gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); //calculate beta gsl_matrix_memcpy (HiW, UtW); for (size_t i=0; isize2; i++) { gsl_vector_view HiW_col=gsl_matrix_column(HiW, i); gsl_vector_mul(&HiW_col.vector, Hi_eval); } gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, HiW, UtW, 0.0, WHiW); gsl_blas_dgemv (CblasTrans, 1.0, HiW, Uty, 0.0, WHiy); int sig; gsl_permutation * pmt=gsl_permutation_alloc (UtW->size2); LUDecomp (WHiW, pmt, &sig); LUSolve (WHiW, pmt, WHiy, beta); LUInvert (WHiW, pmt, Vbeta); //calculate vg and ve CalcPab (n_cvt, 0, Hi_eval, Uab, ab, Pab); size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy); ve=P_yy/(double)(ni_test-n_cvt); vg=ve*lambda; //with ve, calculate se(beta) gsl_matrix_scale(Vbeta, ve); //obtain se_beta for (size_t i=0; isize1; i++) { gsl_vector_set (se_beta, i, sqrt(gsl_matrix_get(Vbeta, i, i) ) ); } gsl_matrix_free(Uab); gsl_matrix_free(Pab); gsl_vector_free(ab); gsl_vector_free(Hi_eval); gsl_vector_free(v_temp); gsl_matrix_free(HiW); gsl_matrix_free(WHiW); gsl_vector_free(WHiy); gsl_matrix_free(Vbeta); gsl_permutation_free(pmt); return; } void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const gsl_vector *env) { igzstream infile (file_geno.c_str(), igzstream::in); // ifstream infile (file_geno.c_str(), ifstream::in); if (!infile) {cout<<"error reading genotype file:"<size1); gsl_vector *x_miss=gsl_vector_alloc (U->size1); gsl_vector *Utx=gsl_vector_alloc (U->size2); gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index); gsl_vector *ab=gsl_vector_alloc (n_index); gsl_matrix *UtW_expand=gsl_matrix_alloc (U->size1, UtW->size2+2); gsl_matrix_view UtW_expand_mat=gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2); gsl_matrix_memcpy (&UtW_expand_mat.matrix, UtW); gsl_vector_view UtW_expand_env=gsl_matrix_column(UtW_expand, UtW->size2); gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector); gsl_vector_view UtW_expand_x=gsl_matrix_column(UtW_expand, UtW->size2+1); //gsl_matrix_set_zero (Uab); // CalcUab (UtW, Uty, Uab); // if (e_mode!=0) { // gsl_vector_set_zero (ab); // Calcab (W, y, ab); // } //start reading genotypes and analyze for (size_t t=0; t1) {break;} !safeGetline(infile, line).eof(); if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);} if (indicator_snp[t]==0) {continue;} ch_ptr=strtok ((char *)line.c_str(), " , \t"); ch_ptr=strtok (NULL, " , \t"); ch_ptr=strtok (NULL, " , \t"); x_mean=0.0; c_phen=0; n_miss=0; gsl_vector_set_zero(x_miss); for (size_t i=0; i1) { gsl_vector_set(x, i, 2-geno); } } //calculate statistics time_start=clock(); gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector); gsl_vector_mul (x, env); gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx); time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); gsl_matrix_set_zero (Uab); CalcUab (UtW_expand, Uty, Uab); if (a_mode==2 || a_mode==4) { FUNC_PARAM param0={true, ni_test, n_cvt+2, eval, Uab, ab, 0}; CalcLambda ('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0); } CalcUab(UtW_expand, Uty, Utx, Uab); // if (e_mode!=0) { // Calcab (W, y, x, ab); // } time_start=clock(); FUNC_PARAM param1={false, ni_test, n_cvt+2, eval, Uab, ab, 0}; //3 is before 1 if (a_mode==3 || a_mode==4) { CalcRLScore (l_mle_null, param1, beta, se, p_score); } if (a_mode==1 || a_mode==4) { CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1); CalcRLWald (lambda_remle, param1, beta, se, p_wald); } if (a_mode==2 || a_mode==4) { CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1); } if (x_mean>1) {beta*=-1;} time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); //store summary data SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; sumStat.push_back(SNPs); } cout< b; double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0; double logl_H1=0.0, logl_H0=0.0; int n_bit, n_miss, ci_total, ci_test; double geno, x_mean; //Calculate basic quantities size_t n_index=(n_cvt+2+2+1)*(n_cvt+2+2)/2; gsl_vector *x=gsl_vector_alloc (U->size1); gsl_vector *Utx=gsl_vector_alloc (U->size2); gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index); gsl_vector *ab=gsl_vector_alloc (n_index); gsl_matrix *UtW_expand=gsl_matrix_alloc (U->size1, UtW->size2+2); gsl_matrix_view UtW_expand_mat=gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2); gsl_matrix_memcpy (&UtW_expand_mat.matrix, UtW); gsl_vector_view UtW_expand_env=gsl_matrix_column(UtW_expand, UtW->size2); gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector); gsl_vector_view UtW_expand_x=gsl_matrix_column(UtW_expand, UtW->size2+1); //gsl_matrix_set_zero (Uab); //CalcUab (UtW, Uty, Uab); // if (e_mode!=0) { // gsl_vector_set_zero (ab); // Calcab (W, y, ab); // } //calculate n_bit and c, the number of bit for each snp if (ni_total%4==0) {n_bit=ni_total/4;} else {n_bit=ni_total/4+1; } //print the first three majic numbers for (int i=0; i<3; ++i) { infile.read(ch,1); b=ch[0]; } for (vector::size_type t=0; t1) { gsl_vector_set(x, i, 2-geno); } } //calculate statistics time_start=clock(); gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector); gsl_vector_mul (x, env); gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx); time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); gsl_matrix_set_zero (Uab); CalcUab (UtW_expand, Uty, Uab); if (a_mode==2 || a_mode==4) { FUNC_PARAM param0={true, ni_test, n_cvt+2, eval, Uab, ab, 0}; CalcLambda ('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0); } CalcUab(UtW_expand, Uty, Utx, Uab); // if (e_mode!=0) { // Calcab (W, y, x, ab); // } time_start=clock(); FUNC_PARAM param1={false, ni_test, n_cvt+2, eval, Uab, ab, 0}; //3 is before 1, for beta if (a_mode==3 || a_mode==4) { CalcRLScore (l_mle_null, param1, beta, se, p_score); } if (a_mode==1 || a_mode==4) { CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1); CalcRLWald (lambda_remle, param1, beta, se, p_wald); } if (a_mode==2 || a_mode==4) { CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1); } if (x_mean>1) {beta*=-1;} time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); //store summary data SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; sumStat.push_back(SNPs); } cout<