From 17deca2d54827a00df3ea4d98df700fc2b8ed777 Mon Sep 17 00:00:00 2001 From: xiangzhou Date: Sat, 20 Sep 2014 10:17:34 -0400 Subject: initial upload, version 0.95alpha --- mvlmm.cpp | 3748 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 3748 insertions(+) create mode 100644 mvlmm.cpp (limited to 'mvlmm.cpp') diff --git a/mvlmm.cpp b/mvlmm.cpp new file mode 100644 index 0000000..56540d8 --- /dev/null +++ b/mvlmm.cpp @@ -0,0 +1,3748 @@ +/* + Genome-wide Efficient Mixed Model Association (GEMMA) + Copyright (C) 2011 Xiang Zhou + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + + + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "gsl/gsl_vector.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_blas.h" + +#include "gsl/gsl_cdf.h" +#include "gsl/gsl_roots.h" +#include "gsl/gsl_min.h" +#include "gsl/gsl_integration.h" + +#include "io.h" +#include "lapack.h" +#include "gzstream.h" + +#ifdef FORCE_FLOAT +#include "lmm_float.h" +#include "mvlmm_float.h" +#else +#include "lmm.h" +#include "mvlmm.h" +#endif + + + +using namespace std; + + +//in this file, X, Y are already transformed (i.e. UtX and UtY) + + +void MVLMM::CopyFromParam (PARAM &cPar) +{ + a_mode=cPar.a_mode; + d_pace=cPar.d_pace; + + file_bfile=cPar.file_bfile; + file_geno=cPar.file_geno; + file_out=cPar.file_out; + + l_min=cPar.l_min; + l_max=cPar.l_max; + n_region=cPar.n_region; + p_nr=cPar.p_nr; + em_iter=cPar.em_iter; + nr_iter=cPar.nr_iter; + em_prec=cPar.em_prec; + nr_prec=cPar.nr_prec; + crt=cPar.crt; + + Vg_remle_null=cPar.Vg_remle_null; + Ve_remle_null=cPar.Ve_remle_null; + Vg_mle_null=cPar.Vg_mle_null; + Ve_mle_null=cPar.Ve_mle_null; + + time_UtX=0.0; + time_opt=0.0; + + ni_total=cPar.ni_total; + ns_total=cPar.ns_total; + ni_test=cPar.ni_test; + ns_test=cPar.ns_test; + n_cvt=cPar.n_cvt; + + n_ph=cPar.n_ph; + + indicator_idv=cPar.indicator_idv; + indicator_snp=cPar.indicator_snp; + snpInfo=cPar.snpInfo; + + return; +} + + +void MVLMM::CopyToParam (PARAM &cPar) +{ + cPar.time_UtX=time_UtX; + cPar.time_opt=time_opt; + + cPar.Vg_remle_null=Vg_remle_null; + cPar.Ve_remle_null=Ve_remle_null; + cPar.Vg_mle_null=Vg_mle_null; + cPar.Ve_mle_null=Ve_mle_null; + + cPar.VVg_remle_null=VVg_remle_null; + cPar.VVe_remle_null=VVe_remle_null; + cPar.VVg_mle_null=VVg_mle_null; + cPar.VVe_mle_null=VVe_mle_null; + + cPar.beta_remle_null=beta_remle_null; + cPar.se_beta_remle_null=se_beta_remle_null; + cPar.beta_mle_null=beta_mle_null; + cPar.se_beta_mle_null=se_beta_mle_null; + + cPar.logl_remle_H0=logl_remle_H0; + cPar.logl_mle_H0=logl_mle_H0; + return; +} + + +void MVLMM::WriteFiles () +{ + string file_str; + file_str="./output/"+file_out; + file_str+=".assoc.txt"; + + ofstream outfile (file_str.c_str(), ofstream::out); + if (!outfile) {cout<<"error writing file: "<size1; + double d, logdet_Ve=0.0; + + //eigen decomposition of V_e + gsl_matrix *Lambda=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *V_e_temp=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *V_e_h=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *V_e_hi=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *VgVehi=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *U_l=gsl_matrix_alloc (d_size, d_size); + + gsl_matrix_memcpy(V_e_temp, V_e); + EigenDecomp(V_e_temp, U_l, D_l, 0); + + //calculate V_e_h and V_e_hi + gsl_matrix_set_zero(V_e_h); + gsl_matrix_set_zero(V_e_hi); + for (size_t i=0; isize, d_size=D_l->size, dc_size=Qi->size1; + size_t c_size=dc_size/d_size; + + double delta, dl, d1, d2, d, logdet_Q; + + gsl_matrix *Q=gsl_matrix_alloc (dc_size, dc_size); + gsl_matrix_set_zero (Q); + + for (size_t i=0; isize, c_size=X->size1, d_size=D_l->size; + + gsl_vector_set_zero (xHiy); + + double x, delta, dl, y, d; + for (size_t i=0; isize, d_size=D_l->size; + double delta, dl, d_u, d_e; + + for (size_t k=0; ksize1, d_size=UltVehiY->size1; + + gsl_matrix *YUX=gsl_matrix_alloc (d_size, c_size); + + gsl_matrix_memcpy (UltVehiBX, UltVehiY); + gsl_matrix_sub (UltVehiBX, UltVehiU); + + gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, UltVehiBX, X, 0.0, YUX); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, YUX, XXti, 0.0, UltVehiB); + + gsl_matrix_free(YUX); + + return; +} + +void UpdateRL_B (const gsl_vector *xHiy, const gsl_matrix *Qi, gsl_matrix *UltVehiB) +{ + size_t d_size=UltVehiB->size1, c_size=UltVehiB->size2, dc_size=Qi->size1; + + gsl_vector *b=gsl_vector_alloc (dc_size); + + //calculate b=Qiv + gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, b); + + //copy b to UltVehiB + for (size_t i=0; isize, d_size=U->size1; + + gsl_matrix_set_zero (V_g); + gsl_matrix_set_zero (V_e); + + double delta; + + //calculate the first part: UD^{-1}U^T and EE^T + for (size_t k=0; ksize, c_size=X->size1, d_size=D_l->size, dc_size=Qi->size1; + + gsl_matrix_set_zero(Sigma_uu); + gsl_matrix_set_zero(Sigma_ee); + + double delta, dl, x, d; + + //calculate the first diagonal term + gsl_vector_view Suu_diag=gsl_matrix_diagonal (Sigma_uu); + gsl_vector_view See_diag=gsl_matrix_diagonal (Sigma_ee); + + for (size_t k=0; ksize, d_size=D_l->size, dc_size=Qi->size1; + double logl=0.0, delta, dl, y, d; + + //calculate yHiy+log|H_k| + for (size_t k=0; ksize, c_size=X->size1, d_size=Y->size1; + size_t dc_size=d_size*c_size; + + gsl_matrix *XXt=gsl_matrix_alloc (c_size, c_size); + gsl_matrix *XXti=gsl_matrix_alloc (c_size, c_size); + gsl_vector *D_l=gsl_vector_alloc (d_size); + gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *UltVehiB=gsl_matrix_alloc (d_size, c_size); + gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size); + gsl_matrix *Sigma_uu=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *Sigma_ee=gsl_matrix_alloc (d_size, d_size); + gsl_vector *xHiy=gsl_vector_alloc (dc_size); + gsl_permutation * pmt=gsl_permutation_alloc (c_size); + + double logl_const=0.0, logl_old=0.0, logl_new=0.0, logdet_Q, logdet_Ve; + int sig; + + //calculate |XXt| and (XXt)^{-1} + gsl_blas_dsyrk (CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt); + for (size_t i=0; isize, c_size=W->size1, d_size=V_g->size1; + size_t dc_size=d_size*c_size; + double delta, dl, d, d1, d2, dy, dx, dw, logdet_Ve, logdet_Q, p_value; + + gsl_vector *D_l=gsl_vector_alloc (d_size); + gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size); + gsl_matrix *WHix=gsl_matrix_alloc (dc_size, d_size); + gsl_matrix *QiWHix=gsl_matrix_alloc(dc_size, d_size); + + gsl_matrix *xPx=gsl_matrix_alloc (d_size, d_size); + gsl_vector *xPy=gsl_vector_alloc (d_size); + //gsl_vector *UltVehiy=gsl_vector_alloc (d_size); + gsl_vector *WHiy=gsl_vector_alloc (dc_size); + + gsl_matrix_set_zero (xPx); + gsl_matrix_set_zero (WHix); + gsl_vector_set_zero (xPy); + gsl_vector_set_zero (WHiy); + + //eigen decomposition and calculate log|Ve| + logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi); + + //calculate Qi and log|Q| + logdet_Q=CalcQi (eval, D_l, W, Qi); + + //calculate UltVehiY + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY); + + //calculate WHix, WHiy, xHiy, xHix + for (size_t i=0; isize, c_size=W->size1, d_size=V_g->size1; + size_t dc_size=d_size*c_size; + double delta, dl, d, dy, dw, logdet_Ve, logdet_Q; + + gsl_vector *D_l=gsl_vector_alloc (d_size); + gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size); + gsl_matrix *Qi_temp=gsl_matrix_alloc (dc_size, dc_size); + //gsl_vector *UltVehiy=gsl_vector_alloc (d_size); + gsl_vector *WHiy=gsl_vector_alloc (dc_size); + gsl_vector *QiWHiy=gsl_vector_alloc (dc_size); + gsl_vector *beta=gsl_vector_alloc (dc_size); + gsl_matrix *Vbeta=gsl_matrix_alloc (dc_size, dc_size); + + gsl_vector_set_zero (WHiy); + + //eigen decomposition and calculate log|Ve| + logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi); + + //calculate Qi and log|Q| + logdet_Q=CalcQi (eval, D_l, W, Qi); + + //calculate UltVehiY + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY); + + //calculate WHiy + for (size_t i=0; isize2; j++) { + for (size_t i=0; isize1; i++) { + gsl_matrix_set(B, i, j, gsl_vector_get(beta, j*d_size+i)); + gsl_matrix_set(se_B, i, j, sqrt(gsl_matrix_get(Vbeta, j*d_size+i, j*d_size+i))); + } + } + + //free matrices + gsl_vector_free(D_l); + gsl_matrix_free(UltVeh); + gsl_matrix_free(UltVehi); + gsl_matrix_free(Qi); + gsl_matrix_free(Qi_temp); + gsl_vector_free(WHiy); + gsl_vector_free(QiWHiy); + gsl_vector_free(beta); + gsl_matrix_free(Vbeta); + + return; +} + + + +//below are functions for Newton-Raphson's algorithm + + + + + +//calculate all Hi and return logdet_H=\sum_{k=1}^{n}log|H_k| +//and calculate Qi and return logdet_Q +//and calculate yPy +void CalcHiQi (const gsl_vector *eval, const gsl_matrix *X, const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_matrix *Hi_all, gsl_matrix *Qi, double &logdet_H, double &logdet_Q) +{ + gsl_matrix_set_zero (Hi_all); + gsl_matrix_set_zero (Qi); + logdet_H=0.0; logdet_Q=0.0; + + size_t n_size=eval->size, c_size=X->size1, d_size=V_g->size1; + double logdet_Ve=0.0, delta, dl, d; + + gsl_matrix *mat_dd=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size); + gsl_vector *D_l=gsl_vector_alloc (d_size); + + //calculate D_l, UltVeh and UltVehi + logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi); + + //calculate each Hi and log|H_k| + logdet_H=(double)n_size*logdet_Ve; + for (size_t k=0; ksize2, d_size=Y->size1; + + for (size_t k=0; ksize2, c_size=X->size1, d_size=Hi_all->size1; + + double d; + + for (size_t k=0; ksize2; + + for (size_t k=0; ksize2, d_size=Y->size1, dc_size=xHi->size1; + + for (size_t k=0; k=d_size || j>=d_size) {cout<<"error in GetIndex."<size; + + double delta, d1, d2; + + for (size_t k=0; ksize, d_size=Hiy->size1; + + double delta, d; + + for (size_t k=0; ksize, dc_size=xHi->size1; + size_t d_size=xHi->size2/n_size; + + double delta; + + gsl_matrix *mat_dcdc=gsl_matrix_alloc (dc_size, dc_size); + gsl_matrix *mat_dcdc_t=gsl_matrix_alloc (dc_size, dc_size); + + for (size_t k=0; ksize, d_size=Hiy->size1; + + double delta, d_Hiy_i1, d_Hiy_j1, d_Hiy_i2, d_Hiy_j2, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2; + + for (size_t k=0; ksize, d_size=Hiy->size1; + + double delta, d_Hiy_i, d_Hiy_j, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2; + + for (size_t k=0; ksize, d_size=Hi->size1, dc_size=xHi->size1; + + double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2; + + gsl_matrix *mat_dcdc=gsl_matrix_alloc (dc_size, dc_size); + + for (size_t k=0; ksize, d_size=Hi->size1; + double delta, d; + + for (size_t k=0; ksize, d_size=Hi->size1; + double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2; + + for (size_t k=0; ksize1, d_size=Hi->size1; + size_t v=GetIndex(i, j, d_size); + + double d; + + //calculate the first part: trace(HiD) + Calc_traceHiD (eval, Hi, i, j, tPD_g, tPD_e); + + //calculate the second part: -trace(HixQixHiD) + for (size_t k=0; ksize1, d_size=Hi->size1; + size_t v_size=d_size*(d_size+1)/2; + size_t v1=GetIndex(i1, j1, d_size), v2=GetIndex(i2, j2, d_size); + + double d; + + //calculate the first part: trace(HiDHiD) + Calc_traceHiDHiD (eval, Hi, i1, j1, i2, j2, tPDPD_gg, tPDPD_ee, tPDPD_ge); + + //calculate the second and third parts: -trace(HixQixHiDHiD)-trace(HiDHixQixHiD) + for (size_t i=0; isize1; + size_t v; + + for (size_t i=0; isize2/eval->size, dc_size=xHi->size1; + size_t v; + + for (size_t i=0; isize1; + size_t v1, v2; + + for (size_t i1=0; i1size2/eval->size, dc_size=xHi->size1; + size_t v1, v2; + + for (size_t i1=0; i1size; + double delta, d_Hi_ij; + + gsl_matrix *mat_dcdc=gsl_matrix_alloc (dc_size, dc_size); + gsl_matrix *mat_dcdc_temp=gsl_matrix_alloc (dc_size, dc_size); + + for (size_t k=0; ksize1; + size_t v_size=xHiDHix_all_g->size2/dc_size; + + for (size_t i=0; isize2; i++) { + gsl_vector_const_view vec_g=gsl_matrix_const_column (vec_all_g, i); + gsl_vector_const_view vec_e=gsl_matrix_const_column (vec_all_e, i); + + gsl_vector_view Qivec_g=gsl_matrix_column (Qivec_all_g, i); + gsl_vector_view Qivec_e=gsl_matrix_column (Qivec_all_e, i); + + gsl_blas_dgemv (CblasNoTrans, 1.0, Qi, &vec_g.vector, 0.0, &Qivec_g.vector); + gsl_blas_dgemv (CblasNoTrans, 1.0, Qi, &vec_e.vector, 0.0, &Qivec_e.vector); + } + + return; +} + + +//calculate Qi(xHiDHix) for each pair of i j (i<=j) +void Calc_QiMat_all (const gsl_matrix *Qi, const gsl_matrix *mat_all_g, const gsl_matrix *mat_all_e, gsl_matrix *Qimat_all_g, gsl_matrix *Qimat_all_e) +{ + size_t dc_size=Qi->size1; + size_t v_size=mat_all_g->size2/mat_all_g->size1; + + for (size_t i=0; isize1; + size_t v=GetIndex(i, j, d_size); + + double d; + + //first part: ytHiDHiy + Calc_yHiDHiy (eval, Hiy, i, j, yPDPy_g, yPDPy_e); + + //second and third parts: -(yHix)Qi(xHiDHiy)-(yHiDHix)Qi(xHiy) + gsl_vector_const_view xHiDHiy_g=gsl_matrix_const_column (xHiDHiy_all_g, v); + gsl_vector_const_view xHiDHiy_e=gsl_matrix_const_column (xHiDHiy_all_e, v); + + gsl_blas_ddot(QixHiy, &xHiDHiy_g.vector, &d); + yPDPy_g-=d*2.0; + gsl_blas_ddot(QixHiy, &xHiDHiy_e.vector, &d); + yPDPy_e-=d*2.0; + + //fourth part: +(yHix)Qi(xHiDHix)Qi(xHiy) + gsl_vector_const_view xHiDHixQixHiy_g=gsl_matrix_const_column (xHiDHixQixHiy_all_g, v); + gsl_vector_const_view xHiDHixQixHiy_e=gsl_matrix_const_column (xHiDHixQixHiy_all_e, v); + + gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_g.vector, &d); + yPDPy_g+=d; + gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_e.vector, &d); + yPDPy_e+=d; + + return; +} + +//calculate yPDPDPy=y(Hi-HixQixHi)D(Hi-HixQixHi)D(Hi-HixQixHi)y +//yPDPDPy=yHiDHiDHiy +//-(yHix)Qi(xHiDHiDHiy)-(yHiDHiDHix)Qi(xHiy) +//-(yHiDHix)Qi(xHiDHiy) +//+(yHix)Qi(xHiDHix)Qi(xHiDHiy)+(yHiDHix)Qi(xHiDHix)Qi(xHiy) +//+(yHix)Qi(xHiDHiDHix)Qi(xHiy) +//-(yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy) +void Calc_yPDPDPy (const gsl_vector *eval, const gsl_matrix *Hi, const gsl_matrix *xHi, const gsl_matrix *Hiy, const gsl_vector *QixHiy, const gsl_matrix *xHiDHiy_all_g, const gsl_matrix *xHiDHiy_all_e, const gsl_matrix *QixHiDHiy_all_g, const gsl_matrix *QixHiDHiy_all_e, const gsl_matrix *xHiDHixQixHiy_all_g, const gsl_matrix *xHiDHixQixHiy_all_e, const gsl_matrix *QixHiDHixQixHiy_all_g, const gsl_matrix *QixHiDHixQixHiy_all_e, const gsl_matrix *xHiDHiDHiy_all_gg, const gsl_matrix *xHiDHiDHiy_all_ee, const gsl_matrix *xHiDHiDHiy_all_ge, const gsl_matrix *xHiDHiDHix_all_gg, const gsl_matrix *xHiDHiDHix_all_ee, const gsl_matrix *xHiDHiDHix_all_ge, const size_t i1, const size_t j1, const size_t i2, const size_t j2, double &yPDPDPy_gg, double &yPDPDPy_ee, double &yPDPDPy_ge) +{ + size_t d_size=Hi->size1, dc_size=xHi->size1; + size_t v1=GetIndex(i1, j1, d_size), v2=GetIndex(i2, j2, d_size); + size_t v_size=d_size*(d_size+1)/2; + + double d; + + gsl_vector *xHiDHiDHixQixHiy=gsl_vector_alloc (dc_size); + + //first part: yHiDHiDHiy + Calc_yHiDHiDHiy (eval, Hi, Hiy, i1, j1, i2, j2, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge); + + //second and third parts: -(yHix)Qi(xHiDHiDHiy)-(yHiDHiDHix)Qi(xHiy) + gsl_vector_const_view xHiDHiDHiy_gg1=gsl_matrix_const_column (xHiDHiDHiy_all_gg, v1*v_size+v2); + gsl_vector_const_view xHiDHiDHiy_ee1=gsl_matrix_const_column (xHiDHiDHiy_all_ee, v1*v_size+v2); + gsl_vector_const_view xHiDHiDHiy_ge1=gsl_matrix_const_column (xHiDHiDHiy_all_ge, v1*v_size+v2); + + gsl_vector_const_view xHiDHiDHiy_gg2=gsl_matrix_const_column (xHiDHiDHiy_all_gg, v2*v_size+v1); + gsl_vector_const_view xHiDHiDHiy_ee2=gsl_matrix_const_column (xHiDHiDHiy_all_ee, v2*v_size+v1); + gsl_vector_const_view xHiDHiDHiy_ge2=gsl_matrix_const_column (xHiDHiDHiy_all_ge, v2*v_size+v1); + + gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg1.vector, &d); + yPDPDPy_gg-=d; + gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee1.vector, &d); + yPDPDPy_ee-=d; + gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge1.vector, &d); + yPDPDPy_ge-=d; + + gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg2.vector, &d); + yPDPDPy_gg-=d; + gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee2.vector, &d); + yPDPDPy_ee-=d; + gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge2.vector, &d); + yPDPDPy_ge-=d; + + //fourth part: -(yHiDHix)Qi(xHiDHiy) + gsl_vector_const_view xHiDHiy_g1=gsl_matrix_const_column (xHiDHiy_all_g, v1); + gsl_vector_const_view xHiDHiy_e1=gsl_matrix_const_column (xHiDHiy_all_e, v1); + gsl_vector_const_view QixHiDHiy_g2=gsl_matrix_const_column (QixHiDHiy_all_g, v2); + gsl_vector_const_view QixHiDHiy_e2=gsl_matrix_const_column (QixHiDHiy_all_e, v2); + + gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_g2.vector, &d); + yPDPDPy_gg-=d; + gsl_blas_ddot(&xHiDHiy_e1.vector, &QixHiDHiy_e2.vector, &d); + yPDPDPy_ee-=d; + gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_e2.vector, &d); + yPDPDPy_ge-=d; + + //fifth and sixth parts: +(yHix)Qi(xHiDHix)Qi(xHiDHiy)+(yHiDHix)Qi(xHiDHix)Qi(xHiy) + gsl_vector_const_view QixHiDHiy_g1=gsl_matrix_const_column (QixHiDHiy_all_g, v1); + gsl_vector_const_view QixHiDHiy_e1=gsl_matrix_const_column (QixHiDHiy_all_e, v1); + + gsl_vector_const_view xHiDHixQixHiy_g1=gsl_matrix_const_column (xHiDHixQixHiy_all_g, v1); + gsl_vector_const_view xHiDHixQixHiy_e1=gsl_matrix_const_column (xHiDHixQixHiy_all_e, v1); + gsl_vector_const_view xHiDHixQixHiy_g2=gsl_matrix_const_column (xHiDHixQixHiy_all_g, v2); + gsl_vector_const_view xHiDHixQixHiy_e2=gsl_matrix_const_column (xHiDHixQixHiy_all_e, v2); + + gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_g2.vector, &d); + yPDPDPy_gg+=d; + gsl_blas_ddot(&xHiDHixQixHiy_g2.vector, &QixHiDHiy_g1.vector, &d); + yPDPDPy_gg+=d; + + gsl_blas_ddot(&xHiDHixQixHiy_e1.vector, &QixHiDHiy_e2.vector, &d); + yPDPDPy_ee+=d; + gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_e1.vector, &d); + yPDPDPy_ee+=d; + + gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_e2.vector, &d); + yPDPDPy_ge+=d; + gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_g1.vector, &d); + yPDPDPy_ge+=d; + + //seventh part: +(yHix)Qi(xHiDHiDHix)Qi(xHiy) + gsl_matrix_const_view xHiDHiDHix_gg=gsl_matrix_const_submatrix (xHiDHiDHix_all_gg, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size); + gsl_matrix_const_view xHiDHiDHix_ee=gsl_matrix_const_submatrix (xHiDHiDHix_all_ee, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size); + gsl_matrix_const_view xHiDHiDHix_ge=gsl_matrix_const_submatrix (xHiDHiDHix_all_ge, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size); + + gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_gg.matrix, QixHiy, 0.0, xHiDHiDHixQixHiy); + gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d); + yPDPDPy_gg+=d; + gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_ee.matrix, QixHiy, 0.0, xHiDHiDHixQixHiy); + gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d); + yPDPDPy_ee+=d; + gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_ge.matrix, QixHiy, 0.0, xHiDHiDHixQixHiy); + gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d); + yPDPDPy_ge+=d; + + //eighth part: -(yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy) + gsl_vector_const_view QixHiDHixQixHiy_g1=gsl_matrix_const_column (QixHiDHixQixHiy_all_g, v1); + gsl_vector_const_view QixHiDHixQixHiy_e1=gsl_matrix_const_column (QixHiDHixQixHiy_all_e, v1); + + gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector, &xHiDHixQixHiy_g2.vector, &d); + yPDPDPy_gg-=d; + gsl_blas_ddot(&QixHiDHixQixHiy_e1.vector, &xHiDHixQixHiy_e2.vector, &d); + yPDPDPy_ee-=d; + gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector, &xHiDHixQixHiy_e2.vector, &d); + yPDPDPy_ge-=d; + + //free memory + gsl_vector_free(xHiDHiDHixQixHiy); + + return; +} + + +//calculate Edgeworth correctation factors for small samples +//notation and method follows Thomas J. Rothenberg, Econometirca 1984; 52 (4) +//M=xHiDHix +void CalcCRT (const gsl_matrix *Hessian_inv, const gsl_matrix *Qi, const gsl_matrix *QixHiDHix_all_g, const gsl_matrix *QixHiDHix_all_e, const gsl_matrix *xHiDHiDHix_all_gg, const gsl_matrix *xHiDHiDHix_all_ee, const gsl_matrix *xHiDHiDHix_all_ge, const size_t d_size, double &crt_a, double &crt_b, double &crt_c) +{ + crt_a=0.0; crt_b=0.0; crt_c=0.0; + + size_t dc_size=Qi->size1, v_size=Hessian_inv->size1/2; + size_t c_size=dc_size/d_size; + double h_gg, h_ge, h_ee, d, B=0.0, C=0.0, D=0.0; + double trCg1, trCe1, trCg2, trCe2, trB_gg, trB_ge, trB_ee, trCC_gg, trCC_ge, trCC_ee, trD_gg=0.0, trD_ge=0.0, trD_ee=0.0; + + gsl_matrix *QiMQi_g1=gsl_matrix_alloc (dc_size, dc_size); + gsl_matrix *QiMQi_e1=gsl_matrix_alloc (dc_size, dc_size); + gsl_matrix *QiMQi_g2=gsl_matrix_alloc (dc_size, dc_size); + gsl_matrix *QiMQi_e2=gsl_matrix_alloc (dc_size, dc_size); + + gsl_matrix *QiMQisQisi_g1=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *QiMQisQisi_e1=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *QiMQisQisi_g2=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *QiMQisQisi_e2=gsl_matrix_alloc (d_size, d_size); + + gsl_matrix *QiMQiMQi_gg=gsl_matrix_alloc (dc_size, dc_size); + gsl_matrix *QiMQiMQi_ge=gsl_matrix_alloc (dc_size, dc_size); + gsl_matrix *QiMQiMQi_ee=gsl_matrix_alloc (dc_size, dc_size); + + gsl_matrix *QiMMQi_gg=gsl_matrix_alloc (dc_size, dc_size); + gsl_matrix *QiMMQi_ge=gsl_matrix_alloc (dc_size, dc_size); + gsl_matrix *QiMMQi_ee=gsl_matrix_alloc (dc_size, dc_size); + + gsl_matrix *Qi_si=gsl_matrix_alloc (d_size, d_size); + + gsl_matrix *M_dd=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *M_dcdc=gsl_matrix_alloc (dc_size, dc_size); + + //invert Qi_sub to Qi_si + gsl_matrix *Qi_sub=gsl_matrix_alloc (d_size, d_size); + + gsl_matrix_const_view Qi_s=gsl_matrix_const_submatrix (Qi, (c_size-1)*d_size, (c_size-1)*d_size, d_size, d_size); + + int sig; + gsl_permutation * pmt=gsl_permutation_alloc (d_size); + + gsl_matrix_memcpy (Qi_sub, &Qi_s.matrix); + LUDecomp (Qi_sub, pmt, &sig); + LUInvert (Qi_sub, pmt, Qi_si); + + gsl_permutation_free(pmt); + gsl_matrix_free(Qi_sub); + + //calculate correctation factors + for (size_t v1=0; v1size1, d_size=Hi->size1; + size_t c_size=dc_size/d_size; + size_t v_size=d_size*(d_size+1)/2; + size_t v1, v2; + double dev1_g, dev1_e, dev2_gg, dev2_ee, dev2_ge; + + gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2); + + gsl_matrix *xHiDHiy_all_g=gsl_matrix_alloc (dc_size, v_size); + gsl_matrix *xHiDHiy_all_e=gsl_matrix_alloc (dc_size, v_size); + gsl_matrix *xHiDHix_all_g=gsl_matrix_alloc (dc_size, v_size*dc_size); + gsl_matrix *xHiDHix_all_e=gsl_matrix_alloc (dc_size, v_size*dc_size); + gsl_matrix *xHiDHixQixHiy_all_g=gsl_matrix_alloc (dc_size, v_size); + gsl_matrix *xHiDHixQixHiy_all_e=gsl_matrix_alloc (dc_size, v_size); + + gsl_matrix *QixHiDHiy_all_g=gsl_matrix_alloc (dc_size, v_size); + gsl_matrix *QixHiDHiy_all_e=gsl_matrix_alloc (dc_size, v_size); + gsl_matrix *QixHiDHix_all_g=gsl_matrix_alloc (dc_size, v_size*dc_size); + gsl_matrix *QixHiDHix_all_e=gsl_matrix_alloc (dc_size, v_size*dc_size); + gsl_matrix *QixHiDHixQixHiy_all_g=gsl_matrix_alloc (dc_size, v_size); + gsl_matrix *QixHiDHixQixHiy_all_e=gsl_matrix_alloc (dc_size, v_size); + + gsl_matrix *xHiDHiDHiy_all_gg=gsl_matrix_alloc (dc_size, v_size*v_size); + gsl_matrix *xHiDHiDHiy_all_ee=gsl_matrix_alloc (dc_size, v_size*v_size); + gsl_matrix *xHiDHiDHiy_all_ge=gsl_matrix_alloc (dc_size, v_size*v_size); + gsl_matrix *xHiDHiDHix_all_gg=gsl_matrix_alloc (dc_size, v_size*v_size*dc_size); + gsl_matrix *xHiDHiDHix_all_ee=gsl_matrix_alloc (dc_size, v_size*v_size*dc_size); + gsl_matrix *xHiDHiDHix_all_ge=gsl_matrix_alloc (dc_size, v_size*v_size*dc_size); + + //calculate xHiDHiy_all, xHiDHix_all and xHiDHixQixHiy_all + Calc_xHiDHiy_all (eval, xHi, Hiy, xHiDHiy_all_g, xHiDHiy_all_e); + Calc_xHiDHix_all (eval, xHi, xHiDHix_all_g, xHiDHix_all_e); + Calc_xHiDHixQixHiy_all (xHiDHix_all_g, xHiDHix_all_e, QixHiy, xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e); + + Calc_xHiDHiDHiy_all (v_size, eval, Hi, xHi, Hiy, xHiDHiDHiy_all_gg, xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge); + Calc_xHiDHiDHix_all (v_size, eval, Hi, xHi, xHiDHiDHix_all_gg, xHiDHiDHix_all_ee, xHiDHiDHix_all_ge); + + //calculate QixHiDHiy_all, QixHiDHix_all and QixHiDHixQixHiy_all + Calc_QiVec_all (Qi, xHiDHiy_all_g, xHiDHiy_all_e, QixHiDHiy_all_g, QixHiDHiy_all_e); + Calc_QiVec_all (Qi, xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e, QixHiDHixQixHiy_all_g, QixHiDHixQixHiy_all_e); + Calc_QiMat_all (Qi, xHiDHix_all_g, xHiDHix_all_e, QixHiDHix_all_g, QixHiDHix_all_e); + + double tHiD_g, tHiD_e, tPD_g, tPD_e, tHiDHiD_gg, tHiDHiD_ee, tHiDHiD_ge, tPDPD_gg, tPDPD_ee, tPDPD_ge; + double yPDPy_g, yPDPy_e, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge; + + //calculate gradient and Hessian for Vg + for (size_t i1=0; i11) { + CalcCRT (Hessian_inv, Qi, QixHiDHix_all_g, QixHiDHix_all_e, xHiDHiDHix_all_gg, xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, d_size, crt_a, crt_b, crt_c); + } else { + crt_a=0.0; crt_b=0.0; crt_c=0.0; + } + + gsl_matrix_free(xHiDHiy_all_g); + gsl_matrix_free(xHiDHiy_all_e); + gsl_matrix_free(xHiDHix_all_g); + gsl_matrix_free(xHiDHix_all_e); + gsl_matrix_free(xHiDHixQixHiy_all_g); + gsl_matrix_free(xHiDHixQixHiy_all_e); + + gsl_matrix_free(QixHiDHiy_all_g); + gsl_matrix_free(QixHiDHiy_all_e); + gsl_matrix_free(QixHiDHix_all_g); + gsl_matrix_free(QixHiDHix_all_e); + gsl_matrix_free(QixHiDHixQixHiy_all_g); + gsl_matrix_free(QixHiDHixQixHiy_all_e); + + gsl_matrix_free(xHiDHiDHiy_all_gg); + gsl_matrix_free(xHiDHiDHiy_all_ee); + gsl_matrix_free(xHiDHiDHiy_all_ge); + gsl_matrix_free(xHiDHiDHix_all_gg); + gsl_matrix_free(xHiDHiDHix_all_ee); + gsl_matrix_free(xHiDHiDHix_all_ge); + + return; +} + + +//update Vg, Ve +void UpdateVgVe (const gsl_matrix *Hessian_inv, const gsl_vector *gradient, const double step_scale, gsl_matrix *V_g, gsl_matrix *V_e) +{ + size_t v_size=gradient->size/2, d_size=V_g->size1; + size_t v; + + gsl_vector *vec_v=gsl_vector_alloc (v_size*2); + + double d; + + //vectorize Vg and Ve + for (size_t i=0; isize, c_size=X->size1, d_size=Y->size1; + size_t dc_size=d_size*c_size; + size_t v_size=d_size*(d_size+1)/2; + + double logdet_H, logdet_Q, yPy, logl_const, logl_old=0.0, logl_new=0.0, step_scale; + int sig; + size_t step_iter, flag_pd; + + gsl_matrix *Vg_save=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *Ve_save=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *V_temp=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *U_temp=gsl_matrix_alloc (d_size, d_size); + gsl_vector *D_temp=gsl_vector_alloc (d_size); + gsl_vector *xHiy=gsl_vector_alloc (dc_size); + gsl_vector *QixHiy=gsl_vector_alloc (dc_size); + gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size); + gsl_matrix *XXt=gsl_matrix_alloc (c_size, c_size); + + gsl_vector *gradient=gsl_vector_alloc (v_size*2); + + //calculate |XXt| and (XXt)^{-1} + gsl_blas_dsyrk (CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt); + for (size_t i=0; i10 ) && step_iter<10 && t!=0); + + //terminate if change is small + if (t!=0) { + if (logl_newsize1; i++) { + for (size_t j=0; jsize2; j++) { + cout<size, c_size=X->size1, d_size=Y->size1; + double a, b, c; + double lambda, logl, vg, ve; + + //Initial the diagonal elements of Vg and Ve using univariate LMM and REML estimates + gsl_matrix *Xt=gsl_matrix_alloc (n_size, c_size); + gsl_vector *beta_temp=gsl_vector_alloc(c_size); + gsl_vector *se_beta_temp=gsl_vector_alloc(c_size); + + gsl_matrix_transpose_memcpy (Xt, X); + + for (size_t i=0; i4) { + //first obtain good initial values + //large matrices for EM + gsl_matrix *U_hat=gsl_matrix_alloc (2, n_size); + gsl_matrix *E_hat=gsl_matrix_alloc (2, n_size); + gsl_matrix *OmegaU=gsl_matrix_alloc (2, n_size); + gsl_matrix *OmegaE=gsl_matrix_alloc (2, n_size); + gsl_matrix *UltVehiY=gsl_matrix_alloc (2, n_size); + gsl_matrix *UltVehiBX=gsl_matrix_alloc (2, n_size); + gsl_matrix *UltVehiU=gsl_matrix_alloc (2, n_size); + gsl_matrix *UltVehiE=gsl_matrix_alloc (2, n_size); + + //large matrices for NR + gsl_matrix *Hi_all=gsl_matrix_alloc (2, 2*n_size); //each dxd block is H_k^{-1} + gsl_matrix *Hiy_all=gsl_matrix_alloc (2, n_size); //each column is H_k^{-1}y_k + gsl_matrix *xHi_all=gsl_matrix_alloc (2*c_size, 2*n_size); //each dcxdc block is x_k\otimes H_k^{-1} + gsl_matrix *Hessian=gsl_matrix_alloc (6, 6); + + //2 by n matrix of Y + gsl_matrix *Y_sub=gsl_matrix_alloc (2, n_size); + gsl_matrix *Vg_sub=gsl_matrix_alloc (2, 2); + gsl_matrix *Ve_sub=gsl_matrix_alloc (2, 2); + gsl_matrix *B_sub=gsl_matrix_alloc (2, c_size); + + for (size_t i=0; isize1, d_size=UtY->size2, c_size=UtW->size2; + + size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2; + + //large matrices for EM + gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size); + gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size); + gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size); + gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size); + gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size); + gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size); + gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size); + gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size); + + //large matrices for NR + gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); //each dxd block is H_k^{-1} + gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); //each column is H_k^{-1}y_k + gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); //each dcxdc block is x_k\otimes H_k^{-1} + gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2); + + gsl_vector *x=gsl_vector_alloc (n_size); + gsl_vector *x_miss=gsl_vector_alloc (n_size); + + gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size); + gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size); + gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1); + gsl_vector *beta=gsl_vector_alloc (d_size); + gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size); + + //null estimates for initial values + gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1); + gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size); + + gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size); + gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size); + gsl_matrix_view xHi_all_sub=gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size); + + gsl_matrix_transpose_memcpy (Y, UtY); + + gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW); + + gsl_vector_view X_row=gsl_matrix_row(X, c_size); + gsl_vector_set_zero(&X_row.vector); + gsl_vector_view B_col=gsl_matrix_column(B, c_size); + gsl_vector_set_zero(&B_col.vector); + + MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix); + logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix); + logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, se_B_null); + + c=0; + Vg_remle_null.clear(); + Ve_remle_null.clear(); + for (size_t i=0; isize1; i++) { + for (size_t j=0; jsize2; j++) { + beta_remle_null.push_back(gsl_matrix_get(B, i, j) ); + se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) ); + } + } + logl_remle_H0=logl_H0; + + cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + cout.precision(4); + + cout<<"REMLE estimate for Vg in the null model: "<1) { + gsl_vector_set(x, i, 2-geno); + } + } + + //calculate statistics + time_start=clock(); + gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row.vector); + time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + + //initial values + gsl_matrix_memcpy (V_g, V_g_null); + gsl_matrix_memcpy (V_e, V_e_null); + gsl_matrix_memcpy (B, B_null); + + time_start=clock(); + + //3 is before 1 + if (a_mode==3 || a_mode==4) { + p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g_null, V_e_null, UltVehiY, beta, Vbeta); + if (p_score1) {gsl_vector_scale(beta, -1.0);} + + time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + + //store summary data + //SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + for (size_t i=0; i b; + + // double lambda_mle=0, lambda_remle=0, beta=0, se=0, ; + double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0; + double crt_a, crt_b, crt_c; + int n_bit, n_miss, ci_total, ci_test; + double geno, x_mean; + size_t c=0; + // double s=0.0; + size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2; + size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2; + + //large matrices for EM + gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size); + gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size); + gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size); + gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size); + gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size); + gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size); + gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size); + gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size); + + //large matrices for NR + gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); //each dxd block is H_k^{-1} + gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); //each column is H_k^{-1}y_k + gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); //each dcxdc block is x_k\otimes H_k^{-1} + gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2); + + gsl_vector *x=gsl_vector_alloc (n_size); + + gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size); + gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size); + gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1); + gsl_vector *beta=gsl_vector_alloc (d_size); + gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size); + + //null estimates for initial values + gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size); + gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1); + gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size); + + gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size); + gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size); + gsl_matrix_view xHi_all_sub=gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size); + + gsl_matrix_transpose_memcpy (Y, UtY); + gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW); + + gsl_vector_view X_row=gsl_matrix_row(X, c_size); + gsl_vector_set_zero(&X_row.vector); + gsl_vector_view B_col=gsl_matrix_column(B, c_size); + gsl_vector_set_zero(&B_col.vector); + + //time_start=clock(); + MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix); + + logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix); + logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, se_B_null); + //cout<<"time for REML in the null = "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<size1; i++) { + for (size_t j=0; jsize2; j++) { + beta_remle_null.push_back(gsl_matrix_get(B, i, j) ); + se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) ); + } + } + logl_remle_H0=logl_H0; + + cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + cout.precision(4); + cout<<"REMLE estimate for Vg in the null model: "<=0) {break;} + //if (snpInfo[t].rs_number!="MAG18140902") {continue;} + //cout<1) { + gsl_vector_set(x, i, 2-geno); + } + } + + /* + if (t==0) { + ofstream outfile ("./snp1.txt", ofstream::out); + if (!outfile) {cout<<"error writing file: "<size; i++) { + outfile<1) {gsl_vector_scale(beta, -1.0);} + + time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + + //store summary data + //SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + for (size_t i=0; i