/* Genome-wide Efficient Mixed Model Association (GEMMA) Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include #include #include #include #include #include #include #include #include #include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" #include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" #include "gsl/gsl_eigen.h" #include "gsl/gsl_randist.h" #include "gsl/gsl_cdf.h" #include "gsl/gsl_roots.h" #include "logistic.h" #include "lapack.h" #include "io.h" #include "param.h" #include "bslmmdap.h" #include "lmm.h" #include "lm.h" #include "mathfunc.h" using namespace std; void BSLMMDAP::CopyFromParam (PARAM &cPar) { file_out=cPar.file_out; path_out=cPar.path_out; time_UtZ=0.0; time_Omega=0.0; h_min=cPar.h_min; h_max=cPar.h_max; h_ngrid=cPar.h_ngrid; rho_min=cPar.rho_min; rho_max=cPar.rho_max; rho_ngrid=cPar.rho_ngrid; if (h_min<=0) {h_min=0.01;} if (h_max>=1) {h_max=0.99;} if (rho_min<=0) {rho_min=0.01;} if (rho_max>=1) {rho_max=0.99;} trace_G=cPar.trace_G; ni_total=cPar.ni_total; ns_total=cPar.ns_total; ni_test=cPar.ni_test; ns_test=cPar.ns_test; indicator_idv=cPar.indicator_idv; indicator_snp=cPar.indicator_snp; snpInfo=cPar.snpInfo; return; } void BSLMMDAP::CopyToParam (PARAM &cPar) { cPar.time_UtZ=time_UtZ; cPar.time_Omega=time_Omega; return; } // Read hyp file. void ReadFile_hyb (const string &file_hyp, vector &vec_sa2, vector &vec_sb2, vector &vec_wab) { vec_sa2.clear(); vec_sb2.clear(); vec_wab.clear(); igzstream infile (file_hyp.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open hyp file: "< &vec_rs, vector > > &BF) { BF.clear(); vec_rs.clear(); igzstream infile (file_bf.c_str(), igzstream::in); if (!infile) {cout<<"error! fail to open bf file: "< vec_bf; vector > mat_bf; char *ch_ptr; size_t bf_size, flag_block; getline(infile, line); size_t t=0; while (!safeGetline(infile, line).eof()) { flag_block=0; ch_ptr=strtok ((char *)line.c_str(), " , \t"); rs=ch_ptr; vec_rs.push_back(rs); ch_ptr=strtok (NULL, " , \t"); if (t==0) { block=ch_ptr; } else { if (strcmp(ch_ptr, block.c_str() )!=0) { flag_block=1; block=ch_ptr; } } ch_ptr=strtok (NULL, " , \t"); while (ch_ptr!=NULL) { vec_bf.push_back(atof(ch_ptr)); ch_ptr=strtok (NULL, " , \t"); } if (t==0) { bf_size=vec_bf.size(); } else { if (bf_size!=vec_bf.size()) { cout<<"error! unequal row size in bf file."< &vec_rs, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel, size_t &kc, size_t &kd) { igzstream infile (file_cat.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open category file: "< > mapRS2catc; map > mapRS2catd; vector catc; vector catd; // Read the following lines to record mapRS2cat. while (!safeGetline(infile, line).eof()) { ch_ptr=strtok ((char *)line.c_str(), " , \t"); if (header.rs_col==0) { rs=chr+":"+pos; } catc.clear(); catd.clear(); for (size_t i=0; i0) {mapRS2catc[rs]=catc;} if (mapRS2catd.count(rs)==0 && kd>0) {mapRS2catd[rs]=catd;} } // Load into Ad and Ac. if (kc>0) { Ac=gsl_matrix_alloc(vec_rs.size(), kc); for (size_t i=0; i0) { Ad=gsl_matrix_int_alloc(vec_rs.size(), kd); for (size_t i=0; i rcd; int val; for (size_t j=0; jsize1; i++) { val = gsl_matrix_int_get(Ad, i, j); rcd[val] = 1; } gsl_vector_int_set (dlevel, j, rcd.size()); } } infile.clear(); infile.close(); return; } void BSLMMDAP::WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF) { string file_bf, file_hyp; file_bf=path_out+"/"+file_out; file_bf+=".bf.txt"; file_hyp=path_out+"/"+file_out; file_hyp+=".hyp.txt"; ofstream outfile_bf, outfile_hyp; outfile_bf.open (file_bf.c_str(), ofstream::out); outfile_hyp.open (file_hyp.c_str(), ofstream::out); if (!outfile_bf) { cout<<"error writing file: "<size1; i++) { for (size_t j=0; jsize2; j++) { outfile_hyp<size2; i++) { outfile_bf<<"\t"<<"BF"<size2; j++) { outfile_bf<<"\t"< &vec_rs, const gsl_matrix *Hyper, const gsl_vector *pip, const gsl_vector *coef) { string file_gamma, file_hyp, file_coef; file_gamma=path_out+"/"+file_out; file_gamma+=".gamma.txt"; file_hyp=path_out+"/"+file_out; file_hyp+=".hyp.txt"; file_coef=path_out+"/"+file_out; file_coef+=".coef.txt"; ofstream outfile_gamma, outfile_hyp, outfile_coef; outfile_gamma.open (file_gamma.c_str(), ofstream::out); outfile_hyp.open (file_hyp.c_str(), ofstream::out); outfile_coef.open (file_coef.c_str(), ofstream::out); if (!outfile_gamma) { cout<<"error writing file: "<size1; i++) { for (size_t j=0; jsize2; j++) { outfile_hyp<size; i++) { outfile_coef<size); double logm=0.0; double d, uy, Hi_yy=0, logdet_H=0.0; for (size_t i=0; isize1, UtXgamma->size2); gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2); gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2); gsl_vector *beta_hat=gsl_vector_alloc (UtXgamma->size2); gsl_vector *weight_Hi=gsl_vector_alloc (UtXgamma->size1); gsl_matrix_memcpy (UtXgamma_eval, UtXgamma); logdet_H=0.0; P_yy=0.0; for (size_t i=0; i vec_sa2, vec_sb2, logm_null; gsl_matrix *BF=gsl_matrix_alloc(ns_test, n_grid); gsl_matrix *Xgamma=gsl_matrix_alloc(ni_test, 1); gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5); // Compute tau by using yty. gsl_blas_ddot (Uty, Uty, &tau); tau=(double)ni_test/tau; // Set up grid values for sigma_a2 and sigma_b2 based on an // approximately even grid for h and rho, and a fixed number // of causals. size_t ij=0; for (size_t i=0; i sum_pip; map sum; int levels = gsl_vector_int_get(dlevel,0); for(int i=0;isize1;i++){ int cat = gsl_matrix_int_get(Xd,i,0); sum_pip[cat] += gsl_vector_get(pip_vec,i); sum[cat] += 1; } for(int i=0;isize1;i++){ int cat = gsl_matrix_int_get(Xd,i,0); gsl_vector_set(prior_vec,i,sum_pip[cat]/sum[cat]); } for(int i=0;i &vec_rs, const vector &vec_sa2, const vector &vec_sb2, const vector &wab, const vector > > &BF, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel) { clock_t time_start; // Set up BF. double h, rho, sigma_a2, sigma_b2, d, s, logm, logm_save; size_t t1, t2; size_t n_grid=wab.size(), ns_test=vec_rs.size(); gsl_vector *prior_vec=gsl_vector_alloc(ns_test); gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5); gsl_vector *pip=gsl_vector_alloc(ns_test); gsl_vector *coef=gsl_vector_alloc(kc+kd+1); // Perform the EM algorithm. vector vec_wab, vec_wab_new; // Initial values. for (size_t t=0; t1e-3) { // Update E_gamma. t1=0, t2=0; for (size_t b=0; bsize; t++) { s+=gsl_vector_get(pip, t); } s=s/(double)pip->size; for (size_t t=0; tsize; t++) { gsl_vector_set(prior_vec, t, s); } gsl_vector_set (coef, 0, log(s/(1-s))); } else if(kc==0 && kd!=0){ // Only discrete annotations. if(kd == 1){ single_ct_regression(Ad, dlevel, pip, coef, prior_vec); }else{ logistic_cat_fit(coef, Ad, dlevel, pip, 0, 0); logistic_cat_pred(coef, Ad, dlevel, prior_vec); } } else if (kc!=0 && kd==0) { // Only continuous annotations. logistic_cont_fit(coef, Ac, pip, 0, 0); logistic_cont_pred(coef, Ac, prior_vec); } else if (kc!=0 && kd!=0) { // Both continuous and categorical annotations. logistic_mixed_fit(coef, Ad, dlevel, Ac, pip, 0, 0); logistic_mixed_pred(coef, Ad, dlevel, Ac, prior_vec); } // Compute marginal likelihood. logm=0; t1=0; for (size_t b=0; b0) { dif=logm-logm_save; } logm_save=logm; it++; cout<<"iteration = "<