/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright (C) 2011-2017, Xiang Zhou
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_linalg.h"
#include "gsl/gsl_blas.h"
#include "gsl/gsl_eigen.h"
#include "gsl/gsl_randist.h"
#include "gsl/gsl_cdf.h"
#include "gsl/gsl_roots.h"
#include "logistic.h"
#include "lapack.h"
#include "io.h"
#include "param.h"
#include "bslmmdap.h"
#include "lmm.h"
#include "lm.h"
#include "mathfunc.h"
using namespace std;
void BSLMMDAP::CopyFromParam (PARAM &cPar) {
file_out=cPar.file_out;
path_out=cPar.path_out;
time_UtZ=0.0;
time_Omega=0.0;
h_min=cPar.h_min;
h_max=cPar.h_max;
h_ngrid=cPar.h_ngrid;
rho_min=cPar.rho_min;
rho_max=cPar.rho_max;
rho_ngrid=cPar.rho_ngrid;
if (h_min<=0) {h_min=0.01;}
if (h_max>=1) {h_max=0.99;}
if (rho_min<=0) {rho_min=0.01;}
if (rho_max>=1) {rho_max=0.99;}
trace_G=cPar.trace_G;
ni_total=cPar.ni_total;
ns_total=cPar.ns_total;
ni_test=cPar.ni_test;
ns_test=cPar.ns_test;
indicator_idv=cPar.indicator_idv;
indicator_snp=cPar.indicator_snp;
snpInfo=cPar.snpInfo;
return;
}
void BSLMMDAP::CopyToParam (PARAM &cPar) {
cPar.time_UtZ=time_UtZ;
cPar.time_Omega=time_Omega;
return;
}
// Read hyp file.
void ReadFile_hyb (const string &file_hyp, vector &vec_sa2,
vector &vec_sb2, vector &vec_wab) {
vec_sa2.clear(); vec_sb2.clear(); vec_wab.clear();
igzstream infile (file_hyp.c_str(), igzstream::in);
if (!infile) {
cout<<"error! fail to open hyp file: "< &vec_rs,
vector > > &BF) {
BF.clear(); vec_rs.clear();
igzstream infile (file_bf.c_str(), igzstream::in);
if (!infile) {cout<<"error! fail to open bf file: "< vec_bf;
vector > mat_bf;
char *ch_ptr;
size_t bf_size, flag_block;
getline(infile, line);
size_t t=0;
while (!safeGetline(infile, line).eof()) {
flag_block=0;
ch_ptr=strtok ((char *)line.c_str(), " , \t");
rs=ch_ptr;
vec_rs.push_back(rs);
ch_ptr=strtok (NULL, " , \t");
if (t==0) {
block=ch_ptr;
} else {
if (strcmp(ch_ptr, block.c_str() )!=0) {
flag_block=1;
block=ch_ptr;
}
}
ch_ptr=strtok (NULL, " , \t");
while (ch_ptr!=NULL) {
vec_bf.push_back(atof(ch_ptr));
ch_ptr=strtok (NULL, " , \t");
}
if (t==0) {
bf_size=vec_bf.size();
} else {
if (bf_size!=vec_bf.size()) {
cout<<"error! unequal row size in bf file."< &vec_rs,
gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel,
size_t &kc, size_t &kd) {
igzstream infile (file_cat.c_str(), igzstream::in);
if (!infile) {
cout<<"error! fail to open category file: "< > mapRS2catc;
map > mapRS2catd;
vector catc;
vector catd;
// Read the following lines to record mapRS2cat.
while (!safeGetline(infile, line).eof()) {
ch_ptr=strtok ((char *)line.c_str(), " , \t");
if (header.rs_col==0) {
rs=chr+":"+pos;
}
catc.clear(); catd.clear();
for (size_t i=0; i0) {mapRS2catc[rs]=catc;}
if (mapRS2catd.count(rs)==0 && kd>0) {mapRS2catd[rs]=catd;}
}
// Load into Ad and Ac.
if (kc>0) {
Ac=gsl_matrix_alloc(vec_rs.size(), kc);
for (size_t i=0; i0) {
Ad=gsl_matrix_int_alloc(vec_rs.size(), kd);
for (size_t i=0; i rcd;
int val;
for (size_t j=0; jsize1; i++) {
val = gsl_matrix_int_get(Ad, i, j);
rcd[val] = 1;
}
gsl_vector_int_set (dlevel, j, rcd.size());
}
}
infile.clear();
infile.close();
return;
}
void BSLMMDAP::WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF) {
string file_bf, file_hyp;
file_bf=path_out+"/"+file_out;
file_bf+=".bf.txt";
file_hyp=path_out+"/"+file_out;
file_hyp+=".hyp.txt";
ofstream outfile_bf, outfile_hyp;
outfile_bf.open (file_bf.c_str(), ofstream::out);
outfile_hyp.open (file_hyp.c_str(), ofstream::out);
if (!outfile_bf) {
cout<<"error writing file: "<size1; i++) {
for (size_t j=0; jsize2; j++) {
outfile_hyp<size2; i++) {
outfile_bf<<"\t"<<"BF"<size2; j++) {
outfile_bf<<"\t"< &vec_rs,
const gsl_matrix *Hyper, const gsl_vector *pip,
const gsl_vector *coef) {
string file_gamma, file_hyp, file_coef;
file_gamma=path_out+"/"+file_out;
file_gamma+=".gamma.txt";
file_hyp=path_out+"/"+file_out;
file_hyp+=".hyp.txt";
file_coef=path_out+"/"+file_out;
file_coef+=".coef.txt";
ofstream outfile_gamma, outfile_hyp, outfile_coef;
outfile_gamma.open (file_gamma.c_str(), ofstream::out);
outfile_hyp.open (file_hyp.c_str(), ofstream::out);
outfile_coef.open (file_coef.c_str(), ofstream::out);
if (!outfile_gamma) {
cout<<"error writing file: "<size1; i++) {
for (size_t j=0; jsize2; j++) {
outfile_hyp<size; i++) {
outfile_coef<size);
double logm=0.0;
double d, uy, Hi_yy=0, logdet_H=0.0;
for (size_t i=0; isize1,
UtXgamma->size2);
gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2);
gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2);
gsl_vector *beta_hat=gsl_vector_alloc (UtXgamma->size2);
gsl_vector *weight_Hi=gsl_vector_alloc (UtXgamma->size1);
gsl_matrix_memcpy (UtXgamma_eval, UtXgamma);
logdet_H=0.0; P_yy=0.0;
for (size_t i=0; i vec_sa2, vec_sb2, logm_null;
gsl_matrix *BF=gsl_matrix_alloc(ns_test, n_grid);
gsl_matrix *Xgamma=gsl_matrix_alloc(ni_test, 1);
gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5);
// Compute tau by using yty.
gsl_blas_ddot (Uty, Uty, &tau);
tau=(double)ni_test/tau;
// Set up grid values for sigma_a2 and sigma_b2 based on an
// approximately even grid for h and rho, and a fixed number
// of causals.
size_t ij=0;
for (size_t i=0; i sum_pip;
map sum;
int levels = gsl_vector_int_get(dlevel,0);
for(int i=0;isize1;i++){
int cat = gsl_matrix_int_get(Xd,i,0);
sum_pip[cat] += gsl_vector_get(pip_vec,i);
sum[cat] += 1;
}
for(int i=0;isize1;i++){
int cat = gsl_matrix_int_get(Xd,i,0);
gsl_vector_set(prior_vec,i,sum_pip[cat]/sum[cat]);
}
for(int i=0;i &vec_rs,
const vector &vec_sa2,
const vector &vec_sb2,
const vector &wab,
const vector > > &BF,
gsl_matrix *Ac, gsl_matrix_int *Ad,
gsl_vector_int *dlevel) {
clock_t time_start;
// Set up BF.
double h, rho, sigma_a2, sigma_b2, d, s, logm, logm_save;
size_t t1, t2;
size_t n_grid=wab.size(), ns_test=vec_rs.size();
gsl_vector *prior_vec=gsl_vector_alloc(ns_test);
gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5);
gsl_vector *pip=gsl_vector_alloc(ns_test);
gsl_vector *coef=gsl_vector_alloc(kc+kd+1);
// Perform the EM algorithm.
vector vec_wab, vec_wab_new;
// Initial values.
for (size_t t=0; t1e-3) {
// Update E_gamma.
t1=0, t2=0;
for (size_t b=0; bsize; t++) {
s+=gsl_vector_get(pip, t);
}
s=s/(double)pip->size;
for (size_t t=0; tsize; t++) {
gsl_vector_set(prior_vec, t, s);
}
gsl_vector_set (coef, 0, log(s/(1-s)));
} else if(kc==0 && kd!=0){
// Only discrete annotations.
if(kd == 1){
single_ct_regression(Ad, dlevel, pip, coef, prior_vec);
}else{
logistic_cat_fit(coef, Ad, dlevel, pip, 0, 0);
logistic_cat_pred(coef, Ad, dlevel, prior_vec);
}
} else if (kc!=0 && kd==0) {
// Only continuous annotations.
logistic_cont_fit(coef, Ac, pip, 0, 0);
logistic_cont_pred(coef, Ac, prior_vec);
} else if (kc!=0 && kd!=0) {
// Both continuous and categorical annotations.
logistic_mixed_fit(coef, Ad, dlevel, Ac, pip, 0, 0);
logistic_mixed_pred(coef, Ad, dlevel, Ac, prior_vec);
}
// Compute marginal likelihood.
logm=0;
t1=0;
for (size_t b=0; b0) {
dif=logm-logm_save;
}
logm_save=logm;
it++;
cout<<"iteration = "<