From 0dd4e05fc8babc1517de1d7981a99ad0a5241a5e Mon Sep 17 00:00:00 2001
From: Peter Carbonetto
Date: Thu, 4 May 2017 14:43:12 -0500
Subject: Added new files shared by Xiang via email on May 4, 2017.
---
src/bslmmdap.cpp | 1015 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
src/bslmmdap.h | 101 ++++++
src/ldr.cpp | 285 +++++++++++++++
src/ldr.h | 77 +++++
src/logistic.cpp | 783 +++++++++++++++++++++++++++++++++++++++++
src/logistic.h | 70 ++++
src/varcov.cpp | 482 ++++++++++++++++++++++++++
src/varcov.h | 72 ++++
8 files changed, 2885 insertions(+)
create mode 100644 src/bslmmdap.cpp
create mode 100644 src/bslmmdap.h
create mode 100644 src/ldr.cpp
create mode 100644 src/ldr.h
create mode 100644 src/logistic.cpp
create mode 100644 src/logistic.h
create mode 100644 src/varcov.cpp
create mode 100644 src/varcov.h
diff --git a/src/bslmmdap.cpp b/src/bslmmdap.cpp
new file mode 100644
index 0000000..0bf0e7b
--- /dev/null
+++ b/src/bslmmdap.cpp
@@ -0,0 +1,1015 @@
+/*
+ Genome-wide Efficient Mixed Model Association (GEMMA)
+ Copyright (C) 2011 Xiang Zhou
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+ */
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "gsl/gsl_vector.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_blas.h"
+#include "gsl/gsl_eigen.h"
+#include "gsl/gsl_randist.h"
+#include "gsl/gsl_cdf.h"
+#include "gsl/gsl_roots.h"
+
+
+
+#include "logistic.h"
+#include "lapack.h"
+
+#ifdef FORCE_FLOAT
+#include "param_float.h"
+#include "bslmmdap_float.h"
+#include "lmm_float.h" //for class FUNC_PARAM and MatrixCalcLR
+#include "lm_float.h"
+#include "mathfunc_float.h" //for function CenterVector
+#else
+#include "param.h"
+#include "bslmmdap.h"
+#include "lmm.h"
+#include "lm.h"
+#include "mathfunc.h"
+#endif
+
+using namespace std;
+
+
+
+
+void BSLMMDAP::CopyFromParam (PARAM &cPar)
+{
+ file_out=cPar.file_out;
+ path_out=cPar.path_out;
+
+ time_UtZ=0.0;
+ time_Omega=0.0;
+
+ h_min=cPar.h_min;
+ h_max=cPar.h_max;
+ h_ngrid=cPar.h_ngrid;
+ rho_min=cPar.rho_min;
+ rho_max=cPar.rho_max;
+ rho_ngrid=cPar.rho_ngrid;
+
+ if (h_min<=0) {h_min=0.01;}
+ if (h_max>=1) {h_max=0.99;}
+ if (rho_min<=0) {rho_min=0.01;}
+ if (rho_max>=1) {rho_max=0.99;}
+
+ trace_G=cPar.trace_G;
+
+ ni_total=cPar.ni_total;
+ ns_total=cPar.ns_total;
+ ni_test=cPar.ni_test;
+ ns_test=cPar.ns_test;
+
+ indicator_idv=cPar.indicator_idv;
+ indicator_snp=cPar.indicator_snp;
+ snpInfo=cPar.snpInfo;
+
+ return;
+}
+
+
+void BSLMMDAP::CopyToParam (PARAM &cPar)
+{
+ cPar.time_UtZ=time_UtZ;
+ cPar.time_Omega=time_Omega;
+
+ return;
+}
+
+
+
+//read hyp file
+void ReadFile_hyb (const string &file_hyp, vector &vec_sa2, vector &vec_sb2, vector &vec_wab)
+{
+ vec_sa2.clear(); vec_sb2.clear(); vec_wab.clear();
+
+ igzstream infile (file_hyp.c_str(), igzstream::in);
+ if (!infile) {cout<<"error! fail to open hyp file: "< &vec_rs, vector > > &BF)
+{
+ BF.clear(); vec_rs.clear();
+
+ igzstream infile (file_bf.c_str(), igzstream::in);
+ if (!infile) {cout<<"error! fail to open bf file: "< vec_bf;
+ vector > mat_bf;
+ char *ch_ptr;
+
+ size_t bf_size, flag_block;
+
+ getline(infile, line);
+
+ size_t t=0;
+ while (!safeGetline(infile, line).eof()) {
+ flag_block=0;
+
+ ch_ptr=strtok ((char *)line.c_str(), " , \t");
+ rs=ch_ptr;
+ vec_rs.push_back(rs);
+
+ ch_ptr=strtok (NULL, " , \t");
+ if (t==0) {
+ block=ch_ptr;
+ } else {
+ if (strcmp(ch_ptr, block.c_str() )!=0) {
+ flag_block=1;
+ block=ch_ptr;
+ }
+ }
+
+ ch_ptr=strtok (NULL, " , \t");
+ while (ch_ptr!=NULL) {
+ vec_bf.push_back(atof(ch_ptr));
+ ch_ptr=strtok (NULL, " , \t");
+ }
+
+ if (t==0) {
+ bf_size=vec_bf.size();
+ } else {
+ if (bf_size!=vec_bf.size()) {cout<<"error! unequal row size in bf file."< &vec_rs, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel, size_t &kc, size_t &kd)
+{
+ igzstream infile (file_cat.c_str(), igzstream::in);
+ if (!infile) {cout<<"error! fail to open category file: "< > mapRS2catc;
+ map > mapRS2catd;
+ vector catc;
+ vector catd;
+
+ //read the following lines to record mapRS2cat
+ while (!safeGetline(infile, line).eof()) {
+ ch_ptr=strtok ((char *)line.c_str(), " , \t");
+
+ if (header.rs_col==0) {
+ rs=chr+":"+pos;
+ }
+
+ catc.clear(); catd.clear();
+
+ for (size_t i=0; i0) {mapRS2catc[rs]=catc;}
+ if (mapRS2catd.count(rs)==0 && kd>0) {mapRS2catd[rs]=catd;}
+ }
+
+ //load into Ad and Ac
+ if (kc>0) {
+ Ac=gsl_matrix_alloc(vec_rs.size(), kc);
+ for (size_t i=0; i0) {
+ Ad=gsl_matrix_int_alloc(vec_rs.size(), kd);
+
+ for (size_t i=0; i rcd;
+ int val;
+ for (size_t j=0; jsize1; i++) {
+ val = gsl_matrix_int_get(Ad, i, j);
+ rcd[val] = 1;
+ }
+ gsl_vector_int_set (dlevel, j, rcd.size());
+ }
+ }
+
+ infile.clear();
+ infile.close();
+
+ return;
+}
+
+
+
+
+
+
+
+
+void BSLMMDAP::WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF)
+{
+ string file_bf, file_hyp;
+ file_bf=path_out+"/"+file_out;
+ file_bf+=".bf.txt";
+ file_hyp=path_out+"/"+file_out;
+ file_hyp+=".hyp.txt";
+
+ ofstream outfile_bf, outfile_hyp;
+
+ outfile_bf.open (file_bf.c_str(), ofstream::out);
+ outfile_hyp.open (file_hyp.c_str(), ofstream::out);
+
+ if (!outfile_bf) {cout<<"error writing file: "<size1; i++) {
+ for (size_t j=0; jsize2; j++) {
+ outfile_hyp<size2; i++) {
+ outfile_bf<<"\t"<<"BF"<size2; j++) {
+ outfile_bf<<"\t"< &vec_rs, const gsl_matrix *Hyper, const gsl_vector *pip, const gsl_vector *coef)
+{
+ string file_gamma, file_hyp, file_coef;
+ file_gamma=path_out+"/"+file_out;
+ file_gamma+=".gamma.txt";
+ file_hyp=path_out+"/"+file_out;
+ file_hyp+=".hyp.txt";
+ file_coef=path_out+"/"+file_out;
+ file_coef+=".coef.txt";
+
+ ofstream outfile_gamma, outfile_hyp, outfile_coef;
+
+ outfile_gamma.open (file_gamma.c_str(), ofstream::out);
+ outfile_hyp.open (file_hyp.c_str(), ofstream::out);
+ outfile_coef.open (file_coef.c_str(), ofstream::out);
+
+ if (!outfile_gamma) {cout<<"error writing file: "<size1; i++) {
+ for (size_t j=0; jsize2; j++) {
+ outfile_hyp<size; i++) {
+ outfile_coef< &rank)
+{
+ size_t pos;
+ for (size_t i=0; isize);
+
+ double logm=0.0;
+ double d, uy, Hi_yy=0, logdet_H=0.0;
+ for (size_t i=0; isize1, UtXgamma->size2);
+ gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2);
+ gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2);
+ gsl_vector *beta_hat=gsl_vector_alloc (UtXgamma->size2);
+ gsl_vector *weight_Hi=gsl_vector_alloc (UtXgamma->size1);
+
+ gsl_matrix_memcpy (UtXgamma_eval, UtXgamma);
+
+ logdet_H=0.0; P_yy=0.0;
+ for (size_t i=0; i vec_sa2, vec_sb2, logm_null;
+
+ gsl_matrix *BF=gsl_matrix_alloc(ns_test, n_grid);
+ gsl_matrix *Xgamma=gsl_matrix_alloc(ni_test, 1);
+ gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5);
+
+ //compute tau by using yty
+ gsl_blas_ddot (Uty, Uty, &tau);
+ tau=(double)ni_test/tau;
+
+ //set up grid values for sigma_a2 and sigma_b2 based on an approximately even grid for h and rho, and a fixed number of causals
+ size_t ij=0;
+ for (size_t i=0; i sum_pip;
+ map sum;
+
+ int levels = gsl_vector_int_get(dlevel,0);
+
+ for(int i=0;isize1;i++){
+ int cat = gsl_matrix_int_get(Xd,i,0);
+ sum_pip[cat] += gsl_vector_get(pip_vec,i);
+ sum[cat] += 1;
+ }
+
+ for(int i=0;isize1;i++){
+ int cat = gsl_matrix_int_get(Xd,i,0);
+ gsl_vector_set(prior_vec,i,sum_pip[cat]/sum[cat]);
+ }
+
+ //double baseline=0;
+ for(int i=0;i &vec_rs, const vector &vec_sa2, const vector &vec_sb2, const vector &wab, const vector > > &BF, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel) {
+ clock_t time_start;
+
+ //set up BF
+ double h, rho, sigma_a2, sigma_b2, d, s, logm, logm_save;
+ size_t t1, t2;
+ size_t n_grid=wab.size(), ns_test=vec_rs.size();
+
+ gsl_vector *prior_vec=gsl_vector_alloc(ns_test);
+ gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5);
+ gsl_vector *pip=gsl_vector_alloc(ns_test);
+ gsl_vector *coef=gsl_vector_alloc(kc+kd+1);
+
+ //perform the EM algorithm
+ vector vec_wab, vec_wab_new;
+
+ //initial values
+ for (size_t t=0; t1e-3) {
+ //update E_gamma
+ t1=0, t2=0;
+ for (size_t b=0; bsize; t++) {
+ s+=gsl_vector_get(pip, t);
+ }
+ s=s/(double)pip->size;
+ for (size_t t=0; tsize; t++) {
+ gsl_vector_set(prior_vec, t, s);
+ }
+
+ gsl_vector_set (coef, 0, log(s/(1-s)));
+ } else if(kc==0 && kd!=0){//only discrete annotations
+ if(kd == 1){
+ single_ct_regression(Ad, dlevel, pip, coef, prior_vec);
+ }else{
+ logistic_cat_fit(coef, Ad, dlevel, pip, 0, 0);
+ logistic_cat_pred(coef, Ad, dlevel, prior_vec);
+ }
+ } else if (kc!=0 && kd==0) {//only continuous annotations
+ logistic_cont_fit(coef, Ac, pip, 0, 0);
+ logistic_cont_pred(coef, Ac, prior_vec);
+ } else if (kc!=0 && kd!=0) {//both continuous and categorical annotations
+ logistic_mixed_fit(coef, Ad, dlevel, Ac, pip, 0, 0);
+ logistic_mixed_pred(coef, Ad, dlevel, Ac, prior_vec);
+ }
+
+ //compute marginal likelihood
+ logm=0;
+
+ t1=0;
+ for (size_t b=0; b0) {
+ dif=logm-logm_save;
+ }
+ logm_save=logm;
+ it++;
+
+ cout<<"iteration = "<10) {break;}
+
+ if (a_mode==13) {
+ SampleZ (y, z_hat, z);
+ mean_z=CenterVector (z);
+
+ time_start=clock();
+ gsl_blas_dgemv (CblasTrans, 1.0, U, z, 0.0, Utz);
+ time_UtZ+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ //First proposal
+ if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) {
+ logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old);
+ beta_old.clear();
+ for (size_t i=0; isize; ++i) {
+ beta_old.push_back(gsl_vector_get(beta, i));
+ }
+ gsl_matrix_free (UtXgamma);
+ gsl_vector_free (beta);
+ }
+ }
+
+
+ delete [] p_gamma;
+ beta_g.clear();
+
+ return;
+}
+
+*/
+
+
+
+
+
+
+/*
+//below fits MCMC for rho=1
+void BSLMM::CalcXtX (const gsl_matrix *X, const gsl_vector *y, const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty)
+{
+ time_t time_start=clock();
+ gsl_matrix_const_view X_sub=gsl_matrix_const_submatrix(X, 0, 0, X->size1, s_size);
+ gsl_matrix_view XtX_sub=gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size);
+ gsl_vector_view Xty_sub=gsl_vector_subvector(Xty, 0, s_size);
+
+#ifdef WITH_LAPACK
+ lapack_dgemm ((char *)"T", (char *)"N", 1.0, &X_sub.matrix, &X_sub.matrix, 0.0, &XtX_sub.matrix);
+#else
+ gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, &X_sub.matrix, &X_sub.matrix, 0.0, &XtX_sub.matrix);
+#endif
+ gsl_blas_dgemv(CblasTrans, 1.0, &X_sub.matrix, y, 0.0, &Xty_sub.vector);
+
+ time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ return;
+}
+
+
+
+double BSLMM::CalcPosterior (const double yty, class HYPBSLMM &cHyp)
+{
+ double logpost=0.0;
+
+ //for quantitative traits, calculate pve and pge
+ //pve and pge for case/control data are calculted in CalcCC_PVEnZ
+ if (a_mode==11) {
+ cHyp.pve=0.0;
+ cHyp.pge=1.0;
+ }
+
+ //calculate likelihood
+ if (a_mode==11) {logpost-=0.5*(double)ni_test*log(yty);}
+ else {logpost-=0.5*yty;}
+
+ logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp));
+
+ return logpost;
+}
+
+
+double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, const gsl_vector *Xty, const double yty, const size_t s_size, gsl_vector *Xb, gsl_vector *beta, class HYPBSLMM &cHyp)
+{
+ double sigma_a2=cHyp.h/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test);
+ double logpost=0.0;
+ double d, P_yy=yty, logdet_O=0.0;
+
+ gsl_matrix_const_view Xgamma_sub=gsl_matrix_const_submatrix (Xgamma, 0, 0, Xgamma->size1, s_size);
+ gsl_matrix_const_view XtX_sub=gsl_matrix_const_submatrix (XtX, 0, 0, s_size, s_size);
+ gsl_vector_const_view Xty_sub=gsl_vector_const_subvector (Xty, 0, s_size);
+
+ gsl_matrix *Omega=gsl_matrix_alloc (s_size, s_size);
+ gsl_matrix *M_temp=gsl_matrix_alloc (s_size, s_size);
+ gsl_vector *beta_hat=gsl_vector_alloc (s_size);
+ gsl_vector *Xty_temp=gsl_vector_alloc (s_size);
+
+ gsl_vector_memcpy (Xty_temp, &Xty_sub.vector);
+
+ //calculate Omega
+ gsl_matrix_memcpy (Omega, &XtX_sub.matrix);
+ gsl_matrix_scale (Omega, sigma_a2);
+ gsl_matrix_set_identity (M_temp);
+ gsl_matrix_add (Omega, M_temp);
+
+ //calculate beta_hat
+ logdet_O=CholeskySolve(Omega, Xty_temp, beta_hat);
+ gsl_vector_scale (beta_hat, sigma_a2);
+
+ gsl_blas_ddot (Xty_temp, beta_hat, &d);
+ P_yy-=d;
+
+ //sample tau
+ double tau=1.0;
+ if (a_mode==11) {tau =gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/P_yy); }
+
+ //sample beta
+ for (size_t i=0; i.
+ */
+
+
+#ifndef __BSLMMDAP_H__
+#define __BSLMMDAP_H__
+
+#include
+#include