8 files changed, 2885 insertions, 0 deletions
diff --git a/src/bslmmdap.cpp b/src/bslmmdap.cpp
new file mode 100644
index 0000000..0bf0e7b
--- /dev/null
+++ b/src/bslmmdap.cpp
@@ -0,0 +1,1015 @@
+/*
+ Genome-wide Efficient Mixed Model Association (GEMMA)
+ Copyright (C) 2011  Xiang Zhou
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+
+#include <iomanip>
+#include <cmath>
+#include <iostream>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctime>
+#include <cstring>
+#include <algorithm>
+
+#include "gsl/gsl_vector.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_blas.h"
+#include "gsl/gsl_eigen.h"
+#include "gsl/gsl_randist.h"
+#include "gsl/gsl_cdf.h"
+#include "gsl/gsl_roots.h"
+
+
+
+#include "logistic.h"
+#include "lapack.h"
+
+#ifdef FORCE_FLOAT
+#include "param_float.h"
+#include "bslmmdap_float.h"
+#include "lmm_float.h"  //for class FUNC_PARAM and MatrixCalcLR
+#include "lm_float.h"
+#include "mathfunc_float.h"  //for function CenterVector
+#else
+#include "param.h"
+#include "bslmmdap.h"
+#include "lmm.h"
+#include "lm.h"
+#include "mathfunc.h"
+#endif
+
+using namespace std;
+
+
+
+
+void BSLMMDAP::CopyFromParam (PARAM &cPar)
+{
+	file_out=cPar.file_out;
+	path_out=cPar.path_out;
+
+	time_UtZ=0.0;
+	time_Omega=0.0;
+
+	h_min=cPar.h_min;
+	h_max=cPar.h_max;
+	h_ngrid=cPar.h_ngrid;
+	rho_min=cPar.rho_min;
+	rho_max=cPar.rho_max;
+	rho_ngrid=cPar.rho_ngrid;
+
+	if (h_min<=0) {h_min=0.01;}
+	if (h_max>=1) {h_max=0.99;}
+	if (rho_min<=0) {rho_min=0.01;}
+	if (rho_max>=1) {rho_max=0.99;}
+
+	trace_G=cPar.trace_G;
+
+	ni_total=cPar.ni_total;
+	ns_total=cPar.ns_total;
+	ni_test=cPar.ni_test;
+	ns_test=cPar.ns_test;
+
+	indicator_idv=cPar.indicator_idv;
+	indicator_snp=cPar.indicator_snp;
+	snpInfo=cPar.snpInfo;
+
+	return;
+}
+
+
+void BSLMMDAP::CopyToParam (PARAM &cPar)
+{
+	cPar.time_UtZ=time_UtZ;
+	cPar.time_Omega=time_Omega;
+
+	return;
+}
+
+
+
+//read hyp file
+void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, vector<double> &vec_sb2, vector<double> &vec_wab)
+{
+  vec_sa2.clear(); vec_sb2.clear(); vec_wab.clear();
+
+  igzstream infile (file_hyp.c_str(), igzstream::in);
+  if (!infile) {cout<<"error! fail to open hyp file: "<<file_hyp<<endl; return;}
+
+  string line;
+  char *ch_ptr;
+
+  getline(infile, line);
+
+  while (!safeGetline(infile, line).eof()) {
+    ch_ptr=strtok ((char *)line.c_str(), " , \t");
+    ch_ptr=strtok (NULL, " , \t");
+
+    ch_ptr=strtok (NULL, " , \t");
+    vec_sa2.push_back(atof(ch_ptr));
+
+    ch_ptr=strtok (NULL, " , \t");
+    vec_sb2.push_back(atof(ch_ptr));
+
+    ch_ptr=strtok (NULL, " , \t");
+    vec_wab.push_back(atof(ch_ptr));
+  }
+
+  infile.close();
+  infile.clear();
+
+  return;
+}
+
+
+//read bf file
+void ReadFile_bf (const string &file_bf, vector<string> &vec_rs, vector<vector<vector<double> > > &BF)
+{
+  BF.clear(); vec_rs.clear();
+
+  igzstream infile (file_bf.c_str(), igzstream::in);
+  if (!infile) {cout<<"error! fail to open bf file: "<<file_bf<<endl; return;}
+
+  string line, rs, block;
+  vector<double> vec_bf;
+  vector<vector<double> > mat_bf;
+  char *ch_ptr;
+
+  size_t bf_size, flag_block;
+
+  getline(infile, line);
+
+  size_t t=0;
+  while (!safeGetline(infile, line).eof()) {
+    flag_block=0;
+
+    ch_ptr=strtok ((char *)line.c_str(), " , \t");
+    rs=ch_ptr;
+    vec_rs.push_back(rs);
+
+    ch_ptr=strtok (NULL, " , \t");
+    if (t==0) {
+      block=ch_ptr;
+    } else {
+      if (strcmp(ch_ptr, block.c_str() )!=0) {
+	flag_block=1;
+	block=ch_ptr;
+      }
+    }
+
+    ch_ptr=strtok (NULL, " , \t");
+    while (ch_ptr!=NULL) {
+      vec_bf.push_back(atof(ch_ptr));
+      ch_ptr=strtok (NULL, " , \t");
+    }
+
+    if (t==0) {
+      bf_size=vec_bf.size();
+    } else {
+      if (bf_size!=vec_bf.size()) {cout<<"error! unequal row size in bf file."<<endl;}
+    }
+
+    if (flag_block==0) {
+      mat_bf.push_back(vec_bf);
+    } else {
+      BF.push_back(mat_bf);
+      mat_bf.clear();
+    }
+    vec_bf.clear();
+
+    t++;
+  }
+
+  infile.close();
+  infile.clear();
+
+  return;
+}
+
+
+//read category files
+//read both continuous and discrete category file, record mapRS2catc
+void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel, size_t &kc, size_t &kd)
+{
+  igzstream infile (file_cat.c_str(), igzstream::in);
+  if (!infile) {cout<<"error! fail to open category file: "<<file_cat<<endl; return;}
+
+  string line;
+  char *ch_ptr;
+
+  string rs, chr, a1, a0, pos, cm;
+
+  //read header
+  HEADER header;
+  !safeGetline(infile, line).eof();
+  ReadHeader (line, header);
+
+  //use the header to determine the number of categories
+  kc=header.catc_col.size(); kd=header.catd_col.size();
+
+  //set up storage and mapper
+  map<string, vector<double> > mapRS2catc;
+  map<string, vector<int> > mapRS2catd;
+  vector<double> catc;
+  vector<int> catd;
+
+  //read the following lines to record mapRS2cat
+  while (!safeGetline(infile, line).eof()) {
+    ch_ptr=strtok ((char *)line.c_str(), " , \t");
+
+    if (header.rs_col==0) {
+      rs=chr+":"+pos;
+    }
+
+    catc.clear(); catd.clear();
+
+    for (size_t i=0; i<header.coln; i++) {
+      if (header.rs_col!=0 && header.rs_col==i+1) {
+	rs=ch_ptr;
+      } else if (header.chr_col!=0 && header.chr_col==i+1) {
+	chr=ch_ptr;
+      } else if (header.pos_col!=0 && header.pos_col==i+1) {
+	pos=ch_ptr;
+      } else if (header.cm_col!=0 && header.cm_col==i+1) {
+	cm=ch_ptr;
+      } else if (header.a1_col!=0 && header.a1_col==i+1) {
+	a1=ch_ptr;
+      } else if (header.a0_col!=0 && header.a0_col==i+1) {
+	a0=ch_ptr;
+      } else if (header.catc_col.size()!=0 && header.catc_col.count(i+1)!=0 ) {
+	catc.push_back(atof(ch_ptr));
+      } else if (header.catd_col.size()!=0 && header.catd_col.count(i+1)!=0 ) {
+	catd.push_back(atoi(ch_ptr));
+      } else {}
+
+      ch_ptr=strtok (NULL, " , \t");
+    }
+
+    if (mapRS2catc.count(rs)==0 && kc>0) {mapRS2catc[rs]=catc;}
+    if (mapRS2catd.count(rs)==0 && kd>0) {mapRS2catd[rs]=catd;}
+  }
+
+  //load into Ad and Ac
+  if (kc>0) {
+    Ac=gsl_matrix_alloc(vec_rs.size(), kc);
+    for (size_t i=0; i<vec_rs.size(); i++) {
+      if (mapRS2catc.count(vec_rs[i])!=0) {
+	for (size_t j=0; j<kc; j++) {
+	  gsl_matrix_set(Ac, i, j, mapRS2catc[vec_rs[i]][j]);
+	}
+      } else {
+	for (size_t j=0; j<kc; j++) {
+	  gsl_matrix_set(Ac, i, j, 0);
+	}
+      }
+    }
+  }
+
+  if (kd>0) {
+    Ad=gsl_matrix_int_alloc(vec_rs.size(), kd);
+
+    for (size_t i=0; i<vec_rs.size(); i++) {
+      if (mapRS2catd.count(vec_rs[i])!=0) {
+	for (size_t j=0; j<kd; j++) {
+	  gsl_matrix_int_set(Ad, i, j, mapRS2catd[vec_rs[i]][j]);
+	}
+      } else {
+	for (size_t j=0; j<kd; j++) {
+	  gsl_matrix_int_set(Ad, i, j, 0);
+	}
+      }
+    }
+
+    dlevel=gsl_vector_int_alloc(kd);
+    map<int, int> rcd;
+    int val;
+    for (size_t j=0; j<kd; j++) {
+      rcd.clear();
+      for (size_t i=0; i<Ad->size1; i++) {
+	val = gsl_matrix_int_get(Ad, i, j);
+	rcd[val] = 1;
+      }
+      gsl_vector_int_set (dlevel, j, rcd.size());
+    }
+  }
+
+  infile.clear();
+  infile.close();
+
+  return;
+}
+
+
+
+
+
+
+
+
+void BSLMMDAP::WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF)
+{
+  string file_bf, file_hyp;
+	file_bf=path_out+"/"+file_out;
+	file_bf+=".bf.txt";
+	file_hyp=path_out+"/"+file_out;
+	file_hyp+=".hyp.txt";
+
+	ofstream outfile_bf, outfile_hyp;
+
+	outfile_bf.open (file_bf.c_str(), ofstream::out);
+	outfile_hyp.open (file_hyp.c_str(), ofstream::out);
+
+	if (!outfile_bf) {cout<<"error writing file: "<<file_bf<<endl; return;}
+	if (!outfile_hyp) {cout<<"error writing file: "<<file_hyp<<endl; return;}
+
+	outfile_hyp<<"h"<<"\t"<<"rho"<<"\t"<<"sa2"<<"\t"<<"sb2"<<"\t"<<"weight"<<endl;
+	outfile_hyp<<scientific;
+	for (size_t i=0; i<Hyper->size1; i++) {
+	  for (size_t j=0; j<Hyper->size2; j++) {
+	    outfile_hyp<<setprecision(6)<<gsl_matrix_get (Hyper, i, j)<<"\t";
+	  }
+	  outfile_hyp<<endl;
+	}
+
+	outfile_bf<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss";
+	for (size_t i=0; i<BF->size2; i++) {
+	  outfile_bf<<"\t"<<"BF"<<i+1;
+	}
+	outfile_bf<<endl;
+
+	size_t t=0;
+	for (size_t i=0; i<ns_total; ++i) {
+	  if (indicator_snp[i]==0) {continue;}
+
+	  outfile_bf<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"
+		    <<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss;
+
+	  outfile_bf<<scientific;
+	  for (size_t j=0; j<BF->size2; j++) {
+	    outfile_bf<<"\t"<<setprecision(6)<<gsl_matrix_get (BF, t, j);
+	  }
+	  outfile_bf<<endl;
+
+	  t++;
+	}
+
+	outfile_hyp.close();
+	outfile_hyp.clear();
+	outfile_bf.close();
+	outfile_bf.clear();
+	return;
+}
+
+
+
+void BSLMMDAP::WriteResult (const vector<string> &vec_rs, const gsl_matrix *Hyper, const gsl_vector *pip, const gsl_vector *coef)
+{
+  string file_gamma, file_hyp, file_coef;
+	file_gamma=path_out+"/"+file_out;
+	file_gamma+=".gamma.txt";
+	file_hyp=path_out+"/"+file_out;
+	file_hyp+=".hyp.txt";
+	file_coef=path_out+"/"+file_out;
+	file_coef+=".coef.txt";
+
+	ofstream outfile_gamma, outfile_hyp, outfile_coef;
+
+	outfile_gamma.open (file_gamma.c_str(), ofstream::out);
+	outfile_hyp.open (file_hyp.c_str(), ofstream::out);
+	outfile_coef.open (file_coef.c_str(), ofstream::out);
+
+	if (!outfile_gamma) {cout<<"error writing file: "<<file_gamma<<endl; return;}
+	if (!outfile_hyp) {cout<<"error writing file: "<<file_hyp<<endl; return;}
+	if (!outfile_coef) {cout<<"error writing file: "<<file_coef<<endl; return;}
+
+	outfile_hyp<<"h"<<"\t"<<"rho"<<"\t"<<"sa2"<<"\t"<<"sb2"<<"\t"<<"weight"<<endl;
+	outfile_hyp<<scientific;
+	for (size_t i=0; i<Hyper->size1; i++) {
+	  for (size_t j=0; j<Hyper->size2; j++) {
+	    outfile_hyp<<setprecision(6)<<gsl_matrix_get (Hyper, i, j)<<"\t";
+	  }
+	  outfile_hyp<<endl;
+	}
+
+
+	outfile_gamma<<"rs"<<"\t"<<"gamma"<<endl;
+	for (size_t i=0; i<vec_rs.size(); ++i) {
+	  outfile_gamma<<vec_rs[i]<<"\t"<<scientific<<setprecision(6)<<gsl_vector_get(pip, i)<<endl;
+	}
+
+	outfile_coef<<"coef"<<endl;
+	outfile_coef<<scientific;
+	for (size_t i=0; i<coef->size; i++) {
+	  outfile_coef<<setprecision(6)<<gsl_vector_get (coef, i)<<endl;
+	}
+
+	outfile_coef.close();
+	outfile_coef.clear();
+	outfile_hyp.close();
+	outfile_hyp.clear();
+	outfile_gamma.close();
+	outfile_gamma.clear();
+	return;
+}
+
+
+
+
+/*
+void BSLMMDAP::SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X, vector<size_t> &rank)
+{
+	size_t pos;
+	for (size_t i=0; i<rank.size(); ++i) {
+		pos=mapRank2pos[rank[i]];
+		gsl_vector_view Xgamma_col=gsl_matrix_column (Xgamma, i);
+		gsl_vector_const_view X_col=gsl_matrix_const_column (X, pos);
+		gsl_vector_memcpy (&Xgamma_col.vector, &X_col.vector);
+	}
+
+	return;
+}
+*/
+
+double BSLMMDAP::CalcMarginal (const gsl_vector *Uty, const gsl_vector *K_eval, const double sigma_b2, const double tau)
+{
+	gsl_vector *weight_Hi=gsl_vector_alloc (Uty->size);
+
+	double logm=0.0;
+	double d, uy, Hi_yy=0, logdet_H=0.0;
+	for (size_t i=0; i<ni_test; ++i) {
+		d=gsl_vector_get (K_eval, i)*sigma_b2;
+		d=1.0/(d+1.0);
+		gsl_vector_set (weight_Hi, i, d);
+
+		logdet_H-=log(d);
+		uy=gsl_vector_get (Uty, i);
+		Hi_yy+=d*uy*uy;
+	}
+
+	//calculate likelihood
+	logm=-0.5*logdet_H-0.5*tau*Hi_yy+0.5*log(tau)*(double)ni_test;
+
+	gsl_vector_free (weight_Hi);
+
+	return logm;
+}
+
+
+double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma, const gsl_vector *Uty, const gsl_vector *K_eval, const double sigma_a2, const double sigma_b2, const double tau)
+{
+  clock_t  time_start;
+	double logm=0.0;
+	double d, uy, P_yy=0, logdet_O=0.0, logdet_H=0.0;
+
+	gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1, UtXgamma->size2);
+	gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2);
+	gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2);
+	gsl_vector *beta_hat=gsl_vector_alloc (UtXgamma->size2);
+	gsl_vector *weight_Hi=gsl_vector_alloc (UtXgamma->size1);
+
+	gsl_matrix_memcpy (UtXgamma_eval, UtXgamma);
+
+	logdet_H=0.0; P_yy=0.0;
+	for (size_t i=0; i<ni_test; ++i) {
+		gsl_vector_view UtXgamma_row=gsl_matrix_row (UtXgamma_eval, i);
+		d=gsl_vector_get (K_eval, i)*sigma_b2;
+		d=1.0/(d+1.0);
+		gsl_vector_set (weight_Hi, i, d);
+
+		logdet_H-=log(d);
+		uy=gsl_vector_get (Uty, i);
+		P_yy+=d*uy*uy;
+		gsl_vector_scale (&UtXgamma_row.vector, d);
+	}
+
+	//calculate Omega
+	gsl_matrix_set_identity (Omega);
+
+	time_start=clock();
+#ifdef WITH_LAPACK
+	lapack_dgemm ((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, UtXgamma, 1.0, Omega);
+#else
+	gsl_blas_dgemm (CblasTrans, CblasNoTrans, sigma_a2, UtXgamma_eval, UtXgamma, 1.0, Omega);
+#endif
+	time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+	//calculate beta_hat
+	gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy);
+
+	logdet_O=CholeskySolve(Omega, XtHiy, beta_hat);
+
+	gsl_vector_scale (beta_hat, sigma_a2);
+
+	gsl_blas_ddot (XtHiy, beta_hat, &d);
+	P_yy-=d;
+
+	gsl_matrix_free (UtXgamma_eval);
+	gsl_matrix_free (Omega);
+	gsl_vector_free (XtHiy);
+	gsl_vector_free (beta_hat);
+	gsl_vector_free (weight_Hi);
+
+	logm=-0.5*logdet_H-0.5*logdet_O-0.5*tau*P_yy+0.5*log(tau)*(double)ni_test;
+
+	return logm;
+}
+
+
+double BSLMMDAP::CalcPrior (class HYPBSLMM &cHyp) {
+  double logprior=0;
+  logprior=((double)cHyp.n_gamma-1.0)*cHyp.logp+((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp));
+  return logprior;
+}
+
+
+//where A is the ni_test by n_cat matrix of annotations
+void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const gsl_vector *y) {
+	clock_t time_start;
+
+	//set up BF
+	double tau, h, rho, sigma_a2, sigma_b2, d;
+	size_t ns_causal=10;
+	size_t n_grid=h_ngrid*rho_ngrid;
+	vector<double> vec_sa2, vec_sb2, logm_null;
+
+	gsl_matrix *BF=gsl_matrix_alloc(ns_test, n_grid);
+	gsl_matrix *Xgamma=gsl_matrix_alloc(ni_test, 1);
+	gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5);
+
+	//compute tau by using yty
+	gsl_blas_ddot (Uty, Uty, &tau);
+	tau=(double)ni_test/tau;
+
+	//set up grid values for sigma_a2 and sigma_b2 based on an approximately even grid for h and rho, and a fixed number of causals
+	size_t ij=0;
+	for (size_t i=0; i<h_ngrid; i++) {
+	  h=h_min+(h_max-h_min)*(double)i/((double)h_ngrid-1);
+	  for (size_t j=0; j<rho_ngrid; j++) {
+	    rho=rho_min+(rho_max-rho_min)*(double)j/((double)rho_ngrid-1);
+
+	    sigma_a2=h*rho/((1-h)*(double)ns_causal);
+	    sigma_b2=h*(1.0-rho)/(trace_G*(1-h));
+
+	    vec_sa2.push_back(sigma_a2);
+	    vec_sb2.push_back(sigma_b2);
+	    logm_null.push_back(CalcMarginal (Uty, K_eval, 0.0, tau));
+
+	    gsl_matrix_set (Hyper, ij, 0, h);
+	    gsl_matrix_set (Hyper, ij, 1, rho);
+	    gsl_matrix_set (Hyper, ij, 2, sigma_a2);
+	    gsl_matrix_set (Hyper, ij, 3, sigma_b2);
+	    gsl_matrix_set (Hyper, ij, 4, 1/(double)n_grid);
+	    ij++;
+	  }
+	}
+
+	//compute BF factors
+	time_start=clock();
+	cout<<"Calculating BF..."<<endl;
+	for (size_t t=0; t<ns_test; t++) {
+	  gsl_vector_view Xgamma_col=gsl_matrix_column (Xgamma, 0);
+	  gsl_vector_const_view X_col=gsl_matrix_const_column (UtX, t);
+	  gsl_vector_memcpy (&Xgamma_col.vector, &X_col.vector);
+
+	  for (size_t ij=0; ij<n_grid; ij++) {
+	    sigma_a2=vec_sa2[ij];
+	    sigma_b2=vec_sb2[ij];
+
+	    d=CalcMarginal (Xgamma, Uty, K_eval, sigma_a2, sigma_b2, tau);
+	    d-=logm_null[ij];
+	    d=exp(d);
+
+	    gsl_matrix_set(BF, t, ij, d);
+	  }
+	}
+	time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+	//save results
+	WriteResult (Hyper, BF);
+
+	//free matrices and vectors
+	gsl_matrix_free(BF);
+	gsl_matrix_free(Xgamma);
+	gsl_matrix_free(Hyper);
+	return;
+}
+
+
+
+
+
+void single_ct_regression(const gsl_matrix_int *Xd, const gsl_vector_int *dlevel, const gsl_vector *pip_vec, gsl_vector *coef, gsl_vector *prior_vec) {
+
+  map<int,double> sum_pip;
+  map<int,double> sum;
+
+  int levels = gsl_vector_int_get(dlevel,0);
+
+  for(int i=0;i<levels;i++){
+    sum_pip[i] = sum[i] = 0;
+  }
+
+  for(int i=0;i<Xd->size1;i++){
+    int cat = gsl_matrix_int_get(Xd,i,0);
+    sum_pip[cat] += gsl_vector_get(pip_vec,i);
+    sum[cat] += 1;
+  }
+
+  for(int i=0;i<Xd->size1;i++){
+    int cat = gsl_matrix_int_get(Xd,i,0);
+    gsl_vector_set(prior_vec,i,sum_pip[cat]/sum[cat]);
+  }
+
+  //double baseline=0;
+  for(int i=0;i<levels;i++){
+    double new_prior = sum_pip[i]/sum[i];
+    //gsl_vector_set(coef, i, log(new_prior/(1-new_prior))-baseline);
+    //if(i==0){
+    //baseline = log(new_prior/(1-new_prior));
+    //}
+    gsl_vector_set(coef, i, log(new_prior/(1-new_prior)) );
+  }
+
+  return;
+}
+
+
+
+
+//where A is the ni_test by n_cat matrix of annotations
+void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, const vector<string> &vec_rs, const vector<double> &vec_sa2, const vector<double> &vec_sb2, const vector<double> &wab, const vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel) {
+	clock_t time_start;
+
+	//set up BF
+	double h, rho, sigma_a2, sigma_b2, d, s, logm, logm_save;
+	size_t t1, t2;
+	size_t n_grid=wab.size(), ns_test=vec_rs.size();
+
+	gsl_vector *prior_vec=gsl_vector_alloc(ns_test);
+	gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5);
+	gsl_vector *pip=gsl_vector_alloc(ns_test);
+	gsl_vector *coef=gsl_vector_alloc(kc+kd+1);
+
+	//perform the EM algorithm
+	vector<double> vec_wab, vec_wab_new;
+
+	//initial values
+	for (size_t t=0; t<ns_test; t++) {
+	  gsl_vector_set (prior_vec, t, (double)BF.size()/(double)ns_test);
+	}
+	for (size_t ij=0; ij<n_grid; ij++) {
+	  vec_wab.push_back(wab[ij]);
+	  vec_wab_new.push_back(wab[ij]);
+	}
+
+	//EM iteration
+	size_t it=0;
+	double dif=1;
+	while (it<100 && dif>1e-3) {
+	  //update E_gamma
+	  t1=0, t2=0;
+	  for (size_t b=0; b<BF.size(); b++) {
+	    s=1;
+	    for (size_t m=0; m<BF[b].size(); m++) {
+	      d=0;
+	      for (size_t ij=0; ij<n_grid; ij++) {
+		d+=vec_wab_new[ij]*BF[b][m][ij];
+	      }
+	      d*=gsl_vector_get(prior_vec, t1)/(1-gsl_vector_get(prior_vec, t1));
+
+	      gsl_vector_set(pip, t1, d);
+	      s+=d;
+	      t1++;
+	    }
+
+	    for (size_t m=0; m<BF[b].size(); m++) {
+	      d=gsl_vector_get(pip, t2)/s;
+	      gsl_vector_set(pip, t2, d);
+	      t2++;
+	    }
+	  }
+
+	  //update E_wab
+	  s=0;
+	  for (size_t ij=0; ij<n_grid; ij++) {
+	    vec_wab_new[ij]=0;
+
+	    t1=0;
+	    for (size_t b=0; b<BF.size(); b++) {
+	      d=1;
+	      for (size_t m=0; m<BF[b].size(); m++) {
+		d+=gsl_vector_get(prior_vec, t1)/(1-gsl_vector_get(prior_vec, t1))*vec_wab[ij]*BF[b][m][ij];
+		t1++;
+	      }
+	      vec_wab_new[ij]+=log(d);
+	    }
+
+	    s=max(s, vec_wab_new[ij]);
+	  }
+
+	  d=0;
+	  for (size_t ij=0; ij<n_grid; ij++) {
+	    vec_wab_new[ij]=exp(vec_wab_new[ij]-s);
+	    d+=vec_wab_new[ij];
+	  }
+
+	  for (size_t ij=0; ij<n_grid; ij++) {
+	    vec_wab_new[ij]/=d;
+	    //	    vec_wab[ij]=vec_wab_new[ij];
+	  }
+
+	  //update coef, and pi
+	  if(kc==0 && kd==0){//no annotation
+	    s=0;
+	    for (size_t t=0; t<pip->size; t++) {
+	      s+=gsl_vector_get(pip, t);
+	    }
+	    s=s/(double)pip->size;
+	    for (size_t t=0; t<pip->size; t++) {
+	      gsl_vector_set(prior_vec, t, s);
+	    }
+
+	    gsl_vector_set (coef, 0, log(s/(1-s)));
+	  } else if(kc==0 && kd!=0){//only discrete annotations
+	    if(kd == 1){
+	      single_ct_regression(Ad, dlevel, pip, coef, prior_vec);
+	    }else{
+	      logistic_cat_fit(coef, Ad, dlevel, pip, 0, 0);
+	      logistic_cat_pred(coef, Ad, dlevel, prior_vec);
+	    }
+	  } else if (kc!=0 && kd==0) {//only continuous annotations
+	    logistic_cont_fit(coef, Ac, pip, 0, 0);
+	    logistic_cont_pred(coef, Ac, prior_vec);
+	  } else if (kc!=0 && kd!=0) {//both continuous and categorical annotations
+	    logistic_mixed_fit(coef, Ad, dlevel, Ac, pip, 0, 0);
+	    logistic_mixed_pred(coef, Ad, dlevel, Ac, prior_vec);
+	  }
+
+	  //compute marginal likelihood
+	  logm=0;
+
+	  t1=0;
+	  for (size_t b=0; b<BF.size(); b++) {
+	    d=1; s=0;
+	    for (size_t m=0; m<BF[b].size(); m++) {
+	      s+=log(1-gsl_vector_get(prior_vec, t1));
+	      for (size_t ij=0; ij<n_grid; ij++) {
+		d+=gsl_vector_get(prior_vec, t1)/(1-gsl_vector_get(prior_vec, t1))*vec_wab[ij]*BF[b][m][ij];
+	      }
+	    }
+	    logm+=log(d)+s;
+	    t1++;
+	  }
+
+	  if (it>0) {
+	    dif=logm-logm_save;
+	  }
+	  logm_save=logm;
+	  it++;
+
+	  cout<<"iteration = "<<it<<"; marginal likelihood = "<<logm<<endl;
+	}
+
+	//update h and rho that correspond to w_ab
+	for (size_t ij=0; ij<n_grid; ij++) {
+	  sigma_a2=vec_sa2[ij];
+	  sigma_b2=vec_sb2[ij];
+
+	  d=exp(gsl_vector_get(coef, coef->size-1))/(1+exp(gsl_vector_get(coef, coef->size-1)));
+	  h=(d*(double)ns_test*sigma_a2+1*sigma_b2)/(1+d*(double)ns_test*sigma_a2+1*sigma_b2);
+	  rho=d*(double)ns_test*sigma_a2/(d*(double)ns_test*sigma_a2+1*sigma_b2);
+
+	  gsl_matrix_set (Hyper, ij, 0, h);
+	  gsl_matrix_set (Hyper, ij, 1, rho);
+	  gsl_matrix_set (Hyper, ij, 2, sigma_a2);
+	  gsl_matrix_set (Hyper, ij, 3, sigma_b2);
+	  gsl_matrix_set (Hyper, ij, 4, vec_wab_new[ij]);
+	}
+
+	//obtain beta and alpha parameters
+
+
+	//save results
+	WriteResult (vec_rs, Hyper, pip, coef);
+
+	//free matrices and vectors
+	gsl_vector_free(prior_vec);
+	gsl_matrix_free(Hyper);
+	gsl_vector_free(pip);
+	gsl_vector_free(coef);
+	return;
+}
+
+/*
+//readin the estimated hyper-parameters and perform fine mapping for each region
+void BSLMM::DAP_FineMapping (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_matrix *A, const gsl_vector *Uty, const gsl_vector *K_eval, const gsl_vector *y, gsl_matrix *Hyper, gsl_vector *alpha, gsl_vector *pip) {
+	clock_t time_start;
+
+	//two priority sets: S_1 contains all candidate causal SNPs; S_2 contains the prioritized combintion of them
+	//two marginal probability sets: P_1 contains marginals for S_1; P_2 contains marginals for S_2;
+	set<size_t> S1set, S2set;
+	vector<size_t> S1vec;
+	vector<set<size_t> > S2vec;
+	vector<double> P1, P2;
+
+	//calculate P0 (null) and P1 (for every SNP)
+
+
+
+	//loop through the number of combinations
+	for (size_t s=0; s<p; s++) {
+	  //if (s==0), set up S_1: compute marginal of the null model, then compute P_1, then compute BF_1 and use them to select S_1; compute C_1
+
+
+
+	  //if (s==1), set up S_2: compute pair-wise P_2 and use them to select S_2; compute C_2
+
+	  //otherwise, match each combination of S_2 with each SNP from S_1, select into S_3; and replace S_2 with S_3; compute C_s
+
+
+	  //stop when the stopping critieria are reached (if S_2 is empty; if t; if kappa); add the residual component R
+
+	for (size_t t=0; t<total_step; ++t) {
+		if (t%d_pace==0 || t==total_step-1) {ProgressBar ("Running MCMC ", t, total_step-1, (double)n_accept/(double)(t*n_mh+1));}
+//		if (t>10) {break;}
+
+		if (a_mode==13) {
+			SampleZ (y, z_hat, z);
+			mean_z=CenterVector (z);
+
+			time_start=clock();
+			gsl_blas_dgemv (CblasTrans, 1.0, U, z, 0.0, Utz);
+			time_UtZ+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+			//First proposal
+			if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) {
+				logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old);
+				beta_old.clear();
+				for (size_t i=0; i<cHyp_old.n_gamma; ++i) {
+				  beta_old.push_back(0);
+				}
+			}
+			else {
+				gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test, cHyp_old.n_gamma);
+				gsl_vector *beta=gsl_vector_alloc (cHyp_old.n_gamma);
+				SetXgamma (UtXgamma, UtX, rank_old);
+				logPost_old=CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old, alpha_old, beta, cHyp_old);
+
+				beta_old.clear();
+				for (size_t i=0; i<beta->size; ++i) {
+					beta_old.push_back(gsl_vector_get(beta, i));
+				}
+				gsl_matrix_free (UtXgamma);
+				gsl_vector_free (beta);
+			}
+		}
+
+
+	delete [] p_gamma;
+	beta_g.clear();
+
+	return;
+}
+
+*/
+
+
+
+
+
+
+/*
+//below fits MCMC for rho=1
+void BSLMM::CalcXtX (const gsl_matrix *X, const gsl_vector *y, const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty)
+{
+  time_t time_start=clock();
+  gsl_matrix_const_view X_sub=gsl_matrix_const_submatrix(X, 0, 0, X->size1, s_size);
+  gsl_matrix_view XtX_sub=gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size);
+  gsl_vector_view Xty_sub=gsl_vector_subvector(Xty, 0, s_size);
+
+#ifdef WITH_LAPACK
+  lapack_dgemm ((char *)"T", (char *)"N", 1.0, &X_sub.matrix, &X_sub.matrix, 0.0, &XtX_sub.matrix);
+#else
+  gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, &X_sub.matrix, &X_sub.matrix, 0.0, &XtX_sub.matrix);
+#endif
+  gsl_blas_dgemv(CblasTrans, 1.0, &X_sub.matrix, y, 0.0, &Xty_sub.vector);
+
+  time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+  return;
+}
+
+
+
+double BSLMM::CalcPosterior (const double yty, class HYPBSLMM &cHyp)
+{
+	double logpost=0.0;
+
+	//for quantitative traits, calculate pve and pge
+	//pve and pge for case/control data are calculted in CalcCC_PVEnZ
+	if (a_mode==11) {
+		cHyp.pve=0.0;
+		cHyp.pge=1.0;
+	}
+
+	//calculate likelihood
+	if (a_mode==11) {logpost-=0.5*(double)ni_test*log(yty);}
+	else {logpost-=0.5*yty;}
+
+	logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp));
+
+	return logpost;
+}
+
+
+double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, const gsl_vector *Xty, const double yty, const size_t s_size, gsl_vector *Xb, gsl_vector *beta, class HYPBSLMM &cHyp)
+{
+	double sigma_a2=cHyp.h/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test);
+	double logpost=0.0;
+	double d, P_yy=yty, logdet_O=0.0;
+
+	gsl_matrix_const_view Xgamma_sub=gsl_matrix_const_submatrix (Xgamma, 0, 0, Xgamma->size1, s_size);
+	gsl_matrix_const_view XtX_sub=gsl_matrix_const_submatrix (XtX, 0, 0, s_size, s_size);
+	gsl_vector_const_view Xty_sub=gsl_vector_const_subvector (Xty, 0, s_size);
+
+	gsl_matrix *Omega=gsl_matrix_alloc (s_size, s_size);
+	gsl_matrix *M_temp=gsl_matrix_alloc (s_size, s_size);
+	gsl_vector *beta_hat=gsl_vector_alloc (s_size);
+	gsl_vector *Xty_temp=gsl_vector_alloc (s_size);
+
+	gsl_vector_memcpy (Xty_temp, &Xty_sub.vector);
+
+	//calculate Omega
+	gsl_matrix_memcpy (Omega, &XtX_sub.matrix);
+	gsl_matrix_scale (Omega, sigma_a2);
+	gsl_matrix_set_identity (M_temp);
+	gsl_matrix_add (Omega, M_temp);
+
+	//calculate beta_hat
+	logdet_O=CholeskySolve(Omega, Xty_temp, beta_hat);
+	gsl_vector_scale (beta_hat, sigma_a2);
+
+	gsl_blas_ddot (Xty_temp, beta_hat, &d);
+	P_yy-=d;
+
+	//sample tau
+	double tau=1.0;
+	if (a_mode==11) {tau =gsl_ran_gamma (gsl_r, (double)ni_test/2.0,  2.0/P_yy); }
+
+	//sample beta
+	for (size_t i=0; i<s_size; i++)
+	{
+		d=gsl_ran_gaussian(gsl_r, 1);
+		gsl_vector_set(beta, i, d);
+	}
+	gsl_vector_view beta_sub=gsl_vector_subvector(beta, 0, s_size);
+	gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, &beta_sub.vector);
+
+	//it compuates inv(L^T(Omega)) %*% beta;
+	gsl_vector_scale(&beta_sub.vector, sqrt(sigma_a2/tau));
+	gsl_vector_add(&beta_sub.vector, beta_hat);
+	gsl_blas_dgemv (CblasNoTrans, 1.0, &Xgamma_sub.matrix, &beta_sub.vector, 0.0, Xb);
+
+	//for quantitative traits, calculate pve and pge
+	if (a_mode==11) {
+		gsl_blas_ddot (Xb, Xb, &d);
+		cHyp.pve=d/(double)ni_test;
+		cHyp.pve/=cHyp.pve+1.0/tau;
+		cHyp.pge=1.0;
+	}
+
+	logpost=-0.5*logdet_O;
+	if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);}
+	else {logpost-=0.5*P_yy;}
+
+	logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp));
+
+	gsl_matrix_free (Omega);
+	gsl_matrix_free (M_temp);
+	gsl_vector_free (beta_hat);
+	gsl_vector_free (Xty_temp);
+
+	return logpost;
+}
+*/
+
+
diff --git a/src/bslmmdap.h b/src/bslmmdap.h
new file mode 100644
index 0000000..ac78f97
--- /dev/null
+++ b/src/bslmmdap.h
@@ -0,0 +1,101 @@
+/*
+ Genome-wide Efficient Mixed Model Association (GEMMA)
+ Copyright (C) 2011  Xiang Zhou
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef __BSLMMDAP_H__
+#define __BSLMMDAP_H__
+
+#include <vector>
+#include <map>
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+
+#ifdef FORCE_FLOAT
+#include "param_float.h"
+#else
+#include "param.h"
+#endif
+
+
+using namespace std;
+
+
+
+
+
+
+class BSLMMDAP {
+
+public:
+	// IO related parameters
+	int a_mode;
+	size_t d_pace;
+
+	string file_bfile;
+	string file_geno;
+	string file_out;
+	string path_out;
+
+	// LMM related parameters
+	double pve_null;
+	double pheno_mean;
+
+	// BSLMM MCMC related parameters
+	long int randseed;
+	double trace_G;
+
+	HYPBSLMM cHyp_initial;
+
+	// Summary statistics
+	size_t ni_total, ns_total;	//number of total individuals and snps
+	size_t ni_test, ns_test;	//number of individuals and snps used for analysis
+
+	double h_min, h_max, rho_min, rho_max;
+	size_t h_ngrid, rho_ngrid;
+
+	double time_UtZ;
+	double time_Omega;		//time spent on optimization iterations
+	double time_Proposal;        //time spent on constructing the proposal distribution for gamma (i.e. lmm or lm analysis)
+	vector<int> indicator_idv;				//indicator for individuals (phenotypes), 0 missing, 1 available for analysis
+	vector<int> indicator_snp;				//sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis
+
+	vector<SNPINFO> snpInfo;		//record SNP information
+
+	// Main Functions
+	void CopyFromParam (PARAM &cPar);
+	void CopyToParam (PARAM &cPar);
+
+	void WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF);
+	void WriteResult (const vector<string> &vec_rs, const gsl_matrix *Hyper, const gsl_vector *pip, const gsl_vector *coef);
+	double CalcMarginal (const gsl_vector *Uty, const gsl_vector *K_eval, const double sigma_b2, const double tau);
+	double CalcMarginal (const gsl_matrix *UtXgamma, const gsl_vector *Uty, const gsl_vector *K_eval, const double sigma_a2, const double sigma_b2, const double tau);
+	double CalcPrior (class HYPBSLMM &cHyp);
+
+	void DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const gsl_vector *y);
+	void DAP_EstimateHyper (const size_t kc, const size_t kd, const vector<string> &vec_rs, const vector<double> &vec_sa2, const vector<double> &vec_sb2, const vector<double> &wab, const vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel);
+
+};
+
+void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, vector<double> &vec_sb2, vector<double> &vec_wab);
+void ReadFile_bf (const string &file_bf, vector<string> &vec_rs, vector<vector<vector<double> > > &BF);
+void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel, size_t &kc, size_t &kd);
+
+
+#endif
+
+
diff --git a/src/ldr.cpp b/src/ldr.cpp
new file mode 100644
index 0000000..e28490a
--- /dev/null
+++ b/src/ldr.cpp
@@ -0,0 +1,285 @@
+/*
+ Genome-wide Efficient Mixed Model Association (GEMMA)
+ Copyright (C) 2011  Xiang Zhou
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+
+#include <iomanip>
+#include <cmath>
+#include <iostream>
+#include <stdio.h>
+#include <stdlib.h> 
+#include <ctime>
+#include <cstring>
+#include <algorithm>
+
+#include "gsl/gsl_vector.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_blas.h"
+#include "gsl/gsl_eigen.h"
+#include "gsl/gsl_randist.h"
+#include "gsl/gsl_cdf.h"
+#include "gsl/gsl_roots.h"
+#include "Eigen/Dense"
+
+
+
+#include "lapack.h"
+
+#ifdef FORCE_FLOAT
+#include "param_float.h"
+#include "ldr_float.h"
+#include "lm_float.h"
+#include "mathfunc_float.h"  //for function CenterVector
+#else
+#include "param.h"
+#include "ldr.h"
+#include "lm.h"
+#include "mathfunc.h"
+#endif
+
+using namespace std;
+using namespace Eigen;
+
+
+
+void LDR::CopyFromParam (PARAM &cPar) 
+{
+	a_mode=cPar.a_mode;
+	d_pace=cPar.d_pace;
+	
+	file_bfile=cPar.file_bfile;
+	file_geno=cPar.file_geno;
+	file_out=cPar.file_out;
+	path_out=cPar.path_out;
+
+	ni_total=cPar.ni_total;
+	ns_total=cPar.ns_total;
+	ni_test=cPar.ni_test;
+	ns_test=cPar.ns_test;
+	n_cvt=cPar.n_cvt;
+	
+	indicator_idv=cPar.indicator_idv;
+	indicator_snp=cPar.indicator_snp;
+	snpInfo=cPar.snpInfo;
+	
+	return;
+}
+
+
+void LDR::CopyToParam (PARAM &cPar) 
+{
+	//cPar.pheno_mean=pheno_mean;
+	//cPar.randseed=randseed;
+	
+	return;
+}
+
+
+/*
+void BSLMM::WriteBV (const gsl_vector *bv) 
+{
+	string file_str;
+	file_str=path_out+"/"+file_out;
+	file_str+=".bv.txt";
+
+	ofstream outfile (file_str.c_str(), ofstream::out);
+	if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;}
+	
+	size_t t=0;
+	for (size_t i=0; i<ni_total; ++i) {
+		if (indicator_idv[i]==0) {
+			outfile<<"NA"<<endl;
+		}		
+		else {
+			outfile<<scientific<<setprecision(6)<<gsl_vector_get(bv, t)<<endl;
+			t++;
+		}
+	}		
+	
+	outfile.clear();	
+	outfile.close();	
+	return;
+}
+
+
+
+
+void BSLMM::WriteParam (vector<pair<double, double> > &beta_g, const gsl_vector *alpha, const size_t w) 
+{
+	string file_str;
+	file_str=path_out+"/"+file_out;
+	file_str+=".param.txt";
+
+	ofstream outfile (file_str.c_str(), ofstream::out);
+	if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;}
+	
+	outfile<<"chr"<<"\t"<<"rs"<<"\t"
+			<<"ps"<<"\t"<<"n_miss"<<"\t"<<"alpha"<<"\t"
+			<<"beta"<<"\t"<<"gamma"<<endl;
+	
+	size_t t=0;
+	for (size_t i=0; i<ns_total; ++i) {
+		if (indicator_snp[i]==0) {continue;}		
+		
+		outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"
+		<<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t";	
+				
+		outfile<<scientific<<setprecision(6)<<gsl_vector_get(alpha, t)<<"\t";
+		if (beta_g[t].second!=0) {
+			outfile<<beta_g[t].first/beta_g[t].second<<"\t"<<beta_g[t].second/(double)w<<endl;
+		}
+		else {
+			outfile<<0.0<<"\t"<<0.0<<endl;
+		}
+		t++;
+	}		
+	
+	outfile.clear();	
+	outfile.close();	
+	return;
+}
+
+
+void BSLMM::WriteParam (const gsl_vector *alpha) 
+{
+	string file_str;
+	file_str=path_out+"/"+file_out;
+	file_str+=".param.txt";
+
+	ofstream outfile (file_str.c_str(), ofstream::out);
+	if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;}
+	
+	outfile<<"chr"<<"\t"<<"rs"<<"\t"
+			<<"ps"<<"\t"<<"n_miss"<<"\t"<<"alpha"<<"\t"
+			<<"beta"<<"\t"<<"gamma"<<endl;
+	
+	size_t t=0;
+	for (size_t i=0; i<ns_total; ++i) {
+		if (indicator_snp[i]==0) {continue;}		
+
+		outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"
+				<<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t";				
+		outfile<<scientific<<setprecision(6)<<gsl_vector_get(alpha, t)<<"\t";
+		outfile<<0.0<<"\t"<<0.0<<endl;
+		t++;
+	}		
+	
+	outfile.clear();	
+	outfile.close();	
+	return;
+}
+
+
+void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp, const gsl_matrix *Result_gamma, const size_t w_col) 
+{
+	string file_gamma, file_hyp;
+	file_gamma=path_out+"/"+file_out;
+	file_gamma+=".gamma.txt";
+	file_hyp=path_out+"/"+file_out;
+	file_hyp+=".hyp.txt";
+
+	ofstream outfile_gamma, outfile_hyp;
+		
+	if (flag==0) {
+		outfile_gamma.open (file_gamma.c_str(), ofstream::out);
+		outfile_hyp.open (file_hyp.c_str(), ofstream::out);
+		if (!outfile_gamma) {cout<<"error writing file: "<<file_gamma<<endl; return;}
+		if (!outfile_hyp) {cout<<"error writing file: "<<file_hyp<<endl; return;}
+		
+		outfile_hyp<<"h \t pve \t rho \t pge \t pi \t n_gamma"<<endl;
+		
+		for (size_t i=0; i<s_max; ++i) {
+			outfile_gamma<<"s"<<i<<"\t";
+		}
+		outfile_gamma<<endl;
+	}
+	else {
+		outfile_gamma.open (file_gamma.c_str(), ofstream::app);
+		outfile_hyp.open (file_hyp.c_str(), ofstream::app);
+		if (!outfile_gamma) {cout<<"error writing file: "<<file_gamma<<endl; return;}
+		if (!outfile_hyp) {cout<<"error writing file: "<<file_hyp<<endl; return;}
+		
+		size_t w;
+		if (w_col==0) {w=w_pace;}
+		else {w=w_col;}
+		
+		for (size_t i=0; i<w; ++i) {
+			outfile_hyp<<scientific;
+			for (size_t j=0; j<4; ++j) {
+				outfile_hyp<<setprecision(6)<<gsl_matrix_get (Result_hyp, i, j)<<"\t";
+			}
+			outfile_hyp<<setprecision(6)<<exp(gsl_matrix_get (Result_hyp, i, 4))<<"\t";
+			outfile_hyp<<(int)gsl_matrix_get (Result_hyp, i, 5)<<"\t";
+			outfile_hyp<<endl;
+		}
+		
+		for (size_t i=0; i<w; ++i) {
+			for (size_t j=0; j<s_max; ++j) {
+				outfile_gamma<<(int)gsl_matrix_get (Result_gamma, i, j)<<"\t";
+			}
+			outfile_gamma<<endl;
+		}
+		
+	}
+	
+	outfile_hyp.close();
+	outfile_hyp.clear();
+	outfile_gamma.close();
+	outfile_gamma.clear();	
+	return;
+}
+
+*/
+
+//X is a p by n matrix
+void LDR::VB (const vector<vector<unsigned char> > &Xt, const gsl_matrix *W_gsl, const gsl_vector *y_gsl)
+{ 
+  //save gsl_vector and gsl_matrix into eigen library formats
+  MatrixXd W(W_gsl->size1, W_gsl->size2);
+  VectorXd y(y_gsl->size);
+  VectorXd x_col(y_gsl->size);
+
+  double d;
+  for (size_t i=0; i<W_gsl->size1; i++) {
+    d=gsl_vector_get(y_gsl, i);
+    y(i)=d;
+    for (size_t j=0; j<W_gsl->size2; j++) {
+      W(i,j)=gsl_matrix_get(W_gsl, i, j);
+    }
+  }
+
+  //initial VB values by lm
+  cout<<indicator_snp[0]<<" "<<indicator_snp[1]<<" "<<indicator_snp[2]<<endl;
+  uchar_matrix_get_row (Xt, 0, x_col);
+
+  for (size_t j=0; j<10; j++) {
+    cout<<x_col(j)<<endl;
+  }
+
+
+  //run VB iterations
+
+
+
+  //save results
+
+  return;
+}
diff --git a/src/ldr.h b/src/ldr.h
new file mode 100644
index 0000000..d81048c
--- /dev/null
+++ b/src/ldr.h
@@ -0,0 +1,77 @@
+/*
+ Genome-wide Efficient Mixed Model Association (GEMMA)
+ Copyright (C) 2011  Xiang Zhou
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef __LDR_H__                
+#define __LDR_H__
+
+#include <vector>
+#include <map>
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+
+#ifdef FORCE_FLOAT
+#include "param_float.h"
+#else
+#include "param.h"
+#endif
+
+
+using namespace std;
+
+
+
+
+
+
+class LDR {
+
+public:	
+	// IO related parameters
+	int a_mode;	
+	size_t d_pace;
+	
+	string file_bfile;
+	string file_geno;
+	string file_out;
+	string path_out;
+		
+	// Summary statistics
+	size_t ni_total, ns_total;	//number of total individuals and snps
+	size_t ni_test, ns_test;	//number of individuals and snps used for analysis
+	size_t n_cvt;				//number of covariates
+	vector<int> indicator_idv;				//indicator for individuals (phenotypes), 0 missing, 1 available for analysis
+	vector<int> indicator_snp;				//sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis
+	
+	vector<SNPINFO> snpInfo;		//record SNP information
+	
+	// Not included in PARAM
+	gsl_rng *gsl_r; 	
+	
+	// Main Functions
+	void CopyFromParam (PARAM &cPar);
+	void CopyToParam (PARAM &cPar);
+	
+	void VB(const vector<vector<unsigned char> > &Xt, const gsl_matrix *W_gsl, const gsl_vector *y_gsl);
+};
+
+
+
+#endif
+
+
diff --git a/src/logistic.cpp b/src/logistic.cpp
new file mode 100644
index 0000000..1b47946
--- /dev/null
+++ b/src/logistic.cpp
@@ -0,0 +1,783 @@
+#include <stdio.h>
+#include <math.h>
+#include <gsl/gsl_matrix.h>
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_multimin.h>
+#include <gsl/gsl_sf.h>
+#include <gsl/gsl_linalg.h>
+#include "logistic.h"
+
+// I need to bundle all the data that goes to the function to optimze together. 
+typedef struct{
+  gsl_matrix_int *X;
+  gsl_vector_int *nlev;
+  gsl_vector *y;
+  gsl_matrix *Xc;   // continuous covariates  Matrix Nobs x Kc (NULL if not used)
+  double lambdaL1;
+  double lambdaL2;
+}fix_parm_mixed_T;
+
+
+
+
+
+
+double fLogit_mixed(gsl_vector *beta
+		    ,gsl_matrix_int *X
+		    ,gsl_vector_int *nlev
+		    ,gsl_matrix *Xc
+		    ,gsl_vector *y
+		    ,double lambdaL1
+		    ,double lambdaL2)
+{
+  int n = y->size; 
+  //  int k = X->size2; 
+  int npar = beta->size; 
+  double total = 0;
+  double aux = 0;
+  /*   omp_set_num_threads(ompthr); */
+  /*   /\* Changed loop start at 1 instead of 0 to avoid regularization of beta_0*\/ */
+  /*   /\*#pragma omp parallel for reduction (+:total)*\/ */
+  for(int i = 1; i < npar; ++i)
+    total += beta->data[i]*beta->data[i];
+  total = (-total*lambdaL2/2);
+  /*   /\*#pragma omp parallel for reduction (+:aux)*\/ */
+  for(int i = 1; i < npar; ++i)
+    aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]);
+  total = total-aux*lambdaL1;
+  /* #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y) reduction (+:total) */
+  for(int i = 0; i < n; ++i) {
+    double Xbetai=beta->data[0];
+    int iParm=1;
+    for(int k = 0; k < X->size2; ++k) {
+      if(gsl_matrix_int_get(X,i,k)>0)
+	Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm];
+      iParm+=nlev->data[k]-1;
+    }
+    for(int k = 0; k < (Xc->size2); ++k) 
+      Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++];
+    total += y->data[i]*Xbetai-gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
+  }
+  return -total;
+} 
+
+
+void logistic_mixed_pred(gsl_vector *beta  // Vector of parameters length = 1 + Sum_k(C_k - 1)
+			 ,gsl_matrix_int *X  //Matrix Nobs x K 
+			 ,gsl_vector_int *nlev // Vector with number categories
+			 ,gsl_matrix *Xc   // continuous covariates  Matrix Nobs x Kc (NULL if not used)
+			 ,gsl_vector *yhat //Vector of prob. predicted by the logistic
+			 )
+{
+  for(int i = 0; i < X->size1; ++i) {
+    double Xbetai=beta->data[0];
+    int iParm=1;
+    for(int k = 0; k < X->size2; ++k) {
+      if(gsl_matrix_int_get(X,i,k)>0)
+	Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm];
+      iParm+=nlev->data[k]-1;
+    }
+    // Adding the continuous
+    for(int k = 0; k < (Xc->size2); ++k) 
+      Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++];
+    yhat->data[i]=1/(1 + gsl_sf_exp(-Xbetai));
+  }
+}
+
+
+/* The gradient of f, df = (df/dx, df/dy). */
+void 
+wgsl_mixed_optim_df (const gsl_vector *beta, void *params, 
+		     gsl_vector *out)
+{
+  fix_parm_mixed_T *p = (fix_parm_mixed_T *)params;
+  int n = p->y->size; 
+  int K = p->X->size2; 
+  int Kc = p->Xc->size2; 
+  int npar = beta->size; 
+  // Intitialize gradient out necessary?
+  for(int i = 0; i < npar; ++i) 
+    out->data[i]= 0; 
+  /* Changed loop start at 1 instead of 0 to avoid regularization of beta 0 */
+  for(int i = 1; i < npar; ++i)
+    out->data[i]= p->lambdaL2*beta->data[i]; 
+  for(int i = 1; i < npar; ++i)
+    out->data[i]+= p->lambdaL1*((beta->data[i]>0)-(beta->data[i]<0));
+  
+  for(int i = 0; i < n; ++i) {
+    double pn=0;
+    double Xbetai=beta->data[0];
+    int iParm=1;
+    for(int k = 0; k < K; ++k) {
+      if(gsl_matrix_int_get(p->X,i,k)>0)
+	Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm];
+      iParm+=p->nlev->data[k]-1;
+    }
+    // Adding the continuous
+    for(int k = 0; k < Kc; ++k) 
+      Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm++];
+
+    pn= -( p->y->data[i] - 1/(1 + gsl_sf_exp(-Xbetai)) );
+
+    out->data[0]+= pn;
+    iParm=1;
+    for(int k = 0; k < K; ++k) {
+      if(gsl_matrix_int_get(p->X,i,k)>0)
+	out->data[gsl_matrix_int_get(p->X,i,k)-1+iParm]+=pn;
+      iParm+=p->nlev->data[k]-1;
+    }
+    // Adding the continuous
+    for(int k = 0; k < Kc; ++k) {
+      out->data[iParm++] += gsl_matrix_get(p->Xc,i,k)*pn;
+    }
+  }
+
+}
+
+
+/* The Hessian of f */
+void 
+wgsl_mixed_optim_hessian (const gsl_vector *beta, void *params, 
+			  gsl_matrix *out)
+{
+  fix_parm_mixed_T *p = (fix_parm_mixed_T *)params;
+  int n = p->y->size; 
+  int K = p->X->size2; 
+  int Kc = p->Xc->size2; 
+  int npar = beta->size; 
+  gsl_vector *gn = gsl_vector_alloc(npar); /* gn */
+  // Intitialize Hessian out necessary ???
+  gsl_matrix_set_zero(out);
+  /* Changed loop start at 1 instead of 0 to avoid regularization of beta 0*/
+  for(int i = 1; i < npar; ++i)
+    gsl_matrix_set(out,i,i,(p->lambdaL2));  // Double check this
+  // L1 penalty not working yet, as not differentiable, I may need to do coordinate descent (as in glm_net)
+  
+  for(int i = 0; i < n; ++i) {
+    double pn=0;
+    double aux=0;
+    double Xbetai=beta->data[0];
+    int iParm1=1;
+    for(int k = 0; k < K; ++k) {
+      if(gsl_matrix_int_get(p->X,i,k)>0)
+	Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm1];
+      iParm1+=p->nlev->data[k]-1;  //-1?
+    }
+    // Adding the continuous
+    for(int k = 0; k < Kc; ++k) 
+      Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm1++];
+
+    pn= 1/(1 + gsl_sf_exp(-Xbetai));
+    // Add a protection for pn very close to 0 or 1?
+    aux=pn*(1-pn);
+
+    // Calculate sub-gradient vector gn 
+    gsl_vector_set_zero(gn);
+    gn->data[0]= 1;
+    iParm1=1;
+    for(int k = 0; k < K; ++k) {
+      if(gsl_matrix_int_get(p->X,i,k)>0)
+	gn->data[gsl_matrix_int_get(p->X,i,k)-1+iParm1]=1;
+      iParm1+=p->nlev->data[k]-1;
+    }
+    // Adding the continuous
+    for(int k = 0; k < Kc; ++k) {
+      gn->data[iParm1++] = gsl_matrix_get(p->Xc,i,k);
+    }
+
+    for(int k1=0;k1<npar; ++k1)
+      if(gn->data[k1]!=0)
+	for(int k2=0;k2<npar; ++k2)
+	  if(gn->data[k2]!=0)
+	    *gsl_matrix_ptr(out,k1,k2) += (aux * gn->data[k1] * gn->data[k2]);
+  }
+  gsl_vector_free(gn);
+}
+
+
+double wgsl_mixed_optim_f(gsl_vector *v, void *params)
+{
+  double mLogLik=0;
+  fix_parm_mixed_T *p = (fix_parm_mixed_T *)params;
+  mLogLik = fLogit_mixed(v,p->X,p->nlev,p->Xc,p->y,p->lambdaL1,p->lambdaL2);
+  return mLogLik; 
+}
+
+
+/* Compute both f and df together. */
+void 
+wgsl_mixed_optim_fdf (gsl_vector *x, void *params, 
+		      double *f, gsl_vector *df) 
+{
+  *f = wgsl_mixed_optim_f(x, params); 
+  wgsl_mixed_optim_df(x, params, df);
+}
+
+
+int logistic_mixed_fit(gsl_vector *beta
+		       ,gsl_matrix_int *X
+		       ,gsl_vector_int *nlev
+		       ,gsl_matrix *Xc   // continuous covariates  Matrix Nobs x Kc (NULL if not used)
+		       ,gsl_vector *y
+		       ,double lambdaL1
+		       ,double lambdaL2)
+{
+
+  double mLogLik=0;
+  fix_parm_mixed_T p;
+  int npar = beta->size; 
+  int iter=0;
+  double maxchange=0;
+
+  //Intializing fix parameters
+  p.X=X;
+  p.Xc=Xc;
+  p.nlev=nlev;
+  p.y=y;
+  p.lambdaL1=lambdaL1;
+  p.lambdaL2=lambdaL2;
+  
+  //Initial fit
+  //#ifdef _RPR_DEBUG_
+  mLogLik = wgsl_mixed_optim_f(beta,&p);
+  //fprintf(stderr,"#Initial -log(Lik(0))=%lf\n",mLogLik);
+  //#endif //_RPR_DEBUG
+
+  gsl_matrix *myH = gsl_matrix_alloc(npar,npar); /* Hessian matrix*/
+  gsl_vector *stBeta = gsl_vector_alloc(npar); /* Direction to move */
+
+  gsl_vector *myG = gsl_vector_alloc(npar); /* Gradient*/
+  gsl_vector *tau = gsl_vector_alloc(npar); /* tau for QR*/
+
+  for(iter=0;iter<100;iter++){ 
+    wgsl_mixed_optim_hessian(beta,&p,myH); //Calculate Hessian
+    wgsl_mixed_optim_df(beta,&p,myG);      //Calculate Gradient
+    gsl_linalg_QR_decomp(myH,tau);   //Calculate next beta
+    gsl_linalg_QR_solve(myH,tau,myG,stBeta);
+    gsl_vector_sub(beta,stBeta);
+    
+    //Monitor convergence
+    maxchange=0;
+    for(int i=0;i<npar; i++)
+      if(maxchange<fabs(stBeta->data[i]))
+	maxchange=fabs(stBeta->data[i]);
+
+#ifdef _RPR_DEBUG_
+    //mLogLik = wgsl_mixed_optim_f(beta,&p);
+    //fprintf(stderr,"#iter %d, -log(Lik(0))=%lf,%lf\n",(int)iter,mLogLik,maxchange);
+#endif //_RPR_DEBUG
+
+    if(maxchange<1E-4)
+      break;
+  }
+
+#ifdef _RPR_DEBUG_
+  //for (int i = 0; i < npar; i++)
+  //  fprintf(stderr,"#par_%d= %lf\n",i,beta->data[i]);
+#endif //_RPR_DEBUG
+
+  //Final fit
+  mLogLik = wgsl_mixed_optim_f(beta,&p);
+  //fprintf(stderr,"#Final %d) -log(Lik(0))=%lf, maxchange %lf\n",iter,mLogLik,maxchange);
+
+  gsl_vector_free (tau);
+  gsl_vector_free (stBeta);
+  gsl_vector_free (myG);
+  gsl_matrix_free (myH);
+
+  return 0;
+}
+
+/***************/
+/* Categorical */
+/***************/
+
+// I need to bundle all the data that goes to the function to optimze together. 
+typedef struct{
+  gsl_matrix_int *X;
+  gsl_vector_int *nlev;
+  gsl_vector *y;
+  double lambdaL1;
+  double lambdaL2;
+}fix_parm_cat_T;
+
+
+double fLogit_cat(gsl_vector *beta
+		  ,gsl_matrix_int *X
+		  ,gsl_vector_int *nlev
+		  ,gsl_vector *y
+		  ,double lambdaL1
+		  ,double lambdaL2)
+{
+  int n = y->size; 
+  //  int k = X->size2; 
+  int npar = beta->size; 
+  double total = 0;
+  double aux = 0;
+  /*   omp_set_num_threads(ompthr); */
+  /*   /\* Changed loop start at 1 instead of 0 to avoid regularization of beta 0*\/ */
+  /*   /\*#pragma omp parallel for reduction (+:total)*\/ */
+  for(int i = 1; i < npar; ++i)
+    total += beta->data[i]*beta->data[i];
+  total = (-total*lambdaL2/2);
+  /*   /\*#pragma omp parallel for reduction (+:aux)*\/ */
+  for(int i = 1; i < npar; ++i)
+    aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]);
+  total = total-aux*lambdaL1;
+  /* #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y) reduction (+:total) */
+  for(int i = 0; i < n; ++i) {
+    double Xbetai=beta->data[0];
+    int iParm=1;
+    for(int k = 0; k < X->size2; ++k) {
+      if(gsl_matrix_int_get(X,i,k)>0)
+	Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm];
+      iParm+=nlev->data[k]-1;
+    }
+    total += y->data[i]*Xbetai-gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
+  }
+  return -total;
+} 
+
+
+void logistic_cat_pred(gsl_vector *beta  // Vector of parameters length = 1 + Sum_k(C_k - 1)
+		       ,gsl_matrix_int *X  //Matrix Nobs x K 
+		       ,gsl_vector_int *nlev // Vector with number categories
+		       ,gsl_vector *yhat //Vector of prob. predicted by the logistic
+		       )
+{
+  for(int i = 0; i < X->size1; ++i) {
+    double Xbetai=beta->data[0];
+    int iParm=1;
+    for(int k = 0; k < X->size2; ++k) {
+      if(gsl_matrix_int_get(X,i,k)>0)
+	Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm];
+      iParm+=nlev->data[k]-1;
+    }
+    yhat->data[i]=1/(1 + gsl_sf_exp(-Xbetai));
+  }
+}
+
+
+/* The gradient of f, df = (df/dx, df/dy). */
+void 
+wgsl_cat_optim_df (const gsl_vector *beta, void *params, 
+		   gsl_vector *out)
+{
+  fix_parm_cat_T *p = (fix_parm_cat_T *)params;
+  int n = p->y->size; 
+  int K = p->X->size2; 
+  int npar = beta->size; 
+  // Intitialize gradient out necessary?
+  for(int i = 0; i < npar; ++i) 
+    out->data[i]= 0; 
+  /* Changed loop start at 1 instead of 0 to avoid regularization of beta 0 */
+  for(int i = 1; i < npar; ++i)
+    out->data[i]= p->lambdaL2*beta->data[i]; 
+  for(int i = 1; i < npar; ++i)
+    out->data[i]+= p->lambdaL1*((beta->data[i]>0)-(beta->data[i]<0));
+  
+  for(int i = 0; i < n; ++i) {
+    double pn=0;
+    double Xbetai=beta->data[0];
+    int iParm=1;
+    for(int k = 0; k < K; ++k) {
+      if(gsl_matrix_int_get(p->X,i,k)>0)
+	Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm];
+      iParm+=p->nlev->data[k]-1;
+    }
+    //    total += y->data[i]*Xbetai-log(1+gsl_sf_exp(Xbetai));
+    pn= -( p->y->data[i] - 1/(1 + gsl_sf_exp(-Xbetai)) );
+
+    out->data[0]+= pn;
+    iParm=1;
+    for(int k = 0; k < K; ++k) {
+      if(gsl_matrix_int_get(p->X,i,k)>0)
+	out->data[gsl_matrix_int_get(p->X,i,k)-1+iParm]+=pn;
+      iParm+=p->nlev->data[k]-1;
+    }
+  }
+}
+
+
+/* The Hessian of f */
+void 
+wgsl_cat_optim_hessian (const gsl_vector *beta, void *params, 
+			gsl_matrix *out)
+{
+  fix_parm_cat_T *p = (fix_parm_cat_T *)params;
+  int n = p->y->size; 
+  int K = p->X->size2; 
+  int npar = beta->size; 
+  // Intitialize Hessian out necessary ???
+  gsl_matrix_set_zero(out);
+  /* Changed loop start at 1 instead of 0 to avoid regularization of beta 0*/
+  for(int i = 1; i < npar; ++i)
+    gsl_matrix_set(out,i,i,(p->lambdaL2));  // Double check this
+  // L1 penalty not working yet, as not differentiable, I may need to do coordinate descent (as in glm_net)
+
+  
+  for(int i = 0; i < n; ++i) {
+    double pn=0;
+    double aux=0;
+    double Xbetai=beta->data[0];
+    int iParm2=1;
+    int iParm1=1;
+    for(int k = 0; k < K; ++k) {
+      if(gsl_matrix_int_get(p->X,i,k)>0)
+	Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm1];
+      iParm1+=p->nlev->data[k]-1;  //-1?
+    }
+    //    total += y->data[i]*Xbetai-log(1+gsl_sf_exp(Xbetai));
+    pn= 1/(1 + gsl_sf_exp(-Xbetai));
+    // Add a protection for pn very close to 0 or 1?
+    aux=pn*(1-pn);
+    *gsl_matrix_ptr(out,0,0)+=aux;
+    iParm2=1;
+    for(int k2 = 0; k2 < K; ++k2) {
+      if(gsl_matrix_int_get(p->X,i,k2)>0)
+	*gsl_matrix_ptr(out,0,gsl_matrix_int_get(p->X,i,k2)-1+iParm2)+=aux;
+      iParm2+=p->nlev->data[k2]-1;   //-1?
+    }
+    iParm1=1;
+    for(int k1 = 0; k1 < K; ++k1) {
+      if(gsl_matrix_int_get(p->X,i,k1)>0)
+	*gsl_matrix_ptr(out,gsl_matrix_int_get(p->X,i,k1)-1+iParm1,0)+=aux;
+      iParm2=1;
+      for(int k2 = 0; k2 < K; ++k2) {
+	if((gsl_matrix_int_get(p->X,i,k1)>0) && (gsl_matrix_int_get(p->X,i,k2)>0))
+	  *gsl_matrix_ptr(out
+			  ,gsl_matrix_int_get(p->X,i,k1)-1+iParm1
+			  ,gsl_matrix_int_get(p->X,i,k2)-1+iParm2
+			  )+=aux;
+	iParm2+=p->nlev->data[k2]-1;  //-1?
+      }
+      iParm1+=p->nlev->data[k1]-1; //-1?
+    }
+  }
+}
+
+
+double wgsl_cat_optim_f(gsl_vector *v, void *params)
+{
+  double mLogLik=0;
+  fix_parm_cat_T *p = (fix_parm_cat_T *)params;
+  mLogLik = fLogit_cat(v,p->X,p->nlev,p->y,p->lambdaL1,p->lambdaL2);
+  return mLogLik; 
+}
+
+
+/* Compute both f and df together. */
+void 
+wgsl_cat_optim_fdf (gsl_vector *x, void *params, 
+		    double *f, gsl_vector *df) 
+{
+  *f = wgsl_cat_optim_f(x, params); 
+  wgsl_cat_optim_df(x, params, df);
+}
+
+
+int logistic_cat_fit(gsl_vector *beta
+		     ,gsl_matrix_int *X
+		     ,gsl_vector_int *nlev
+		     ,gsl_vector *y
+		     ,double lambdaL1
+		     ,double lambdaL2)
+{
+  double mLogLik=0;
+  fix_parm_cat_T p;
+  int npar = beta->size; 
+  int iter=0;
+  double maxchange=0;
+
+  //Intializing fix parameters
+  p.X=X;
+  p.nlev=nlev;
+  p.y=y;
+  p.lambdaL1=lambdaL1;
+  p.lambdaL2=lambdaL2;
+  
+  //Initial fit
+  //#ifdef _RPR_DEBUG_
+  mLogLik = wgsl_cat_optim_f(beta,&p);
+  //fprintf(stderr,"#Initial -log(Lik(0))=%lf\n",mLogLik);
+  //#endif //_RPR_DEBUG
+
+  gsl_matrix *myH = gsl_matrix_alloc(npar,npar); /* Hessian matrix*/
+  gsl_vector *stBeta = gsl_vector_alloc(npar); /* Direction to move */
+
+  gsl_vector *myG = gsl_vector_alloc(npar); /* Gradient*/
+  gsl_vector *tau = gsl_vector_alloc(npar); /* tau for QR*/
+
+  for(iter=0;iter<100;iter++){ 
+    wgsl_cat_optim_hessian(beta,&p,myH); //Calculate Hessian
+    wgsl_cat_optim_df(beta,&p,myG);      //Calculate Gradient
+    gsl_linalg_QR_decomp(myH,tau);   //Calculate next beta
+    gsl_linalg_QR_solve(myH,tau,myG,stBeta);
+    gsl_vector_sub(beta,stBeta);
+    
+    //Monitor convergence
+    maxchange=0;
+    for(int i=0;i<npar; i++)
+      if(maxchange<fabs(stBeta->data[i]))
+	maxchange=fabs(stBeta->data[i]);
+
+#ifdef _RPR_DEBUG_
+    mLogLik = wgsl_cat_optim_f(beta,&p);
+    //fprintf(stderr,"#iter %d, -log(Lik(0))=%lf,%lf\n",(int)iter,mLogLik,maxchange);
+#endif //_RPR_DEBUG
+
+    if(maxchange<1E-4)
+      break;
+  }
+
+#ifdef _RPR_DEBUG_
+  //for (int i = 0; i < npar; i++)
+  //  fprintf(stderr,"#par_%d= %lf\n",i,beta->data[i]);
+#endif //_RPR_DEBUG
+
+  //Final fit
+  mLogLik = wgsl_cat_optim_f(beta,&p);
+  //fprintf(stderr,"#Final %d) -log(Lik(0))=%lf, maxchange %lf\n",iter,mLogLik,maxchange);
+
+  gsl_vector_free (tau);
+  gsl_vector_free (stBeta);
+  gsl_vector_free (myG);
+  gsl_matrix_free (myH);
+
+  return 0;
+}
+
+
+/***************/
+/* Continuous  */
+/***************/
+
+// I need to bundle all the data that goes to the function to optimze together. 
+typedef struct{
+  gsl_matrix *Xc;   // continuous covariates  Matrix Nobs x Kc 
+  gsl_vector *y;
+  double lambdaL1;
+  double lambdaL2;
+}fix_parm_cont_T;
+
+
+double fLogit_cont(gsl_vector *beta
+		   ,gsl_matrix *Xc
+		   ,gsl_vector *y
+		   ,double lambdaL1
+		   ,double lambdaL2)
+{
+  int n = y->size; 
+  int npar = beta->size; 
+  double total = 0;
+  double aux = 0;
+  /*   omp_set_num_threads(ompthr); */
+  /*   /\* Changed loop start at 1 instead of 0 to avoid regularization of beta_0*\/ */
+  /*   /\*#pragma omp parallel for reduction (+:total)*\/ */
+  for(int i = 1; i < npar; ++i)
+    total += beta->data[i]*beta->data[i];
+  total = (-total*lambdaL2/2);
+  /*   /\*#pragma omp parallel for reduction (+:aux)*\/ */
+  for(int i = 1; i < npar; ++i)
+    aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]);
+  total = total-aux*lambdaL1;
+  /* #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y) reduction (+:total) */
+  for(int i = 0; i < n; ++i) {
+    double Xbetai=beta->data[0];
+    int iParm=1;
+    for(int k = 0; k < (Xc->size2); ++k) 
+      Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++];
+    total += y->data[i]*Xbetai-gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
+  }
+  return -total;
+} 
+
+
+void logistic_cont_pred(gsl_vector *beta  // Vector of parameters length = 1 + Sum_k(C_k - 1)
+			,gsl_matrix *Xc   // continuous covariates  Matrix Nobs x Kc (NULL if not used)
+			,gsl_vector *yhat //Vector of prob. predicted by the logistic
+			)
+{
+  for(int i = 0; i < Xc->size1; ++i) {
+    double Xbetai=beta->data[0];
+    int iParm=1;
+    for(int k = 0; k < (Xc->size2); ++k) 
+      Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++];
+    yhat->data[i]=1/(1 + gsl_sf_exp(-Xbetai));
+  }
+}
+
+
+/* The gradient of f, df = (df/dx, df/dy). */
+void 
+wgsl_cont_optim_df (const gsl_vector *beta, void *params, 
+		    gsl_vector *out)
+{
+  fix_parm_cont_T *p = (fix_parm_cont_T *)params;
+  int n = p->y->size; 
+  int Kc = p->Xc->size2; 
+  int npar = beta->size; 
+  // Intitialize gradient out necessary?
+  for(int i = 0; i < npar; ++i) 
+    out->data[i]= 0; 
+  /* Changed loop start at 1 instead of 0 to avoid regularization of beta 0 */
+  for(int i = 1; i < npar; ++i)
+    out->data[i]= p->lambdaL2*beta->data[i]; 
+  for(int i = 1; i < npar; ++i)
+    out->data[i]+= p->lambdaL1*((beta->data[i]>0)-(beta->data[i]<0));
+  
+  for(int i = 0; i < n; ++i) {
+    double pn=0;
+    double Xbetai=beta->data[0];
+    int iParm=1;
+    for(int k = 0; k < Kc; ++k) 
+      Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm++];
+
+    pn= -( p->y->data[i] - 1/(1 + gsl_sf_exp(-Xbetai)) );
+
+    out->data[0]+= pn;
+    iParm=1;
+    // Adding the continuous
+    for(int k = 0; k < Kc; ++k) {
+      out->data[iParm++] += gsl_matrix_get(p->Xc,i,k)*pn;
+    }
+  }
+}
+
+
+/* The Hessian of f */
+void 
+wgsl_cont_optim_hessian (const gsl_vector *beta, void *params, 
+			 gsl_matrix *out)
+{
+  fix_parm_cont_T *p = (fix_parm_cont_T *)params;
+  int n = p->y->size; 
+  int Kc = p->Xc->size2; 
+  int npar = beta->size; 
+  gsl_vector *gn = gsl_vector_alloc(npar); /* gn */
+  // Intitialize Hessian out necessary ???
+  gsl_matrix_set_zero(out);
+  /* Changed loop start at 1 instead of 0 to avoid regularization of beta 0*/
+  for(int i = 1; i < npar; ++i)
+    gsl_matrix_set(out,i,i,(p->lambdaL2));  // Double check this
+  // L1 penalty not working yet, as not differentiable, I may need to do coordinate descent (as in glm_net)
+  
+  for(int i = 0; i < n; ++i) {
+    double pn=0;
+    double aux=0;
+    double Xbetai=beta->data[0];
+    int iParm1=1;
+    for(int k = 0; k < Kc; ++k) 
+      Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm1++];
+
+    pn= 1/(1 + gsl_sf_exp(-Xbetai));
+    // Add a protection for pn very close to 0 or 1?
+    aux=pn*(1-pn);
+
+    // Calculate sub-gradient vector gn 
+    gsl_vector_set_zero(gn);
+    gn->data[0]= 1;
+    iParm1=1;
+    for(int k = 0; k < Kc; ++k) {
+      gn->data[iParm1++] = gsl_matrix_get(p->Xc,i,k);
+    }
+
+    for(int k1=0;k1<npar; ++k1)
+      if(gn->data[k1]!=0)
+	for(int k2=0;k2<npar; ++k2)
+	  if(gn->data[k2]!=0)
+	    *gsl_matrix_ptr(out,k1,k2) += (aux * gn->data[k1] * gn->data[k2]);
+  }
+  gsl_vector_free(gn);
+}
+
+
+double wgsl_cont_optim_f(gsl_vector *v, void *params)
+{
+  double mLogLik=0;
+  fix_parm_cont_T *p = (fix_parm_cont_T *)params;
+  mLogLik = fLogit_cont(v,p->Xc,p->y,p->lambdaL1,p->lambdaL2);
+  return mLogLik; 
+}
+
+
+/* Compute both f and df together. */
+void 
+wgsl_cont_optim_fdf (gsl_vector *x, void *params, 
+		     double *f, gsl_vector *df) 
+{
+  *f = wgsl_cont_optim_f(x, params); 
+  wgsl_cont_optim_df(x, params, df);
+}
+
+
+int logistic_cont_fit(gsl_vector *beta
+		      ,gsl_matrix *Xc   // continuous covariates  Matrix Nobs x Kc (NULL if not used)
+		      ,gsl_vector *y
+		      ,double lambdaL1
+		      ,double lambdaL2)
+{
+
+  double mLogLik=0;
+  fix_parm_cont_T p;
+  int npar = beta->size; 
+  int iter=0;
+  double maxchange=0;
+
+  //Intializing fix parameters
+  p.Xc=Xc;
+  p.y=y;
+  p.lambdaL1=lambdaL1;
+  p.lambdaL2=lambdaL2;
+  
+  //Initial fit
+  //#ifdef _RPR_DEBUG_
+  mLogLik = wgsl_cont_optim_f(beta,&p);
+  //fprintf(stderr,"#Initial -log(Lik(0))=%lf\n",mLogLik);
+  //#endif //_RPR_DEBUG
+
+  gsl_matrix *myH = gsl_matrix_alloc(npar,npar); /* Hessian matrix*/
+  gsl_vector *stBeta = gsl_vector_alloc(npar); /* Direction to move */
+
+  gsl_vector *myG = gsl_vector_alloc(npar); /* Gradient*/
+  gsl_vector *tau = gsl_vector_alloc(npar); /* tau for QR*/
+
+  for(iter=0;iter<100;iter++){ 
+    wgsl_cont_optim_hessian(beta,&p,myH); //Calculate Hessian
+    wgsl_cont_optim_df(beta,&p,myG);      //Calculate Gradient
+    gsl_linalg_QR_decomp(myH,tau);   //Calculate next beta
+    gsl_linalg_QR_solve(myH,tau,myG,stBeta);
+    gsl_vector_sub(beta,stBeta);
+    
+    //Monitor convergence
+    maxchange=0;
+    for(int i=0;i<npar; i++)
+      if(maxchange<fabs(stBeta->data[i]))
+	maxchange=fabs(stBeta->data[i]);
+
+#ifdef _RPR_DEBUG_
+    mLogLik = wgsl_cont_optim_f(beta,&p);
+    //fprintf(stderr,"#iter %d, -log(Lik(0))=%lf,%lf\n",(int)iter,mLogLik,maxchange);
+#endif //_RPR_DEBUG
+
+    if(maxchange<1E-4)
+      break;
+  }
+
+#ifdef _RPR_DEBUG_
+  //for (int i = 0; i < npar; i++)
+  //  fprintf(stderr,"#par_%d= %lf\n",i,beta->data[i]);
+#endif //_RPR_DEBUG
+
+  //Final fit
+  mLogLik = wgsl_cont_optim_f(beta,&p);
+  //fprintf(stderr,"#Final %d) -log(Lik(0))=%lf, maxchange %lf\n",iter,mLogLik,maxchange);
+
+  gsl_vector_free (tau);
+  gsl_vector_free (stBeta);
+  gsl_vector_free (myG);
+  gsl_matrix_free (myH);
+
+  return 0;
+}
+
diff --git a/src/logistic.h b/src/logistic.h
new file mode 100644
index 0000000..a68ee09
--- /dev/null
+++ b/src/logistic.h
@@ -0,0 +1,70 @@
+#ifndef LOGISTIC_H_   /* Include guard */
+#define LOGISTIC_H_
+
+/* Mixed interface */
+void logistic_mixed_pred(gsl_vector *beta  // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
+			 ,gsl_matrix_int *X  //Matrix Nobs x K 
+			 ,gsl_vector_int *nlev // Vector with number categories
+			 ,gsl_matrix *Xc   // continuous covariates  Matrix Nobs x Kc 
+			 ,gsl_vector *yhat //Vector of prob. predicted by the logistic
+			 );
+ 
+int logistic_mixed_fit(gsl_vector *beta  // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
+		       ,gsl_matrix_int *X  //Matrix Nobs x K 
+		       ,gsl_vector_int *nlev // Vector with number categories
+		       ,gsl_matrix *Xc   // continuous covariates  Matrix Nobs x Kc 
+		       ,gsl_vector *y //Vector of prob. to predict
+		       ,double lambdaL1 // Regularization L1 0.0 if not used
+		       ,double lambdaL2); // Regularization L2 0.0 if not used
+
+double fLogit_mixed(gsl_vector *beta
+		    ,gsl_matrix_int *X
+		    ,gsl_vector_int *nlev
+		    ,gsl_matrix *Xc   // continuous covariates  Matrix Nobs x Kc 
+		    ,gsl_vector *y
+		    ,double lambdaL1
+		    ,double lambdaL2);
+
+
+/* Categorical only interface */
+void logistic_cat_pred(gsl_vector *beta  // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
+		       ,gsl_matrix_int *X  //Matrix Nobs x K 
+		       ,gsl_vector_int *nlev // Vector with number categories
+		       ,gsl_vector *yhat //Vector of prob. predicted by the logistic
+		       );
+ 
+int logistic_cat_fit(gsl_vector *beta  // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
+		     ,gsl_matrix_int *X  //Matrix Nobs x K 
+		     ,gsl_vector_int *nlev // Vector with number categories
+		     ,gsl_vector *y //Vector of prob. to predict
+		     ,double lambdaL1 // Regularization L1 0.0 if not used
+		     ,double lambdaL2); // Regularization L2 0.0 if not used
+
+double fLogit_cat(gsl_vector *beta
+		  ,gsl_matrix_int *X
+		  ,gsl_vector_int *nlev
+		  ,gsl_vector *y
+		  ,double lambdaL1
+		  ,double lambdaL2);
+
+
+/* Continuous only interface */
+void logistic_cont_pred(gsl_vector *beta  // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
+			,gsl_matrix *Xc   // continuous covariates  Matrix Nobs x Kc 
+			,gsl_vector *yhat //Vector of prob. predicted by the logistic
+			);
+ 
+int logistic_cont_fit(gsl_vector *beta  // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
+		      ,gsl_matrix *Xc   // continuous covariates  Matrix Nobs x Kc 
+		      ,gsl_vector *y //Vector of prob. to predict
+		      ,double lambdaL1 // Regularization L1 0.0 if not used
+		      ,double lambdaL2); // Regularization L2 0.0 if not used
+
+double fLogit_cont(gsl_vector *beta
+		   ,gsl_matrix *Xc   // continuous covariates  Matrix Nobs x Kc 
+		   ,gsl_vector *y
+		   ,double lambdaL1
+		   ,double lambdaL2);
+
+
+#endif // LOGISTIC_H_
diff --git a/src/varcov.cpp b/src/varcov.cpp
new file mode 100644
index 0000000..fdc6f10
--- /dev/null
+++ b/src/varcov.cpp
@@ -0,0 +1,482 @@
+/*
+	Genome-wide Efficient Mixed Model Association (GEMMA)
+    Copyright (C) 2011  Xiang Zhou
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <iomanip>
+#include <bitset>
+#include <vector>
+#include <map>
+#include <set>
+#include <cstring>
+#include <cmath>
+#include <stdio.h>
+#include <stdlib.h> 
+
+#include "gsl/gsl_vector.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_blas.h"
+#include "gsl/gsl_cdf.h"
+
+#include "lapack.h"
+#include "gzstream.h"
+
+#ifdef FORCE_FLOAT
+#include "param_float.h"
+#include "varcov_float.h"
+#include "io_float.h"
+#include "mathfunc_float.h"
+#else
+#include "param.h"
+#include "varcov.h"
+#include "io.h"
+#include "mathfunc.h"
+#endif
+
+
+using namespace std;
+
+
+
+
+void VARCOV::CopyFromParam (PARAM &cPar)
+{
+	d_pace=cPar.d_pace;
+	
+	file_bfile=cPar.file_bfile;
+	file_geno=cPar.file_geno;
+	file_out=cPar.file_out;
+	path_out=cPar.path_out;
+	
+	time_opt=0.0;
+
+	window_cm=cPar.window_cm;
+	window_bp=cPar.window_bp;
+	window_ns=cPar.window_ns;
+	
+	indicator_idv=cPar.indicator_idv;
+	indicator_snp=cPar.indicator_snp;
+	snpInfo=cPar.snpInfo;
+	
+	return;
+}
+
+
+void VARCOV::CopyToParam (PARAM &cPar)
+{
+	cPar.time_opt=time_opt;
+	return;
+}
+
+
+
+//chr rs ps n_idv allele1 allele0 af var window_size r2 (r2_11,n_11,r2_12,n_12...r2_1m,n_1m)
+void VARCOV::WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub, const vector<vector<double> > &Cov_mat) 
+{
+  string file_cov;
+  file_cov=path_out+"/"+file_out;
+  file_cov+=".cor.txt";
+
+  ofstream outfile;
+
+  if (flag==0) {
+    outfile.open (file_cov.c_str(), ofstream::out);
+    if (!outfile) {cout<<"error writing file: "<<file_cov<<endl; return;}
+		
+    outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_mis"
+	   <<"\t"<<"n_obs"<<"\t"<<"allele1"<<"\t"<<"allele0"
+	   <<"\t"<<"af"<<"\t"<<"window_size"
+	   <<"\t"<<"var"<<"\t"<<"cor"<<endl;
+  } else {
+    outfile.open (file_cov.c_str(), ofstream::app);
+    if (!outfile) {cout<<"error writing file: "<<file_cov<<endl; return;}
+
+    for (size_t i=0; i<Cov_mat.size(); i++) {
+      outfile<<snpInfo_sub[i].chr<<"\t"<<snpInfo_sub[i].rs_number<<"\t"<<snpInfo_sub[i].base_position<<"\t"<<snpInfo_sub[i].n_miss<<"\t"<<snpInfo_sub[i].n_idv<<"\t"<<snpInfo_sub[i].a_minor<<"\t"<<snpInfo_sub[i].a_major<<"\t"<<fixed<<setprecision(3)<<snpInfo_sub[i].maf<<"\t"<<Cov_mat[i].size()-1<<"\t";
+      outfile<<scientific<<setprecision(6)<<Cov_mat[i][0]<<"\t";
+
+      if (Cov_mat[i].size()==1) {
+	outfile<<"NA";
+      } else {
+	for (size_t j=1; j<Cov_mat[i].size(); j++) {
+	  if (j==(Cov_mat[i].size()-1)) {
+	    outfile<<Cov_mat[i][j];
+	  } else {
+	    outfile<<Cov_mat[i][j]<<",";
+	  }
+	}
+      }
+      
+      outfile<<endl;
+    }
+  }
+	
+  outfile.close();
+  outfile.clear();
+  return;
+}
+
+
+
+
+// sort SNPs first based on chromosomes then based on bp (or cm, if bp is not available)
+//if chr1=chr2 and bp1=bp2, then return with the same order
+bool CompareSNPinfo (const SNPINFO &snpInfo1, const SNPINFO &snpInfo2)
+{
+  int c_chr=snpInfo1.chr.compare(snpInfo2.chr);
+  long int c_bp=snpInfo1.base_position-snpInfo2.base_position;
+
+  if(c_chr<0) {
+    return true;
+  } else if (c_chr>0) {
+    return false;
+  } else {
+    if (c_bp<0) {
+      return true;
+    } else if (c_bp>0) {
+      return false;
+    } else {
+      return true;
+    }
+  }
+}
+
+
+// do not sort SNPs (because gzip files do not support random access)
+// then calculate n_nb, the number of neighbours, for each snp
+void VARCOV::CalcNB (vector<SNPINFO> &snpInfo_sort)
+{
+  //  stable_sort(snpInfo_sort.begin(), snpInfo_sort.end(), CompareSNPinfo);
+
+  size_t t2=0, n_nb=0;
+  for (size_t t=0; t<indicator_snp.size(); ++t) {
+    if (indicator_snp[t]==0) {continue;}
+    
+    if (snpInfo_sort[t].chr=="-9" || (snpInfo_sort[t].cM==-9 && window_cm!=0) || (snpInfo_sort[t].base_position==-9 && window_bp!=0) ) {
+      snpInfo_sort[t].n_nb=0; continue;
+    }
+
+    if (t==indicator_snp.size()-1) {snpInfo_sort[t].n_nb=0; continue;}
+
+    t2=t+1; n_nb=0;
+
+    while (t2<indicator_snp.size() && snpInfo_sort[t2].chr==snpInfo_sort[t].chr  && indicator_snp[t2]==0) {t2++;}
+
+    while (t2<indicator_snp.size() && snpInfo_sort[t2].chr==snpInfo_sort[t].chr && (snpInfo_sort[t2].cM-snpInfo_sort[t].cM<window_cm || window_cm==0) && (snpInfo_sort[t2].base_position-snpInfo_sort[t].base_position<window_bp || window_bp==0) && (n_nb<window_ns|| window_ns==0)  ) {
+      t2++; n_nb++;
+      while (t2<indicator_snp.size() && snpInfo_sort[t2].chr==snpInfo_sort[t].chr && indicator_snp[t2]==0) {t2++;}
+    }
+
+    snpInfo_sort[t].n_nb=n_nb;
+  }
+
+  return;
+}
+
+
+
+//vector double is centered to have mean 0
+void Calc_Cor(vector<vector<double> > &X_mat, vector<double> &cov_vec)
+{
+  cov_vec.clear();
+
+  double v1, v2, r;
+  vector<double> x_vec=X_mat[0];
+
+  lapack_ddot(x_vec, x_vec, v1);
+  cov_vec.push_back(v1/(double)x_vec.size() );
+
+  for (size_t i=1; i<X_mat.size(); i++) {
+    lapack_ddot(X_mat[i], x_vec, r);
+    lapack_ddot(X_mat[i], X_mat[i], v2);
+    r/=sqrt(v1*v2);
+
+    cov_vec.push_back(r);
+  }
+
+  return;
+} 
+
+/*
+//somehow this can produce nan for some snps; perhaps due to missing values?
+//vector int is not centered, and have 0/1/2 values only
+//missing value is -9; to calculate covariance, these values are replaced by 
+void Calc_Cor(const vector<vector<int> > &X_mat, vector<double> &cov_vec)
+{
+  cov_vec.clear();
+
+  int d1, d2, m1, m2, n1, n2, n12;
+  double m1d, m2d, m12d, v1d, v2d, v;
+
+  vector<int> x_vec=X_mat[0];
+  
+  //calculate m2
+  m2=0; n2=0;
+  for (size_t j=0; j<x_vec.size(); j++) {
+    d2=x_vec[j];
+    if (d2==-9) {continue;}
+    m2+=d2; n2++;
+  }
+  m2d=(double)m2/(double)n2;
+
+  for (size_t i=0; i<X_mat.size(); i++) {
+    //calculate m1
+    m1=0; n1=0;
+    for (size_t j=0; j<x_vec.size(); j++) {
+      d1=X_mat[i][j];
+      if (d1==-9) {continue;}
+      m1+=d1; n1++;
+    }
+    m1d=(double)m1/(double)n1;
+
+    //calculate v1
+    m1=0; m2=0; m12d=0; n12=0;
+    for (size_t j=0; j<x_vec.size(); j++) {
+      d1=X_mat[i][j];
+      if (d1==-9) {
+	n12++;
+      } else if (d1!=0) {
+	if (d1==1) {m12d+=1;} else if (d1==2) {m12d+=4;} else {m12d+=(double)(d1*d1);}
+      }
+    }
+    
+    v1d=((double)m12d+m1d*(double)m2+m2d*(double)m1+(double)n12*m1d*m2d)/(double)x_vec.size()-m1d*m2d;
+
+    //calculate covariance
+    if (i!=0) {
+      m1=0; m2=0; m12d=0; n12=0;
+      for (size_t j=0; j<x_vec.size(); j++) {
+	d1=X_mat[i][j]; d2=x_vec[j];
+	if (d1==-9 && d2==-9) {
+	  n12++;
+	} else if (d1==-9) {
+	  m2+=d2;
+	} else if (d2==-9) {
+	  m1+=d1;
+	} else if (d1!=0 && d2!=0) {
+	  if (d1==1) {m12d+=d2;} else if (d1==2) {m12d+=d2+d2;} else {m12d+=(double)(d1*d2);}
+	}
+      }
+    
+      v=((double)m12d+m1d*(double)m2+m2d*(double)m1+(double)n12*m1d*m2d)/(double)x_vec.size()-m1d*m2d;
+      v/=sqrt(v1d*v2d);
+    } else {
+      v2d=v1d;
+      v=v1d/(double)x_vec.size();
+    }
+    
+    cov_vec.push_back(v);
+  }
+  return;
+} 
+*/
+
+
+//read the genotype file again, calculate r2 between pairs of SNPs within a window, output the file every 10K SNPs
+//the output results are sorted based on chr and bp
+//output format similar to assoc.txt files (remember n_miss is replaced by n_idv)
+
+//r2 between the current SNP and every following SNPs within the window_size (which can vary if cM was used)
+//read bimbam mean genotype file and calculate the covariance matrix for neighboring SNPs
+//output values at 10000-SNP-interval
+void VARCOV::AnalyzeBimbam ()
+{
+  igzstream infile (file_geno.c_str(), igzstream::in);
+  if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;}
+
+  //calculate the number of right-hand-side neighbours for each SNP
+  vector<SNPINFO> snpInfo_sub;
+  CalcNB(snpInfo);
+
+  size_t ni_test=0;
+  for (size_t i=0; i<indicator_idv.size(); i++) {
+    ni_test+=indicator_idv[i];
+  }
+  
+  gsl_vector *geno=gsl_vector_alloc (ni_test);
+  double geno_mean;
+
+  vector<double> x_vec, cov_vec;
+  vector<vector<double> > X_mat, Cov_mat;
+
+  for (size_t i=0; i<ni_test; i++) {
+    x_vec.push_back(0);
+  }
+
+  WriteCov (0, snpInfo_sub, Cov_mat);
+
+  size_t t2=0, inc;
+  int n_nb=0;
+
+  for (size_t t=0; t<indicator_snp.size(); ++t) {
+    if (t%d_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);}
+    if (indicator_snp[t]==0) {continue;}
+    //    if (t>=2) {break;}    
+
+    if (X_mat.size()==0) {
+      n_nb=snpInfo[t].n_nb+1;
+    } else {
+      n_nb=snpInfo[t].n_nb-n_nb+1;       
+    }
+
+    for (int i=0; i<n_nb; i++) {
+      if (X_mat.size()==0) {t2=t;} 
+
+      //read a line of the snp is filtered out
+      inc=0;
+      while (t2<indicator_snp.size() && indicator_snp[t2]==0) {
+	t2++; inc++; 
+      }
+
+      Bimbam_ReadOneSNP (inc, indicator_idv, infile, geno, geno_mean);
+      gsl_vector_add_constant (geno, -1.0*geno_mean);
+            
+      for (size_t j=0; j<geno->size; j++) {
+	x_vec[j]=gsl_vector_get(geno, j);
+      }
+      X_mat.push_back(x_vec);
+
+      t2++;
+    }     
+    
+    n_nb=snpInfo[t].n_nb;
+
+    Calc_Cor(X_mat, cov_vec);
+    Cov_mat.push_back(cov_vec);
+    snpInfo_sub.push_back(snpInfo[t]);
+
+    X_mat.erase(X_mat.begin());
+
+    //write out var/cov values
+    if (Cov_mat.size()==10000) {
+      WriteCov (1, snpInfo_sub, Cov_mat);
+      Cov_mat.clear();
+      snpInfo_sub.clear();
+    }
+  }
+
+  if (Cov_mat.size()!=0) {
+    WriteCov (1, snpInfo_sub, Cov_mat);
+    Cov_mat.clear();
+    snpInfo_sub.clear();
+  }
+
+  gsl_vector_free(geno);
+
+  infile.close();
+  infile.clear();	
+	
+  return;
+}
+
+
+
+
+void VARCOV::AnalyzePlink ()
+{
+  string file_bed=file_bfile+".bed";
+  ifstream infile (file_bed.c_str(), ios::binary);
+  if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
+
+  //calculate the number of right-hand-side neighbours for each SNP
+  vector<SNPINFO> snpInfo_sub;
+  CalcNB(snpInfo);
+  
+  size_t ni_test=0;
+  for (size_t i=0; i<indicator_idv.size(); i++) {
+    ni_test+=indicator_idv[i];
+  }
+
+  gsl_vector *geno=gsl_vector_alloc (ni_test);
+  double geno_mean;
+
+  vector<double> x_vec, cov_vec;
+  vector<vector<double> > X_mat, Cov_mat;
+
+  for (size_t i=0; i<ni_test; i++) {
+    x_vec.push_back(0);
+  }
+
+  WriteCov (0, snpInfo_sub, Cov_mat);
+
+  size_t t2=0, inc;
+  int n_nb=0;
+
+  for (size_t t=0; t<indicator_snp.size(); ++t) {
+    if (t%d_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);}
+    if (indicator_snp[t]==0) {continue;}
+    //    if (t>=2) {break;}    
+
+    if (X_mat.size()==0) {
+      n_nb=snpInfo[t].n_nb+1;
+    } else {
+      n_nb=snpInfo[t].n_nb-n_nb+1;       
+    }
+
+    for (int i=0; i<n_nb; i++) {
+      if (X_mat.size()==0) {t2=t;} 
+
+      //read a line of the snp is filtered out
+      inc=0;
+      while (t2<indicator_snp.size() && indicator_snp[t2]==0) {
+	t2++; inc++; 
+      }
+
+      Plink_ReadOneSNP (t2, indicator_idv, infile, geno, geno_mean);
+      gsl_vector_add_constant (geno, -1.0*geno_mean);
+            
+      for (size_t j=0; j<geno->size; j++) {
+	x_vec[j]=gsl_vector_get(geno, j);
+      }
+      X_mat.push_back(x_vec);
+
+      t2++;
+    }     
+    
+    n_nb=snpInfo[t].n_nb;
+
+    Calc_Cor(X_mat, cov_vec);
+    Cov_mat.push_back(cov_vec);
+    snpInfo_sub.push_back(snpInfo[t]);
+
+    X_mat.erase(X_mat.begin());
+
+    //write out var/cov values
+    if (Cov_mat.size()==10000) {
+      WriteCov (1, snpInfo_sub, Cov_mat);
+      Cov_mat.clear();
+      snpInfo_sub.clear();
+    }
+  }
+
+  if (Cov_mat.size()!=0) {
+    WriteCov (1, snpInfo_sub, Cov_mat);
+    Cov_mat.clear();
+    snpInfo_sub.clear();
+  }
+
+  gsl_vector_free(geno);
+
+  infile.close();
+  infile.clear();	
+	
+  return;
+}
diff --git a/src/varcov.h b/src/varcov.h
new file mode 100644
index 0000000..b380b8c
--- /dev/null
+++ b/src/varcov.h
@@ -0,0 +1,72 @@
+/*
+	Genome-wide Efficient Mixed Model Association (GEMMA)
+    Copyright (C) 2011  Xiang Zhou
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __VARCOV_H__                
+#define __VARCOV_H__
+
+#include "gsl/gsl_vector.h"
+#include "gsl/gsl_matrix.h"
+
+
+#ifdef FORCE_FLOAT
+#include "param_float.h"
+#include "io_float.h"
+#else
+#include "param.h"
+#include "io.h"
+#endif
+
+using namespace std;
+
+
+
+
+class VARCOV {
+
+public:
+	// IO related parameters
+	string file_out;
+	string path_out;
+	string file_geno;
+	string file_bfile;
+	int d_pace;
+
+	vector<int> indicator_idv;
+	vector<int> indicator_snp;
+
+	vector<SNPINFO> snpInfo;
+
+	double time_opt;
+
+	// Class specific parameters
+	double window_cm;
+	size_t window_bp;
+	size_t window_ns;
+	
+	// Main functions
+	void CopyFromParam (PARAM &cPar);
+	void CopyToParam (PARAM &cPar);
+	void CalcNB (vector<SNPINFO> &snpInfo_sort);
+	void WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub, const vector<vector<double> > &Cov_mat);
+	void AnalyzeBimbam ();
+	void AnalyzePlink ();
+};
+
+#endif
+
+