/*
 Genome-wide Efficient Mixed Model Association (GEMMA)
 Copyright (C) 2011  Xiang Zhou

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <iostream>
#include <fstream>
#include <sstream>

#include <iomanip>
#include <cmath>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <ctime>
#include <cstring>
#include <algorithm>

#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_linalg.h"
#include "gsl/gsl_blas.h"
#include "gsl/gsl_eigen.h"
#include "gsl/gsl_randist.h"
#include "gsl/gsl_cdf.h"
#include "gsl/gsl_roots.h"



#include "logistic.h"
#include "lapack.h"
#include "io.h"

#ifdef FORCE_FLOAT
#include "param_float.h"
#include "bslmmdap_float.h"
#include "lmm_float.h"  //for class FUNC_PARAM and MatrixCalcLR
#include "lm_float.h"
#include "mathfunc_float.h"  //for function CenterVector
#else
#include "param.h"
#include "bslmmdap.h"
#include "lmm.h"
#include "lm.h"
#include "mathfunc.h"
#endif

using namespace std;




//Copy the run configuration held in cPar into this object: output location,
//the h/rho grid settings, trace_G, sample/SNP counts and the SNP/individual
//bookkeeping vectors. The two timers owned by this object are reset to zero.
void BSLMMDAP::CopyFromParam (PARAM &cPar)
{
	//output location
	path_out=cPar.path_out;
	file_out=cPar.file_out;

	//timers start from zero
	time_Omega=0.0;
	time_UtZ=0.0;

	//grid over h; end points at or outside [0,1] are pulled to 0.01 / 0.99
	h_ngrid=cPar.h_ngrid;
	h_min=cPar.h_min;
	if (h_min<=0) {h_min=0.01;}
	h_max=cPar.h_max;
	if (h_max>=1) {h_max=0.99;}

	//grid over rho, clamped the same way
	rho_ngrid=cPar.rho_ngrid;
	rho_min=cPar.rho_min;
	if (rho_min<=0) {rho_min=0.01;}
	rho_max=cPar.rho_max;
	if (rho_max>=1) {rho_max=0.99;}

	trace_G=cPar.trace_G;

	//individual counts and inclusion indicators
	ni_total=cPar.ni_total;
	ni_test=cPar.ni_test;
	indicator_idv=cPar.indicator_idv;

	//SNP counts, inclusion indicators and per-SNP information
	ns_total=cPar.ns_total;
	ns_test=cPar.ns_test;
	indicator_snp=cPar.indicator_snp;
	snpInfo=cPar.snpInfo;
}


//Hand the timing counters accumulated by this object back to cPar.
void BSLMMDAP::CopyToParam (PARAM &cPar)
{
	cPar.time_Omega=time_Omega;
	cPar.time_UtZ=time_UtZ;
}



//read hyp file
//The first line is treated as a header and skipped. Every following line is
//split on spaces, commas and tabs; the first two fields are ignored and
//fields three to five are appended to vec_sa2, vec_sb2 and vec_wab (this
//matches the "h rho sa2 sb2 weight" layout written by
//BSLMMDAP::WriteResult). Blank lines are skipped silently; lines with fewer
//than five fields are reported and skipped instead of handing a NULL token
//to atof. If the file cannot be opened the three vectors are left empty.
void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, vector<double> &vec_sb2, vector<double> &vec_wab)
{
  vec_sa2.clear(); vec_sb2.clear(); vec_wab.clear();

  igzstream infile (file_hyp.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open hyp file: "<<file_hyp<<endl; return;}

  const char *delim=" , \t";
  string line;
  vector<char> buf;
  char *ch_ptr;

  //skip the header line
  getline(infile, line);

  while (!safeGetline(infile, line).eof()) {
    //strtok writes into its argument, so tokenize a private, mutable copy
    //of the line rather than the string's own buffer
    buf.assign(line.begin(), line.end());
    buf.push_back('\0');

    //field 1; a line without any token is blank
    ch_ptr=strtok (&buf[0], delim);
    if (ch_ptr==NULL) {continue;}

    //field 2 is skipped; fields 3-5 hold sa2, sb2 and the weight
    double val[3];
    size_t n_val=0;
    ch_ptr=strtok (NULL, delim);
    while (ch_ptr!=NULL && n_val<3) {
      ch_ptr=strtok (NULL, delim);
      if (ch_ptr!=NULL) {val[n_val]=atof(ch_ptr); n_val++;}
    }

    if (n_val<3) {
      cout<<"error! too few columns in hyp file: "<<file_hyp<<endl;
      continue;
    }

    vec_sa2.push_back(val[0]);
    vec_sb2.push_back(val[1]);
    vec_wab.push_back(val[2]);
  }

  infile.close();
  infile.clear();

  return;
}


//read bf file
//The first line is treated as a header and skipped. Every following line is
//split on spaces, commas and tabs: field 1 is the SNP id (appended to
//vec_rs), field 2 is a block label, and all remaining fields are Bayes
//factors. Consecutive rows with the same block label are collected into one
//matrix, so on return BF[b][m] is the Bayes factor row of the m-th SNP in
//the b-th block and the rows of BF, taken in order, line up one-to-one with
//vec_rs.
//
//Compared with the previous version:
// - the row that opens a new block is stored in that block (it used to be
//   dropped);
// - the last block is appended to BF after the loop (it used to be lost);
// - blank lines and lines without a block label are skipped rather than
//   dereferencing a NULL token.
//Rows whose number of Bayes factors differs from the first row are still
//only reported, not rejected.
void ReadFile_bf (const string &file_bf, vector<string> &vec_rs, vector<vector<vector<double> > > &BF)
{
  BF.clear(); vec_rs.clear();

  igzstream infile (file_bf.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open bf file: "<<file_bf<<endl; return;}

  const char *delim=" , \t";
  string line, rs, block;
  vector<double> vec_bf;
  vector<vector<double> > mat_bf;
  vector<char> buf;
  char *ch_ptr;

  size_t bf_size=0;
  bool first_row=true;

  //skip the header line
  getline(infile, line);

  while (!safeGetline(infile, line).eof()) {
    //strtok writes into its argument, so tokenize a private, mutable copy
    buf.assign(line.begin(), line.end());
    buf.push_back('\0');

    //field 1: SNP id (a line without any token is blank)
    ch_ptr=strtok (&buf[0], delim);
    if (ch_ptr==NULL) {continue;}
    rs=ch_ptr;

    //field 2: block label
    ch_ptr=strtok (NULL, delim);
    if (ch_ptr==NULL) {
      cout<<"error! missing block column in bf file."<<endl;
      continue;
    }
    const bool new_block=(!first_row && block!=ch_ptr);
    block=ch_ptr;

    //remaining fields: one Bayes factor per grid point
    vec_bf.clear();
    for (ch_ptr=strtok (NULL, delim); ch_ptr!=NULL; ch_ptr=strtok (NULL, delim)) {
      vec_bf.push_back(atof(ch_ptr));
    }

    if (first_row) {
      bf_size=vec_bf.size();
    } else if (bf_size!=vec_bf.size()) {
      cout<<"error! unequal row size in bf file."<<endl;
    }

    //close the previous block before storing the first row of a new one
    if (new_block) {
      BF.push_back(mat_bf);
      mat_bf.clear();
    }
    mat_bf.push_back(vec_bf);
    vec_rs.push_back(rs);

    first_row=false;
  }

  //flush the block that was still being collected at end of file
  if (!mat_bf.empty()) {BF.push_back(mat_bf);}

  infile.close();
  infile.clear();

  return;
}


//read category files
//Reads a file holding continuous and/or discrete annotation columns. The
//header line is parsed by ReadHeader_io; kc and kd are set to the number of
//continuous (header.catc_col) and discrete (header.catd_col) columns.
//Each data line is split on spaces, commas and tabs and its annotations are
//recorded under the SNP id: the rs column when the header has one,
//otherwise "chr:pos" built from the same line. Only the first occurrence of
//an id is kept. The annotations are then laid out in the order of vec_rs
//(SNPs absent from the file get 0 in every column) into a
//vec_rs.size() x kc matrix and a vec_rs.size() x kd integer matrix, and the
//number of distinct values in each discrete column is stored in dlevel.
//
//Compared with the previous version:
// - "chr:pos" is built after the line has been parsed (it used to be built
//   from the previous line's chr/pos);
// - blank lines are skipped and lines with fewer fields than the header are
//   reported and skipped rather than dereferencing a NULL token;
// - kc and kd are set to 0 when the file cannot be opened.
//
//NOTE(review): Ac, Ad and dlevel are pointers passed by value, so the
//matrices/vector allocated below are not visible to the caller and are
//never freed here. Fixing this needs a signature change (e.g. pointer
//references) together with the declaration and all call sites.
void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel, size_t &kc, size_t &kd)
{
  //report "no annotations" unless the file is actually read
  kc=0; kd=0;

  igzstream infile (file_cat.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open category file: "<<file_cat<<endl; return;}

  const char *delim=" , \t";
  string line;
  vector<char> buf;
  char *ch_ptr;

  string rs, chr, pos;

  //read header
  HEADER header;
  safeGetline(infile, line);
  ReadHeader_io (line, header);

  //use the header to determine the number of categories
  kc=header.catc_col.size(); kd=header.catd_col.size();

  //set up storage and mapper
  map<string, vector<double> > mapRS2catc;
  map<string, vector<int> > mapRS2catd;
  vector<double> catc;
  vector<int> catd;

  //read the following lines to record mapRS2catc and mapRS2catd
  while (!safeGetline(infile, line).eof()) {
    //strtok writes into its argument, so tokenize a private, mutable copy
    buf.assign(line.begin(), line.end());
    buf.push_back('\0');

    ch_ptr=strtok (&buf[0], delim);
    if (ch_ptr==NULL) {continue;}

    catc.clear(); catd.clear();

    bool short_row=false;
    for (size_t i=0; i<header.coln; i++) {
      if (ch_ptr==NULL) {short_row=true; break;}

      if (header.rs_col!=0 && header.rs_col==i+1) {
	rs=ch_ptr;
      } else if (header.chr_col!=0 && header.chr_col==i+1) {
	chr=ch_ptr;
      } else if (header.pos_col!=0 && header.pos_col==i+1) {
	pos=ch_ptr;
      } else if ((header.cm_col!=0 && header.cm_col==i+1) ||
		 (header.a1_col!=0 && header.a1_col==i+1) ||
		 (header.a0_col!=0 && header.a0_col==i+1)) {
	//cM and allele columns are not used here
      } else if (header.catc_col.size()!=0 && header.catc_col.count(i+1)!=0 ) {
	catc.push_back(atof(ch_ptr));
      } else if (header.catd_col.size()!=0 && header.catd_col.count(i+1)!=0 ) {
	catd.push_back(atoi(ch_ptr));
      } else {}

      ch_ptr=strtok (NULL, delim);
    }

    if (short_row) {
      cout<<"error! too few columns in category file: "<<file_cat<<endl;
      continue;
    }

    //without an rs column the id is chr:pos of the line just parsed
    if (header.rs_col==0) {
      rs=chr+":"+pos;
    }

    if (mapRS2catc.count(rs)==0 && kc>0) {mapRS2catc[rs]=catc;}
    if (mapRS2catd.count(rs)==0 && kd>0) {mapRS2catd[rs]=catd;}
  }

  //load into Ad and Ac
  if (kc>0) {
    Ac=gsl_matrix_alloc(vec_rs.size(), kc);
    for (size_t i=0; i<vec_rs.size(); i++) {
      map<string, vector<double> >::const_iterator it=mapRS2catc.find(vec_rs[i]);
      for (size_t j=0; j<kc; j++) {
	gsl_matrix_set(Ac, i, j, (it!=mapRS2catc.end()) ? it->second[j] : 0);
      }
    }
  }

  if (kd>0) {
    Ad=gsl_matrix_int_alloc(vec_rs.size(), kd);

    for (size_t i=0; i<vec_rs.size(); i++) {
      map<string, vector<int> >::const_iterator it=mapRS2catd.find(vec_rs[i]);
      for (size_t j=0; j<kd; j++) {
	gsl_matrix_int_set(Ad, i, j, (it!=mapRS2catd.end()) ? it->second[j] : 0);
      }
    }

    //count the distinct values found in each discrete column (the 0 used
    //for SNPs missing from the file counts as a value)
    dlevel=gsl_vector_int_alloc(kd);
    map<int, int> rcd;
    int val;
    for (size_t j=0; j<kd; j++) {
      rcd.clear();
      for (size_t i=0; i<Ad->size1; i++) {
	val = gsl_matrix_int_get(Ad, i, j);
	rcd[val] = 1;
      }
      gsl_vector_int_set (dlevel, j, rcd.size());
    }
  }

  infile.clear();
  infile.close();

  return;
}








void BSLMMDAP::WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF)
{
  string file_bf, file_hyp;
	file_bf=path_out+"/"+file_out;
	file_bf+=".bf.txt";
	file_hyp=path_out+"/"+file_out;
	file_hyp+=".hyp.txt";

	ofstream outfile_bf, outfile_hyp;

	outfile_bf.open (file_bf.c_str(), ofstream::out);
	outfile_hyp.open (file_hyp.c_str(), ofstream::out);

	if (!outfile_bf) {cout<<"error writing file: "<<file_bf<<endl; return;}
	if (!outfile_hyp) {cout<<"error writing file: "<<file_hyp<<endl; return;}

	outfile_hyp<<"h"<<"\t"<<"rho"<<"\t"<<"sa2"<<"\t"<<"sb2"<<"\t"<<"weight"<<endl;
	outfile_hyp<<scientific;
	for (size_t i=0; i<Hyper->size1; i++) {
	  for (size_t j=0; j<Hyper->size2; j++) {
	    outfile_hyp<<setprecision(6)<<gsl_matrix_get (Hyper, i, j)<<"\t";
	  }
	  outfile_hyp<<endl;
	}

	outfile_bf<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss";
	for (size_t i=0; i<BF->size2; i++) {
	  outfile_bf<<"\t"<<"BF"<<i+1;
	}
	outfile_bf<<endl;

	size_t t=0;
	for (size_t i=0; i<ns_total; ++i) {
	  if (indicator_snp[i]==0) {continue;}

	  outfile_bf<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"
		    <<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss;

	  outfile_bf<<scientific;
	  for (size_t j=0; j<BF->size2; j++) {
	    outfile_bf<<"\t"<<setprecision(6)<<gsl_matrix_get (BF, t, j);
	  }
	  outfile_bf<<endl;

	  t++;
	}

	outfile_hyp.close();
	outfile_hyp.clear();
	outfile_bf.close();
	outfile_bf.clear();
	return;
}



void BSLMMDAP::WriteResult (const vector<string> &vec_rs, const gsl_matrix *Hyper, const gsl_vector *pip, const gsl_vector *coef)
{
  string file_gamma, file_hyp, file_coef;
	file_gamma=path_out+"/"+file_out;
	file_gamma+=".gamma.txt";
	file_hyp=path_out+"/"+file_out;
	file_hyp+=".hyp.txt";
	file_coef=path_out+"/"+file_out;
	file_coef+=".coef.txt";

	ofstream outfile_gamma, outfile_hyp, outfile_coef;

	outfile_gamma.open (file_gamma.c_str(), ofstream::out);
	outfile_hyp.open (file_hyp.c_str(), ofstream::out);
	outfile_coef.open (file_coef.c_str(), ofstream::out);

	if (!outfile_gamma) {cout<<"error writing file: "<<file_gamma<<endl; return;}
	if (!outfile_hyp) {cout<<"error writing file: "<<file_hyp<<endl; return;}
	if (!outfile_coef) {cout<<"error writing file: "<<file_coef<<endl; return;}

	outfile_hyp<<"h"<<"\t"<<"rho"<<"\t"<<"sa2"<<"\t"<<"sb2"<<"\t"<<"weight"<<endl;
	outfile_hyp<<scientific;
	for (size_t i=0; i<Hyper->size1; i++) {
	  for (size_t j=0; j<Hyper->size2; j++) {
	    outfile_hyp<<setprecision(6)<<gsl_matrix_get (Hyper, i, j)<<"\t";
	  }
	  outfile_hyp<<endl;
	}


	outfile_gamma<<"rs"<<"\t"<<"gamma"<<endl;
	for (size_t i=0; i<vec_rs.size(); ++i) {
	  outfile_gamma<<vec_rs[i]<<"\t"<<scientific<<setprecision(6)<<gsl_vector_get(pip, i)<<endl;
	}

	outfile_coef<<"coef"<<endl;
	outfile_coef<<scientific;
	for (size_t i=0; i<coef->size; i++) {
	  outfile_coef<<setprecision(6)<<gsl_vector_get (coef, i)<<endl;
	}

	outfile_coef.close();
	outfile_coef.clear();
	outfile_hyp.close();
	outfile_hyp.clear();
	outfile_gamma.close();
	outfile_gamma.clear();
	return;
}




/*
void BSLMMDAP::SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X, vector<size_t> &rank)
{
	size_t pos;
	for (size_t i=0; i<rank.size(); ++i) {
		pos=mapRank2pos[rank[i]];
		gsl_vector_view Xgamma_col=gsl_matrix_column (Xgamma, i);
		gsl_vector_const_view X_col=gsl_matrix_const_column (X, pos);
		gsl_vector_memcpy (&Xgamma_col.vector, &X_col.vector);
	}

	return;
}
*/

//Log marginal likelihood term for the model without selected SNPs.
//With d_i = 1/(K_eval[i]*sigma_b2 + 1) for i < ni_test, returns
//  -0.5*sum_i(-log d_i) - 0.5*tau*sum_i(d_i*Uty[i]^2) + 0.5*ni_test*log(tau).
//Uty and K_eval must hold at least ni_test elements.
//The scratch vector the previous version allocated, filled and freed on
//every call was never read and has been removed; the returned value is
//unchanged.
double BSLMMDAP::CalcMarginal (const gsl_vector *Uty, const gsl_vector *K_eval, const double sigma_b2, const double tau)
{
	double d, uy, Hi_yy=0.0, logdet_H=0.0;
	for (size_t i=0; i<ni_test; ++i) {
		//d is the i-th diagonal weight 1/(K_eval[i]*sigma_b2+1)
		d=gsl_vector_get (K_eval, i)*sigma_b2;
		d=1.0/(d+1.0);

		logdet_H-=log(d);
		uy=gsl_vector_get (Uty, i);
		Hi_yy+=d*uy*uy;
	}

	//calculate likelihood
	return -0.5*logdet_H-0.5*tau*Hi_yy+0.5*log(tau)*(double)ni_test;
}


double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma, const gsl_vector *Uty, const gsl_vector *K_eval, const double sigma_a2, const double sigma_b2, const double tau)
{
  clock_t  time_start;
	double logm=0.0;
	double d, uy, P_yy=0, logdet_O=0.0, logdet_H=0.0;

	gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1, UtXgamma->size2);
	gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2);
	gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2);
	gsl_vector *beta_hat=gsl_vector_alloc (UtXgamma->size2);
	gsl_vector *weight_Hi=gsl_vector_alloc (UtXgamma->size1);

	gsl_matrix_memcpy (UtXgamma_eval, UtXgamma);

	logdet_H=0.0; P_yy=0.0;
	for (size_t i=0; i<ni_test; ++i) {
		gsl_vector_view UtXgamma_row=gsl_matrix_row (UtXgamma_eval, i);
		d=gsl_vector_get (K_eval, i)*sigma_b2;
		d=1.0/(d+1.0);
		gsl_vector_set (weight_Hi, i, d);

		logdet_H-=log(d);
		uy=gsl_vector_get (Uty, i);
		P_yy+=d*uy*uy;
		gsl_vector_scale (&UtXgamma_row.vector, d);
	}

	//calculate Omega
	gsl_matrix_set_identity (Omega);

	time_start=clock();
#ifdef WITH_LAPACK
	lapack_dgemm ((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, UtXgamma, 1.0, Omega);
#else
	gsl_blas_dgemm (CblasTrans, CblasNoTrans, sigma_a2, UtXgamma_eval, UtXgamma, 1.0, Omega);
#endif
	time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);

	//calculate beta_hat
	gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy);

	logdet_O=CholeskySolve(Omega, XtHiy, beta_hat);

	gsl_vector_scale (beta_hat, sigma_a2);

	gsl_blas_ddot (XtHiy, beta_hat, &d);
	P_yy-=d;

	gsl_matrix_free (UtXgamma_eval);
	gsl_matrix_free (Omega);
	gsl_vector_free (XtHiy);
	gsl_vector_free (beta_hat);
	gsl_vector_free (weight_Hi);

	logm=-0.5*logdet_H-0.5*logdet_O-0.5*tau*P_yy+0.5*log(tau)*(double)ni_test;

	return logm;
}


//Log prior term for cHyp:
//  (n_gamma-1)*logp + (ns_test-n_gamma)*log(1-exp(logp)).
double BSLMMDAP::CalcPrior (class HYPBSLMM &cHyp) {
  const double n_in=(double)cHyp.n_gamma;
  const double n_out=(double)ns_test-(double)cHyp.n_gamma;
  return (n_in-1.0)*cHyp.logp+n_out*log(1.0-exp(cHyp.logp));
}


//Compute single-SNP Bayes factors on a grid of (h, rho) values and write
//them, together with the grid itself, through WriteResult.
//
//tau is set to ni_test/(Uty . Uty). For every grid point, h and rho are
//spaced evenly over [h_min,h_max] and [rho_min,rho_max] and converted to
//  sigma_a2 = h*rho/((1-h)*ns_causal),  with ns_causal fixed at 10
//  sigma_b2 = h*(1-rho)/(trace_G*(1-h)).
//Each row of Hyper holds (h, rho, sigma_a2, sigma_b2, 1/n_grid). For SNP t
//(column t of UtX) and grid point ij,
//  BF(t,ij) = exp( CalcMarginal(column t; sigma_a2, sigma_b2)
//                  - CalcMarginal(no SNP; sigma_b2 = 0) ).
//The time spent in the Bayes factor loop is stored in time_Proposal (in
//minutes). U and y are not used by this function.
//
//Compared with the previous version:
// - a grid of size one in either dimension uses the lower end point rather
//   than evaluating 0/0 and producing NaN;
// - the null marginal, which does not depend on the grid point, is computed
//   once instead of n_grid times.
void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const gsl_vector *y) {
	clock_t time_start;

	//set up BF
	double tau, h, rho, sigma_a2, sigma_b2, d;
	size_t ns_causal=10;
	size_t n_grid=h_ngrid*rho_ngrid;
	vector<double> vec_sa2, vec_sb2;

	gsl_matrix *BF=gsl_matrix_alloc(ns_test, n_grid);
	gsl_matrix *Xgamma=gsl_matrix_alloc(ni_test, 1);
	gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5);

	//compute tau by using yty
	gsl_blas_ddot (Uty, Uty, &tau);
	tau=(double)ni_test/tau;

	//the null marginal is evaluated with sigma_b2=0 and is the same for
	//every grid point
	const double logm_null=CalcMarginal (Uty, K_eval, 0.0, tau);

	//set up grid values for sigma_a2 and sigma_b2 based on an approximately even grid for h and rho, and a fixed number of causals
	size_t ij=0;
	for (size_t i=0; i<h_ngrid; i++) {
	  //a single-point grid sits at the lower end point
	  h=(h_ngrid>1) ? h_min+(h_max-h_min)*(double)i/((double)h_ngrid-1) : h_min;
	  for (size_t j=0; j<rho_ngrid; j++) {
	    rho=(rho_ngrid>1) ? rho_min+(rho_max-rho_min)*(double)j/((double)rho_ngrid-1) : rho_min;

	    sigma_a2=h*rho/((1-h)*(double)ns_causal);
	    sigma_b2=h*(1.0-rho)/(trace_G*(1-h));

	    vec_sa2.push_back(sigma_a2);
	    vec_sb2.push_back(sigma_b2);

	    gsl_matrix_set (Hyper, ij, 0, h);
	    gsl_matrix_set (Hyper, ij, 1, rho);
	    gsl_matrix_set (Hyper, ij, 2, sigma_a2);
	    gsl_matrix_set (Hyper, ij, 3, sigma_b2);
	    gsl_matrix_set (Hyper, ij, 4, 1/(double)n_grid);
	    ij++;
	  }
	}

	//compute BF factors
	time_start=clock();
	cout<<"Calculating BF..."<<endl;
	gsl_vector_view Xgamma_col=gsl_matrix_column (Xgamma, 0);
	for (size_t t=0; t<ns_test; t++) {
	  //copy SNP t into the one-column design matrix
	  gsl_vector_const_view X_col=gsl_matrix_const_column (UtX, t);
	  gsl_vector_memcpy (&Xgamma_col.vector, &X_col.vector);

	  for (size_t g=0; g<n_grid; g++) {
	    d=CalcMarginal (Xgamma, Uty, K_eval, vec_sa2[g], vec_sb2[g], tau);
	    d-=logm_null;
	    d=exp(d);

	    gsl_matrix_set(BF, t, g, d);
	  }
	}
	time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);

	//save results
	WriteResult (Hyper, BF);

	//free matrices and vectors
	gsl_matrix_free(BF);
	gsl_matrix_free(Xgamma);
	gsl_matrix_free(Hyper);
	return;
}





//Closed-form fit for a single discrete annotation (column 0 of Xd).
//For every category value the mean of pip_vec over the rows carrying that
//value is computed. prior_vec[i] is set to the mean of row i's category,
//and coef[k] is set to log(mean_k/(1-mean_k)) for k = 0 .. levels-1, where
//levels is dlevel[0].
//Row indices now use size_t to match Xd->size1 (the previous int index was
//compared against an unsigned size), and each category mean is formed once.
//NOTE(review): coef[k] is taken from category value k, so this assumes the
//categories are coded 0 .. levels-1; a level with no rows gives 0/0 (NaN).
//NOTE(review): coef must hold at least levels elements -- confirm against
//the allocation made by the caller.
void single_ct_regression(const gsl_matrix_int *Xd, const gsl_vector_int *dlevel, const gsl_vector *pip_vec, gsl_vector *coef, gsl_vector *prior_vec) {

  map<int,double> sum_pip;
  map<int,double> sum;

  const int levels = gsl_vector_int_get(dlevel,0);

  for(int k=0;k<levels;k++){
    sum_pip[k] = sum[k] = 0;
  }

  //accumulate the pip total and the row count of every category
  const size_t n_row = Xd->size1;
  for(size_t i=0;i<n_row;i++){
    const int cat = gsl_matrix_int_get(Xd,i,0);
    sum_pip[cat] += gsl_vector_get(pip_vec,i);
    sum[cat] += 1;
  }

  //per-category mean pip; every key of sum_pip is also a key of sum
  map<int,double> mean_pip;
  for(map<int,double>::const_iterator it=sum_pip.begin(); it!=sum_pip.end(); ++it){
    mean_pip[it->first] = it->second/sum[it->first];
  }

  //each row's prior is the mean pip of its category
  for(size_t i=0;i<n_row;i++){
    gsl_vector_set(prior_vec,i,mean_pip[gsl_matrix_int_get(Xd,i,0)]);
  }

  //coefficients are the logits of the category means (no baseline is
  //subtracted)
  for(int k=0;k<levels;k++){
    const double new_prior = mean_pip[k];
    gsl_vector_set(coef, k, log(new_prior/(1-new_prior)) );
  }

  return;
}




//Estimate grid weights, per-SNP inclusion probabilities and annotation
//coefficients by an EM-style iteration over pre-computed Bayes factors, then
//write the results through WriteResult.
//
//Inputs:
//  kc, kd      number of continuous / discrete annotation columns
//  vec_rs      SNP ids; its length is used as the number of SNPs
//  vec_sa2,
//  vec_sb2     sigma_a2 and sigma_b2 of every grid point
//  wab         initial weight of every grid point; its length is n_grid
//  BF          BF[b][m][ij]: Bayes factor of the m-th SNP of block b at grid
//              point ij; SNPs are taken in block order and are expected to
//              line up with vec_rs
//  Ac, Ad,
//  dlevel      continuous annotations, discrete annotations and the number
//              of levels of each discrete annotation, passed on to the
//              logistic_* / single_ct_regression helpers
//
//Each iteration (at most 100, stopping once the change in the log marginal
//likelihood is no larger than 1e-3):
//  1. pip of each SNP = (prior odds * weighted BF) normalised within its
//     block by 1 + the block sum;
//  2. new grid weights from the per-grid sums of log(1 + sum of prior odds
//     * weight * BF), exponentiated relative to the largest and normalised;
//  3. prior_vec and coef are refitted from pip: the mean pip without
//     annotations, otherwise one of the regression helpers;
//  4. the log marginal likelihood is evaluated.
//
//Compared with the previous version, step 4 advances the SNP index once per
//SNP rather than once per block, so that each SNP is paired with its own
//entry of prior_vec as in steps 1 and 2. The unused time_start local was
//removed and the logistic transform of the last coefficient is computed
//once after the loop.
//
//NOTE(review): vec_wab is never refreshed from vec_wab_new (the assignment
//is commented out), so steps 2 and 4 always use the initial weights --
//confirm this is intended.
void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, const vector<string> &vec_rs, const vector<double> &vec_sa2, const vector<double> &vec_sb2, const vector<double> &wab, const vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel) {
	//set up BF
	double h, rho, sigma_a2, sigma_b2, d, s, logm, logm_save=0.0;
	size_t t1, t2;
	size_t n_grid=wab.size(), ns_test=vec_rs.size();

	gsl_vector *prior_vec=gsl_vector_alloc(ns_test);
	gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5);
	gsl_vector *pip=gsl_vector_alloc(ns_test);
	gsl_vector *coef=gsl_vector_alloc(kc+kd+1);

	//perform the EM algorithm
	vector<double> vec_wab, vec_wab_new;

	//initial values: every SNP starts with prior (#blocks / #SNPs)
	for (size_t t=0; t<ns_test; t++) {
	  gsl_vector_set (prior_vec, t, (double)BF.size()/(double)ns_test);
	}
	for (size_t ij=0; ij<n_grid; ij++) {
	  vec_wab.push_back(wab[ij]);
	  vec_wab_new.push_back(wab[ij]);
	}

	//EM iteration
	size_t it=0;
	double dif=1;
	while (it<100 && dif>1e-3) {
	  //update E_gamma
	  //t1 indexes the SNP being scored, t2 the SNP being normalised
	  t1=0, t2=0;
	  for (size_t b=0; b<BF.size(); b++) {
	    s=1;
	    for (size_t m=0; m<BF[b].size(); m++) {
	      d=0;
	      for (size_t ij=0; ij<n_grid; ij++) {
		d+=vec_wab_new[ij]*BF[b][m][ij];
	      }
	      d*=gsl_vector_get(prior_vec, t1)/(1-gsl_vector_get(prior_vec, t1));

	      gsl_vector_set(pip, t1, d);
	      s+=d;
	      t1++;
	    }

	    for (size_t m=0; m<BF[b].size(); m++) {
	      d=gsl_vector_get(pip, t2)/s;
	      gsl_vector_set(pip, t2, d);
	      t2++;
	    }
	  }

	  //update E_wab
	  //s tracks the largest log value so the exponentials below stay bounded
	  s=0;
	  for (size_t ij=0; ij<n_grid; ij++) {
	    vec_wab_new[ij]=0;

	    t1=0;
	    for (size_t b=0; b<BF.size(); b++) {
	      d=1;
	      for (size_t m=0; m<BF[b].size(); m++) {
		d+=gsl_vector_get(prior_vec, t1)/(1-gsl_vector_get(prior_vec, t1))*vec_wab[ij]*BF[b][m][ij];
		t1++;
	      }
	      vec_wab_new[ij]+=log(d);
	    }

	    s=max(s, vec_wab_new[ij]);
	  }

	  d=0;
	  for (size_t ij=0; ij<n_grid; ij++) {
	    vec_wab_new[ij]=exp(vec_wab_new[ij]-s);
	    d+=vec_wab_new[ij];
	  }

	  for (size_t ij=0; ij<n_grid; ij++) {
	    vec_wab_new[ij]/=d;
	    //	    vec_wab[ij]=vec_wab_new[ij];
	  }

	  //update coef, and pi
	  if(kc==0 && kd==0){//no annotation
	    //every SNP gets the mean pip as its prior
	    s=0;
	    for (size_t t=0; t<pip->size; t++) {
	      s+=gsl_vector_get(pip, t);
	    }
	    s=s/(double)pip->size;
	    for (size_t t=0; t<pip->size; t++) {
	      gsl_vector_set(prior_vec, t, s);
	    }

	    gsl_vector_set (coef, 0, log(s/(1-s)));
	  } else if(kc==0 && kd!=0){//only discrete annotations
	    if(kd == 1){
	      single_ct_regression(Ad, dlevel, pip, coef, prior_vec);
	    }else{
	      logistic_cat_fit(coef, Ad, dlevel, pip, 0, 0);
	      logistic_cat_pred(coef, Ad, dlevel, prior_vec);
	    }
	  } else if (kc!=0 && kd==0) {//only continuous annotations
	    logistic_cont_fit(coef, Ac, pip, 0, 0);
	    logistic_cont_pred(coef, Ac, prior_vec);
	  } else if (kc!=0 && kd!=0) {//both continuous and categorical annotations
	    logistic_mixed_fit(coef, Ad, dlevel, Ac, pip, 0, 0);
	    logistic_mixed_pred(coef, Ad, dlevel, Ac, prior_vec);
	  }

	  //compute marginal likelihood
	  logm=0;

	  t1=0;
	  for (size_t b=0; b<BF.size(); b++) {
	    d=1; s=0;
	    for (size_t m=0; m<BF[b].size(); m++) {
	      s+=log(1-gsl_vector_get(prior_vec, t1));
	      for (size_t ij=0; ij<n_grid; ij++) {
		d+=gsl_vector_get(prior_vec, t1)/(1-gsl_vector_get(prior_vec, t1))*vec_wab[ij]*BF[b][m][ij];
	      }
	      //move on to the next SNP's prior
	      t1++;
	    }
	    logm+=log(d)+s;
	  }

	  //dif keeps its initial value of 1 after the first pass so that at
	  //least two iterations are run
	  if (it>0) {
	    dif=logm-logm_save;
	  }
	  logm_save=logm;
	  it++;

	  cout<<"iteration = "<<it<<"; marginal likelihood = "<<logm<<endl;
	}

	//update h and rho that correspond to w_ab
	//pi_last is the logistic transform of the last coefficient; it does not
	//depend on the grid point
	const double coef_last=gsl_vector_get(coef, coef->size-1);
	const double pi_last=exp(coef_last)/(1+exp(coef_last));
	for (size_t ij=0; ij<n_grid; ij++) {
	  sigma_a2=vec_sa2[ij];
	  sigma_b2=vec_sb2[ij];

	  d=pi_last;
	  h=(d*(double)ns_test*sigma_a2+1*sigma_b2)/(1+d*(double)ns_test*sigma_a2+1*sigma_b2);
	  rho=d*(double)ns_test*sigma_a2/(d*(double)ns_test*sigma_a2+1*sigma_b2);

	  gsl_matrix_set (Hyper, ij, 0, h);
	  gsl_matrix_set (Hyper, ij, 1, rho);
	  gsl_matrix_set (Hyper, ij, 2, sigma_a2);
	  gsl_matrix_set (Hyper, ij, 3, sigma_b2);
	  gsl_matrix_set (Hyper, ij, 4, vec_wab_new[ij]);
	}

	//save results
	WriteResult (vec_rs, Hyper, pip, coef);

	//free matrices and vectors
	gsl_vector_free(prior_vec);
	gsl_matrix_free(Hyper);
	gsl_vector_free(pip);
	gsl_vector_free(coef);
	return;
}

/*
//readin the estimated hyper-parameters and perform fine mapping for each region
void BSLMM::DAP_FineMapping (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_matrix *A, const gsl_vector *Uty, const gsl_vector *K_eval, const gsl_vector *y, gsl_matrix *Hyper, gsl_vector *alpha, gsl_vector *pip) {
	clock_t time_start;

	//two priority sets: S_1 contains all candidate causal SNPs; S_2 contains the prioritized combination of them
	//two marginal probability sets: P_1 contains marginals for S_1; P_2 contains marginals for S_2;
	set<size_t> S1set, S2set;
	vector<size_t> S1vec;
	vector<set<size_t> > S2vec;
	vector<double> P1, P2;

	//calculate P0 (null) and P1 (for every SNP)



	//loop through the number of combinations
	for (size_t s=0; s<p; s++) {
	  //if (s==0), set up S_1: compute marginal of the null model, then compute P_1, then compute BF_1 and use them to select S_1; compute C_1



	  //if (s==1), set up S_2: compute pair-wise P_2 and use them to select S_2; compute C_2

	  //otherwise, match each combination of S_2 with each SNP from S_1, select into S_3; and replace S_2 with S_3; compute C_s


	  //stop when the stopping criteria are reached (if S_2 is empty; if t; if kappa); add the residual component R

	for (size_t t=0; t<total_step; ++t) {
		if (t%d_pace==0 || t==total_step-1) {ProgressBar ("Running MCMC ", t, total_step-1, (double)n_accept/(double)(t*n_mh+1));}
//		if (t>10) {break;}

		if (a_mode==13) {
			SampleZ (y, z_hat, z);
			mean_z=CenterVector (z);

			time_start=clock();
			gsl_blas_dgemv (CblasTrans, 1.0, U, z, 0.0, Utz);
			time_UtZ+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);

			//First proposal
			if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) {
				logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old);
				beta_old.clear();
				for (size_t i=0; i<cHyp_old.n_gamma; ++i) {
				  beta_old.push_back(0);
				}
			}
			else {
				gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test, cHyp_old.n_gamma);
				gsl_vector *beta=gsl_vector_alloc (cHyp_old.n_gamma);
				SetXgamma (UtXgamma, UtX, rank_old);
				logPost_old=CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old, alpha_old, beta, cHyp_old);

				beta_old.clear();
				for (size_t i=0; i<beta->size; ++i) {
					beta_old.push_back(gsl_vector_get(beta, i));
				}
				gsl_matrix_free (UtXgamma);
				gsl_vector_free (beta);
			}
		}


	delete [] p_gamma;
	beta_g.clear();

	return;
}

*/






/*
//below fits MCMC for rho=1
void BSLMM::CalcXtX (const gsl_matrix *X, const gsl_vector *y, const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty)
{
  time_t time_start=clock();
  gsl_matrix_const_view X_sub=gsl_matrix_const_submatrix(X, 0, 0, X->size1, s_size);
  gsl_matrix_view XtX_sub=gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size);
  gsl_vector_view Xty_sub=gsl_vector_subvector(Xty, 0, s_size);

#ifdef WITH_LAPACK
  lapack_dgemm ((char *)"T", (char *)"N", 1.0, &X_sub.matrix, &X_sub.matrix, 0.0, &XtX_sub.matrix);
#else
  gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, &X_sub.matrix, &X_sub.matrix, 0.0, &XtX_sub.matrix);
#endif
  gsl_blas_dgemv(CblasTrans, 1.0, &X_sub.matrix, y, 0.0, &Xty_sub.vector);

  time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);

  return;
}



double BSLMM::CalcPosterior (const double yty, class HYPBSLMM &cHyp)
{
	double logpost=0.0;

	//for quantitative traits, calculate pve and pge
	//pve and pge for case/control data are calculated in CalcCC_PVEnZ
	if (a_mode==11) {
		cHyp.pve=0.0;
		cHyp.pge=1.0;
	}

	//calculate likelihood
	if (a_mode==11) {logpost-=0.5*(double)ni_test*log(yty);}
	else {logpost-=0.5*yty;}

	logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp));

	return logpost;
}


double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, const gsl_vector *Xty, const double yty, const size_t s_size, gsl_vector *Xb, gsl_vector *beta, class HYPBSLMM &cHyp)
{
	double sigma_a2=cHyp.h/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test);
	double logpost=0.0;
	double d, P_yy=yty, logdet_O=0.0;

	gsl_matrix_const_view Xgamma_sub=gsl_matrix_const_submatrix (Xgamma, 0, 0, Xgamma->size1, s_size);
	gsl_matrix_const_view XtX_sub=gsl_matrix_const_submatrix (XtX, 0, 0, s_size, s_size);
	gsl_vector_const_view Xty_sub=gsl_vector_const_subvector (Xty, 0, s_size);

	gsl_matrix *Omega=gsl_matrix_alloc (s_size, s_size);
	gsl_matrix *M_temp=gsl_matrix_alloc (s_size, s_size);
	gsl_vector *beta_hat=gsl_vector_alloc (s_size);
	gsl_vector *Xty_temp=gsl_vector_alloc (s_size);

	gsl_vector_memcpy (Xty_temp, &Xty_sub.vector);

	//calculate Omega
	gsl_matrix_memcpy (Omega, &XtX_sub.matrix);
	gsl_matrix_scale (Omega, sigma_a2);
	gsl_matrix_set_identity (M_temp);
	gsl_matrix_add (Omega, M_temp);

	//calculate beta_hat
	logdet_O=CholeskySolve(Omega, Xty_temp, beta_hat);
	gsl_vector_scale (beta_hat, sigma_a2);

	gsl_blas_ddot (Xty_temp, beta_hat, &d);
	P_yy-=d;

	//sample tau
	double tau=1.0;
	if (a_mode==11) {tau =gsl_ran_gamma (gsl_r, (double)ni_test/2.0,  2.0/P_yy); }

	//sample beta
	for (size_t i=0; i<s_size; i++)
	{
		d=gsl_ran_gaussian(gsl_r, 1);
		gsl_vector_set(beta, i, d);
	}
	gsl_vector_view beta_sub=gsl_vector_subvector(beta, 0, s_size);
	gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, &beta_sub.vector);

	//it computes inv(L^T(Omega)) %*% beta;
	gsl_vector_scale(&beta_sub.vector, sqrt(sigma_a2/tau));
	gsl_vector_add(&beta_sub.vector, beta_hat);
	gsl_blas_dgemv (CblasNoTrans, 1.0, &Xgamma_sub.matrix, &beta_sub.vector, 0.0, Xb);

	//for quantitative traits, calculate pve and pge
	if (a_mode==11) {
		gsl_blas_ddot (Xb, Xb, &d);
		cHyp.pve=d/(double)ni_test;
		cHyp.pve/=cHyp.pve+1.0/tau;
		cHyp.pge=1.0;
	}

	logpost=-0.5*logdet_O;
	if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);}
	else {logpost-=0.5*P_yy;}

	logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp));

	gsl_matrix_free (Omega);
	gsl_matrix_free (M_temp);
	gsl_vector_free (beta_hat);
	gsl_vector_free (Xty_temp);

	return logpost;
}
*/