/*
	Genome-wide Efficient Mixed Model Association (GEMMA)
    Copyright (C) 2011  Xiang Zhou

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <iomanip>
#include <bitset>
#include <vector>
#include <map>
#include <set>
#include <cstring>
#include <cmath>
#include <stdio.h>
#include <stdlib.h>

#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_linalg.h"
#include "gsl/gsl_blas.h"
#include "gsl/gsl_cdf.h"

#include "lapack.h"
#include "gzstream.h"
#include "mathfunc.h"
#include "eigenlib.h"

#ifdef FORCE_FLOAT
#include "io_float.h"
#else
#include "io.h"
#endif


using namespace std;



//Print progress bar
// Writes `str`, a 50-character bar and the percentage p/total to stdout.
//   str   - label printed in front of the bar
//   p     - amount of work done so far
//   total - amount of work that corresponds to 100%
// The line is terminated with '\r' (no newline) so the next call overwrites it.
// Side effect: cout is left in fixed notation with precision 2.
void ProgressBar (string str, double p, double total)
{
	const int width = 50;

	// BimbamKin passes size()-1 as total, which is 0 for a single SNP; 0/0 is
	// NaN and converting NaN to int is undefined, so report the job as complete.
	double progress = (total != 0.0) ? (100.0 * p / total) : 100.0;

	// Clamp before converting so NaN or huge values never reach the int cast.
	int barsize;
	if (!(progress > 0.0)) {barsize = 0;}
	else if (progress >= 100.0) {barsize = width;}
	else {barsize = (int) (progress / 2.0);}

	cout<<str;
	for (int i = 0; i < width; i++) {
		cout<<(i<barsize ? '=' : ' ');
	}
	cout<<setprecision(2)<<fixed<<progress<<"%\r"<<flush;

	return;
}


//Print progress bar (with acceptance ratio)
// Same as the three-argument overload, but appends `ratio` after the percentage.
//   str   - label printed in front of the bar
//   p     - amount of work done so far
//   total - amount of work that corresponds to 100%
//   ratio - extra value shown after the percentage
// The line is terminated with '\r' (no newline) so the next call overwrites it.
// Side effect: cout is left in fixed notation with precision 2.
void ProgressBar (string str, double p, double total, double ratio)
{
	const int width = 50;

	// A zero total would give NaN (0/0) or inf, and converting those to int is
	// undefined; treat a zero total as a finished job.
	double progress = (total != 0.0) ? (100.0 * p / total) : 100.0;

	// Clamp before converting so NaN or huge values never reach the int cast.
	int barsize;
	if (!(progress > 0.0)) {barsize = 0;}
	else if (progress >= 100.0) {barsize = width;}
	else {barsize = (int) (progress / 2.0);}

	cout<<str;
	for (int i = 0; i < width; i++) {
		cout<<(i<barsize ? '=' : ' ');
	}
	cout<<setprecision(2)<<fixed<<progress<<"%    "<<ratio<<"\r"<<flush;

	return;
}


// Return true when the NUL-terminated string `line` is empty or consists
// only of whitespace characters (as classified by isspace).
bool isBlankLine(char const* line)
{
    for ( char const* cp = line; *cp; ++cp )
    {
        // isspace() is only defined for EOF and values representable as
        // unsigned char; a plain char >= 0x80 may be negative, so convert first.
        if ( !isspace(static_cast<unsigned char>(*cp)) ) return false;
    }
    return true;
}

// std::string convenience overload: forwards the string's C-string view to
// the char const* version of isBlankLine.
bool isBlankLine(std::string const& line)
{
   char const* const text = line.c_str();
   return isBlankLine(text);
}

// Line reader that accepts "\n", "\r\n" and a lone "\r" as line endings.
// `t` receives the line without its terminator; the stream is returned.
// eofbit is set only when the end of input is reached with nothing read, so a
// final line that lacks a terminator is still delivered by one call and the
// following call reports EOF.
std::istream& safeGetline(std::istream& is, std::string& t)
{
    t.clear();

    // Characters are pulled straight from the streambuf, which is faster than
    // going through the istream one character at a time. Direct streambuf
    // access has to be guarded by a sentry, which takes care of things such as
    // thread synchronization and updating the stream state.
    std::istream::sentry guard(is, true);
    std::streambuf* const buf = is.rdbuf();

    while (true) {
        const int ch = buf->sbumpc();

        if (ch == '\n') {
            return is;
        }
        if (ch == '\r') {
            // swallow the '\n' of a "\r\n" pair
            if (buf->sgetc() == '\n') {
                buf->sbumpc();
            }
            return is;
        }
        if (ch == EOF) {
            // Also covers a last line that has no line ending
            if (t.empty()) {
                is.setstate(std::ios::eofbit);
            }
            return is;
        }

        t += (char)ch;
    }
}

//Read snp file
// Collects the first token of every line of `file_snps` (opened through
// igzstream) into `setSnps`; tokens are separated by space, comma or tab.
//   file_snps - path of the SNP list
//   setSnps   - cleared, then filled with the ids that were read
// Returns false (after printing a message) when the file cannot be opened.
bool ReadFile_snps (const string &file_snps, set<string> &setSnps)
{
	setSnps.clear();

	igzstream infile (file_snps.c_str(), igzstream::in);
	if (!infile) {cout<<"error! fail to open snps file: "<<file_snps<<endl; return false;}

	string line;
	char *ch_ptr;

	while (getline(infile, line)) {
		// getline keeps the '\r' of a CRLF line ending; drop it so it does not
		// become part of the SNP id
		if (!line.empty() && line[line.size()-1]=='\r') {line.erase(line.size()-1);}

		ch_ptr=strtok (&line[0], " , \t");
		// a blank line has no token; constructing a string from NULL is undefined
		if (ch_ptr==NULL) {continue;}

		setSnps.insert(ch_ptr);
	}

	infile.close();
	infile.clear();

	return true;
}


// Read a SNP list from a file whose first line is a header.
// The header is parsed by ReadHeader; for every following non-blank line the
// id is taken from the rs column, or built as "chr:pos" when the header has no
// rs column. Lines that end before the needed column(s) are skipped.
//   file_snps - path of the SNP file (opened through igzstream)
//   setSnps   - cleared, then filled with the ids that were read
// Returns false (after printing a message) when the file cannot be opened.
bool ReadFile_snps_header (const string &file_snps, set<string> &setSnps)
{
	setSnps.clear();

	igzstream infile (file_snps.c_str(), igzstream::in);
	if (!infile) {cout<<"error! fail to open snps file: "<<file_snps<<endl; return false;}

	string line, rs, chr, pos;
	char *ch_ptr;

	//read header
	HEADER header;
	safeGetline(infile, line);
	ReadHeader (line, header);

	// only a message is printed here; reading continues as before
	if (header.rs_col==0 && (header.chr_col==0 || header.pos_col==0) ) {
	  cout<<"missing rs id in the hearder"<<endl;
	}

	while (!safeGetline(infile, line).eof()) {
	  if (isBlankLine(line)) {continue;}

	  bool got_rs=false, got_chr=false, got_pos=false;

	  ch_ptr=strtok (&line[0], " , \t");
	  // stop at the end of the line: strtok returns NULL once the tokens run out
	  for (size_t i=0; i<header.coln && ch_ptr!=NULL; i++) {
	    if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr; got_rs=true;}
	    if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr; got_chr=true;}
	    if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr; got_pos=true;}

	    ch_ptr=strtok (NULL, " , \t");
	  }

	  // a truncated line would otherwise reuse values left over from an earlier row
	  bool complete;
	  if (header.rs_col!=0) {complete=got_rs;}
	  else {complete=(header.chr_col==0 || got_chr) && (header.pos_col==0 || got_pos);}
	  if (!complete) {
	    cout<<"warning! skipping a truncated line in snps file: "<<file_snps<<endl;
	    continue;
	  }

	  if (header.rs_col==0) {
	    rs=chr+":"+pos;
	  }

	  setSnps.insert(rs);
	}

	infile.close();
	infile.clear();

	return true;
}


//Read log file
// Scans `file_log` for the first line whose 2nd to 4th tokens are
// "estimated", "mean", "=" and stores the number that follows in `pheno_mean`.
//   file_log   - path of the log file
//   pheno_mean - overwritten only when such a line with a value is found
// Returns false (after printing a message) when the file cannot be opened and
// true otherwise, including when no matching line exists.
bool ReadFile_log (const string &file_log, double &pheno_mean)
{
	ifstream infile (file_log.c_str(), ifstream::in);
	if (!infile) {cout<<"error! fail to open log file: "<<file_log<<endl; return false;}

	string line;
	char *ch_ptr;
	bool found=false;

	while (!found && getline(infile, line)) {
		// the first token is skipped; matching starts at the second one
		ch_ptr=strtok (&line[0], " , \t");
		if (ch_ptr==NULL) {continue;}

		ch_ptr=strtok (NULL, " , \t");
		if (ch_ptr==NULL || strcmp(ch_ptr, "estimated")!=0) {continue;}

		ch_ptr=strtok (NULL, " , \t");
		if (ch_ptr==NULL || strcmp(ch_ptr, "mean")!=0) {continue;}

		ch_ptr=strtok (NULL, " , \t");
		if (ch_ptr==NULL || strcmp(ch_ptr, "=")!=0) {continue;}

		// the line may end right after "="; atof(NULL) would be undefined
		ch_ptr=strtok (NULL, " , \t");
		if (ch_ptr==NULL) {continue;}

		pheno_mean=atof(ch_ptr);
		found=true;
	}

	infile.close();
	infile.clear();

	return true;
}


//Read bimbam annotation file
// Every line is read as: rs id, base-pair position, chromosome, cM, separated
// by space, comma or tab. A field that is "NA" or absent is stored as -9
// ("-9" for the chromosome).
//   file_anno - path of the annotation file
//   mapRS2chr / mapRS2bp / mapRS2cM - cleared, then filled per rs id
// Returns false (after printing a message) when the file cannot be opened.
bool ReadFile_anno (const string &file_anno, map<string, string> &mapRS2chr, map<string, long int> &mapRS2bp, map<string, double> &mapRS2cM)
{
	mapRS2chr.clear();
	mapRS2bp.clear();
	mapRS2cM.clear();	// was left untouched, so stale entries could survive a re-read

	ifstream infile (file_anno.c_str(), ifstream::in);
	if (!infile) {cout<<"error opening annotation file: "<<file_anno<<endl; return false;}

	string line;
	char *ch_ptr;

	string rs;
	long int b_pos;
	string chr;
	double cM;

	while (!safeGetline(infile, line).eof()) {
		ch_ptr=strtok (&line[0], " , \t");
		if (ch_ptr==NULL) {continue;}	// blank line: no id to record
		rs=ch_ptr;

		// each remaining field may be missing; strtok keeps returning NULL
		// once the line is exhausted
		ch_ptr=strtok (NULL, " , \t");
		if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) {b_pos=-9;} else {b_pos=atol(ch_ptr);}
		ch_ptr=strtok (NULL, " , \t");
		if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) {chr="-9";} else {chr=ch_ptr;}
		ch_ptr=strtok (NULL, " , \t");
		if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) {cM=-9;} else {cM=atof(ch_ptr);}

		mapRS2chr[rs]=chr;
		mapRS2bp[rs]=b_pos;
		mapRS2cM[rs]=cM;
	}

	infile.close();
	infile.clear();

	return true;
}

//read one column of phenotype
// Reads column `p_column` (1-based) of every line of `file_pheno`.
//   indicator_idv - cleared; gets 1 per line with a value, 0 per line with "NA"
//   pheno         - cleared; gets the value, or -9 for "NA"
// Returns false (after printing a message) when the file cannot be opened or a
// line has fewer than `p_column` columns.
bool ReadFile_column (const string &file_pheno, vector<int> &indicator_idv, vector<double> &pheno, const int &p_column)
{
	indicator_idv.clear();
	pheno.clear();

	igzstream infile (file_pheno.c_str(), igzstream::in);
	if (!infile) {cout<<"error! fail to open phenotype file: "<<file_pheno<<endl; return false;}

	string line;
	char *ch_ptr;
	double p;
	size_t n_line=0;

	while (!safeGetline(infile, line).eof()) {
		n_line++;

		ch_ptr=strtok (&line[0], " , \t");
		for (int i=0; i<(p_column-1) && ch_ptr!=NULL; ++i) {
			ch_ptr=strtok (NULL, " , \t");
		}

		// the requested column does not exist on this line; strcmp(NULL, ...)
		// would be undefined
		if (ch_ptr==NULL) {
			cout<<"error! line "<<n_line<<" of phenotype file "<<file_pheno<<" has fewer than "<<p_column<<" columns."<<endl;
			return false;
		}

		if (strcmp(ch_ptr, "NA")==0) {indicator_idv.push_back(0); pheno.push_back(-9);}		//pheno is different from pimass2
		else {p=atof(ch_ptr); indicator_idv.push_back(1); pheno.push_back(p);}
	}

	infile.close();
	infile.clear();

	return true;
}



//Read bimbam phenotype file, p_column=1, 2 ...
// For every line, extracts the columns listed in `p_column` (1-based).
//   indicator_pheno - cleared; one row per line, 1 where a value was read, 0 for "NA"
//   pheno           - cleared; one row per line, the value or -9 for "NA"
//   p_column        - requested columns; row entry k corresponds to p_column[k]
// Returns false (after printing a message) when the file cannot be opened,
// `p_column` is empty, or a line lacks one of the requested columns.
bool ReadFile_pheno (const string &file_pheno, vector<vector<int> > &indicator_pheno, vector<vector<double> > &pheno, const vector<size_t> &p_column)
{
	indicator_pheno.clear();
	pheno.clear();

	igzstream infile (file_pheno.c_str(), igzstream::in);
	if (!infile) {cout<<"error! fail to open phenotype file: "<<file_pheno<<endl; return false;}

	// max_element on an empty range returns end(), which must not be dereferenced
	if (p_column.empty()) {cout<<"error! no phenotype column is specified for file: "<<file_pheno<<endl; return false;}

	string line;
	char *ch_ptr;
	double p;

	vector<double> pheno_row;
	vector<int> ind_pheno_row;

	// map from 1-based file column to position in the output row
	size_t p_max=*max_element(p_column.begin(), p_column.end() );
	map<size_t, size_t> mapP2c;
	for (size_t i=0; i<p_column.size(); i++) {
		mapP2c[p_column[i]]=i;
		pheno_row.push_back(-9);
		ind_pheno_row.push_back(0);
	}

	size_t n_line=0;
	while (!safeGetline(infile, line).eof()) {
		n_line++;
		ch_ptr=strtok (&line[0], " , \t");

		for (size_t i=0; i<p_max; ++i) {
			map<size_t, size_t>::const_iterator it=mapP2c.find(i+1);
			if (it!=mapP2c.end()) {
				// a requested column is missing on this line
				if (ch_ptr==NULL) {
					cout<<"error! line "<<n_line<<" of phenotype file "<<file_pheno<<" has fewer than "<<i+1<<" columns."<<endl;
					return false;
				}

				const size_t c_out=it->second;
				if (strcmp(ch_ptr, "NA")==0) {ind_pheno_row[c_out]=0; pheno_row[c_out]=-9;}
				else {p=atof(ch_ptr); ind_pheno_row[c_out]=1; pheno_row[c_out]=p;}
			}
			ch_ptr=strtok (NULL, " , \t");
		}

		indicator_pheno.push_back(ind_pheno_row);
		pheno.push_back(pheno_row);
	}

	infile.close();
	infile.clear();

	return true;
}


// Read a covariates file: one row per line, values separated by space, comma
// or tab.
//   indicator_cvt - cleared; 1 for a row without "NA", 0 for a row with "NA"
//   cvt           - cleared; one vector per line ("NA" stored as -9)
//   n_cvt         - set to 0 when the file has no rows, otherwise to the
//                   column count of the rows without "NA" (left unchanged
//                   when every row contains "NA")
// Returns false (after printing a message) when the file cannot be opened or
// the rows without "NA" differ in their number of columns.
bool ReadFile_cvt (const string &file_cvt, vector<int> &indicator_cvt, vector<vector<double> > &cvt, size_t &n_cvt)
{
	indicator_cvt.clear();
	// cvt[i] is compared against indicator_cvt[i] below, so both must start
	// empty; previously only indicator_cvt was cleared
	cvt.clear();

	ifstream infile (file_cvt.c_str(), ifstream::in);
	if (!infile) {cout<<"error! fail to open covariates file: "<<file_cvt<<endl; return false;}

	string line;
	char *ch_ptr;
	double d;

	int flag_na=0;

	while (!safeGetline(infile, line).eof()) {
		vector<double> v_d; flag_na=0;
		ch_ptr=strtok (&line[0], " , \t");
		while (ch_ptr!=NULL) {
			if (strcmp(ch_ptr, "NA")==0) {flag_na=1; d=-9;}
			else {d=atof(ch_ptr);}

			v_d.push_back(d);
			ch_ptr=strtok (NULL, " , \t");
		}
		indicator_cvt.push_back(flag_na==0 ? 1 : 0);
		cvt.push_back(v_d);
	}

	if (indicator_cvt.empty()) {n_cvt=0;}
	else {
		// the first row without "NA" fixes n_cvt; all later ones must agree
		bool have_ref=false;
		for (vector<int>::size_type i=0; i<indicator_cvt.size(); ++i) {
			if (indicator_cvt[i]==0) {continue;}

			if (!have_ref) {have_ref=true; n_cvt=cvt[i].size();}
			else if (n_cvt!=cvt[i].size()) {cout<<"error! number of covariates in row "<<i<<" do not match other rows."<<endl; return false;}
		}
	}

	infile.close();
	infile.clear();

	return true;
}



//Read .bim file
// Every non-blank line must hold six space/tab separated fields, read here as:
// chromosome, rs id, cM, base-pair position, minor allele, major allele.
//   file_bim - path of the .bim file
//   snpInfo  - cleared, then one SNPINFO appended per line
// Returns false (after printing a message) when the file cannot be opened or a
// line has fewer than six fields.
bool ReadFile_bim (const string &file_bim, vector<SNPINFO> &snpInfo)
{
	snpInfo.clear();

	ifstream infile (file_bim.c_str(), ifstream::in);
	if (!infile) {cout<<"error opening .bim file: "<<file_bim<<endl; return false;}

	string line;
	char *ch_ptr;

	string rs;
	long int b_pos;
	string chr;
	double cM;
	string major;
	string minor;

	size_t n_line=0;
	while (getline(infile, line)) {
		n_line++;

		// getline keeps the '\r' of a CRLF line ending; drop it so it does not
		// end up in the last allele field
		if (!line.empty() && line[line.size()-1]=='\r') {line.erase(line.size()-1);}

		// gather up to six fields, stopping early when the line runs out
		char *field[6];
		size_t n_field=0;
		ch_ptr=strtok (&line[0], " \t");
		while (ch_ptr!=NULL && n_field<6) {
			field[n_field++]=ch_ptr;
			ch_ptr=strtok (NULL, " \t");
		}

		if (n_field==0) {continue;}	// blank line
		if (n_field<6) {
			cout<<"error! line "<<n_line<<" of .bim file "<<file_bim<<" has fewer than 6 columns."<<endl;
			return false;
		}

		chr=field[0];
		rs=field[1];
		cM=atof(field[2]);
		b_pos=atol(field[3]);
		minor=field[4];
		major=field[5];

		SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, 0, -9, -9, 0, 0, 0};
		snpInfo.push_back(sInfo);
	}

	infile.close();
	infile.clear();
	return true;
}


//Read .fam file
// For every line: the 2nd token is the individual id; phenotypes start at the
// 6th token, and `p_column` (1-based, relative to the 6th token) selects which
// of them to keep. "NA" and -9 are both treated as missing.
//   indicator_pheno - cleared; one row per line, 1 = observed, 0 = missing
//   pheno           - cleared; one row per line, the value or -9 when missing
//   mapID2num       - cleared; maps individual id to its 0-based line index
//   p_column        - requested phenotype columns
// Returns false (after printing a message) when the file cannot be opened,
// `p_column` is empty, or a line is too short.
bool ReadFile_fam (const string &file_fam, vector<vector<int> > &indicator_pheno, vector<vector<double> > &pheno, map<string, int> &mapID2num, const vector<size_t> &p_column)
{
	indicator_pheno.clear();
	pheno.clear();
	mapID2num.clear();

	igzstream infile (file_fam.c_str(), igzstream::in);
	if (!infile) {cout<<"error opening .fam file: "<<file_fam<<endl; return false;}

	// max_element on an empty range returns end(), which must not be dereferenced
	if (p_column.empty()) {cout<<"error! no phenotype column is specified for file: "<<file_fam<<endl; return false;}

	string line;
	char *ch_ptr;

	string id;
	int c=0;
	double p;

	vector<double> pheno_row;
	vector<int> ind_pheno_row;

	// map from 1-based phenotype column to position in the output row
	size_t p_max=*max_element(p_column.begin(), p_column.end() );
	map<size_t, size_t> mapP2c;
	for (size_t i=0; i<p_column.size(); i++) {
		mapP2c[p_column[i]]=i;
		pheno_row.push_back(-9);
		ind_pheno_row.push_back(0);
	}

	size_t n_line=0;
	while (!safeGetline(infile, line).eof()) {
		n_line++;

		ch_ptr=strtok (&line[0], " \t");				// 1st token (ignored)
		if (ch_ptr!=NULL) {ch_ptr=strtok (NULL, " \t");}	// 2nd token: individual id
		if (ch_ptr==NULL) {
			cout<<"error! line "<<n_line<<" of .fam file "<<file_fam<<" has no individual id."<<endl;
			return false;
		}
		id=ch_ptr;

		// skip tokens 3-5 and land on the 6th token (first phenotype)
		for (int k=0; k<4 && ch_ptr!=NULL; ++k) {
			ch_ptr=strtok (NULL, " \t");
		}

		for (size_t i=0; i<p_max; ++i) {
			map<size_t, size_t>::const_iterator it=mapP2c.find(i+1);
			if (it!=mapP2c.end()) {
				// a requested phenotype column is missing on this line
				if (ch_ptr==NULL) {
					cout<<"error! line "<<n_line<<" of .fam file "<<file_fam<<" has fewer than "<<i+1<<" phenotype columns."<<endl;
					return false;
				}

				const size_t c_out=it->second;
				if (strcmp(ch_ptr, "NA")==0) {
					ind_pheno_row[c_out]=0; pheno_row[c_out]=-9;
				} else {
					p=atof(ch_ptr);

					if (p==-9) {ind_pheno_row[c_out]=0; pheno_row[c_out]=-9;}
					else {ind_pheno_row[c_out]=1; pheno_row[c_out]=p;}
				}
			}
			// NOTE(review): this delimiter set also contains ',' whereas the
			// tokens above are split on space/tab only; kept as in the original.
			ch_ptr=strtok (NULL, " , \t");
		}

		indicator_pheno.push_back(ind_pheno_row);
		pheno.push_back(pheno_row);

		mapID2num[id]=c; c++;
	}

	infile.close();
	infile.clear();
	return true;
}






//Read bimbam mean genotype file, the first time, to obtain #SNPs for analysis (ns_test) and total #SNP (ns_total)
// Every line is read as: rs id, minor allele, major allele, then one genotype
// ("NA" = missing) per entry of `indicator_idv`. One SNPINFO and one 0/1 flag
// are appended per line; a SNP is flagged 0 when it
//   - is not in `setSnps` (if that set is non-empty),
//   - has missingness above `miss_level`,
//   - has maf outside [maf_level, 1-maf_level] (skipped when maf_level==-1),
//   - is not polymorphic among the observed genotypes,
//   - fails the CalcHWE test at `hwe_level` (skipped when hwe_level==0 or maf_level==-1),
//   - has v_w/v_x >= r2_level against the columns of W (skipped when W has one column).
// Outputs: indicator_snp and snpInfo (both cleared first), ns_test = number of
// SNPs flagged 1.
// Assumes W->size1 equals the number of individuals with indicator_idv==1 - TODO confirm against callers.
// Returns false (after printing a message) when the file cannot be opened or a
// line has too few columns.
bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const gsl_matrix *W, vector<int> &indicator_idv, vector<int> &indicator_snp, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, map<string, string> &mapRS2chr, map<string, long int> &mapRS2bp, map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo, size_t &ns_test)
{
	indicator_snp.clear();
	snpInfo.clear();

	igzstream infile (file_geno.c_str(), igzstream::in);
	if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}

	gsl_vector *genotype=gsl_vector_alloc (W->size1);
	gsl_vector *genotype_miss=gsl_vector_alloc (W->size1);
	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
	gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);

	// WtWi = (W^T W)^{-1}, computed once and reused for every SNP
	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
	int sig;
	LUDecomp (WtW, pmt, &sig);
	LUInvert (WtW, pmt, WtWi);

	double v_x, v_w;
	int c_idv=0;

	string line;
	char *ch_ptr;

	string rs;
	long int b_pos;
	string chr;
	string major;
	string minor;
	double cM;
	size_t file_pos;

	double maf, geno, geno_old;
	size_t n_miss;
	size_t n_0, n_1, n_2;
	int flag_poly;

	int ni_total=indicator_idv.size();
	int ni_test=0;
	for (int i=0; i<ni_total; ++i) {
		ni_test+=indicator_idv[i];
	}
	ns_test=0;

	// cleared when a line turns out to be truncated; the loop then stops so
	// that the buffers below are still released before returning
	bool flag_ok=true;

	file_pos=0;
	while (!safeGetline(infile, line).eof()) {
		// the three leading fields must all be present
		ch_ptr=strtok (&line[0], " , \t");
		if (ch_ptr!=NULL) {rs=ch_ptr; ch_ptr=strtok (NULL, " , \t");}
		if (ch_ptr!=NULL) {minor=ch_ptr; ch_ptr=strtok (NULL, " , \t");}
		if (ch_ptr==NULL) {flag_ok=false; break;}
		major=ch_ptr;

		if (setSnps.size()!=0 && setSnps.count(rs)==0) {
		  SNPINFO sInfo={"-9", rs, -9, -9, minor, major, 0, -9, -9, 0, 0, file_pos};
		  snpInfo.push_back(sInfo);
		  indicator_snp.push_back(0);

		  file_pos++;
		  continue;
		}

		if (mapRS2bp.count(rs)==0) {chr="-9"; b_pos=-9;cM=-9;}
		else {b_pos=mapRS2bp[rs]; chr=mapRS2chr[rs]; cM=mapRS2cM[rs];}

		maf=0; n_miss=0; flag_poly=0; geno_old=-9;
		n_0=0; n_1=0; n_2=0;
		c_idv=0; gsl_vector_set_zero (genotype_miss);
		for (int i=0; i<ni_total; ++i) {
			ch_ptr=strtok (NULL, " , \t");
			if (indicator_idv[i]==0) {continue;}

			// line ends before all individuals were read
			if (ch_ptr==NULL) {flag_ok=false; break;}

			if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set (genotype_miss, c_idv, 1); n_miss++; c_idv++; continue;}

			geno=atof(ch_ptr);
			if (geno>=0 && geno<=0.5) {n_0++;}
			if (geno>0.5 && geno<1.5) {n_1++;}
			if (geno>=1.5 && geno<=2.0) {n_2++;}

			gsl_vector_set (genotype, c_idv, geno);

			// flag_poly: 0 = nothing seen yet, 2 = one distinct value so far,
			// 1 = at least two distinct values (polymorphic)
			if (flag_poly==0) {geno_old=geno; flag_poly=2;}
			if (flag_poly==2 && geno!=geno_old) {flag_poly=1;}

			maf+=geno;

			c_idv++;
		}
		if (!flag_ok) {break;}

		maf/=2.0*(double)(ni_test-n_miss);

		SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, n_miss, (double)n_miss/(double)ni_test, maf, ni_test-n_miss, 0, file_pos};
		snpInfo.push_back(sInfo);
		file_pos++;

		if ( (double)n_miss/(double)ni_test > miss_level) {indicator_snp.push_back(0); continue;}

		if ( (maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1 ) {indicator_snp.push_back(0); continue;}

		if (flag_poly!=1) {indicator_snp.push_back(0); continue;}

		if (hwe_level!=0 && maf_level!=-1) {
			if (CalcHWE(n_0, n_2, n_1)<hwe_level) {indicator_snp.push_back(0); continue;}
		}

		//filter SNP if it is correlated with W
		//unless W has only one column, of 1s
		// missing genotypes are first replaced by the mean genotype (2*maf)
		for (size_t i=0; i<genotype->size; ++i) {
			if (gsl_vector_get (genotype_miss, i)==1) {geno=maf*2.0; gsl_vector_set (genotype, i, geno);}
		}

		// v_x = x'x, v_w = x'W (W'W)^{-1} W'x
		gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
		gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
		gsl_blas_ddot (genotype, genotype, &v_x);
		gsl_blas_ddot (Wtx, WtWiWtx, &v_w);

		// NOTE(review): ReadFile_bed uses '>' for the same test; kept as in the original.
		if (W->size2!=1 && v_w/v_x >= r2_level) {indicator_snp.push_back(0); continue;}

		indicator_snp.push_back(1);
		ns_test++;
	}

	if (!flag_ok) {
		// file_pos is the 0-based index of the line being processed
		cout<<"error! line "<<file_pos+1<<" of genotype file "<<file_geno<<" has too few columns."<<endl;
	}

	gsl_vector_free (genotype);
	gsl_vector_free (genotype_miss);
	gsl_matrix_free (WtW);
	gsl_matrix_free (WtWi);
	gsl_vector_free (Wtx);
	gsl_vector_free (WtWiWtx);
	gsl_permutation_free (pmt);

	infile.close();
	infile.clear();

	return flag_ok;
}






//Read bed file, the first time
// Scans the binary .bed file once per SNP already listed in `snpInfo`, fills
// in the per-SNP statistics (n_miss, missingness, maf, n_idv, n_nb,
// file_position) and appends a 0/1 flag per SNP to `indicator_snp`
// (cleared first). `ns_test` receives the number of SNPs flagged 1.
// A SNP is flagged 0 when it is not in `setSnps` (if non-empty), exceeds
// `miss_level`, has maf outside [maf_level, 1-maf_level] (unless
// maf_level==-1), has fewer than two genotype classes, fails CalcHWE at
// `hwe_level` (unless hwe_level==0 or maf_level==-1), or has
// v_w/v_x > r2_level against W (unless W has a single column).
// Returns false (after printing a message) only when the file cannot be opened.
// NOTE(review): read failures on a truncated file are not checked here.
bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl_matrix *W, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, size_t &ns_test)
{
	indicator_snp.clear();
	size_t ns_total=snpInfo.size();

	ifstream infile (file_bed.c_str(), ios::binary);
	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}

	// work buffers; sized from W (assumes W->size1 == number of individuals
	// with indicator_idv==1 - TODO confirm against callers)
	gsl_vector *genotype=gsl_vector_alloc (W->size1);
	gsl_vector *genotype_miss=gsl_vector_alloc (W->size1);
	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
	gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);

	// WtWi = (W^T W)^{-1}, computed once and reused for every SNP
	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
	int sig;
	LUDecomp (WtW, pmt, &sig);
	LUInvert (WtW, pmt, WtWi);

	double v_x, v_w, geno;
	size_t c_idv=0;

	char ch[1];
	bitset<8> b;

	// ni_test = number of individuals kept for analysis
	size_t ni_total=indicator_idv.size();
	size_t ni_test=0;
	for (size_t i=0; i<ni_total; ++i) {
		ni_test+=indicator_idv[i];
	}
	ns_test=0;

	//calculate n_bit, the number of bytes per SNP (four genotypes per byte, rounded up)
	size_t n_bit;
	if (ni_total%4==0) {n_bit=ni_total/4;}
	else {n_bit=ni_total/4+1;}

	//ignore the first three magic numbers
	for (int i=0; i<3; ++i) {
		infile.read(ch,1);
		b=ch[0];
	}

	double maf;
	size_t n_miss;
	size_t n_0, n_1, n_2, c;

	//start reading snps and doing association test
	for (size_t t=0; t<ns_total; ++t) {
	  infile.seekg(t*n_bit+3);		//n_bit bytes per SNP, and 3 is the number of magic numbers

		// SNP excluded by the user-supplied list: record placeholders only
		if (setSnps.size()!=0 && setSnps.count(snpInfo[t].rs_number)==0) {
			snpInfo[t].n_miss=-9;
			snpInfo[t].missingness=-9;
			snpInfo[t].maf=-9;
			snpInfo[t].file_position=t;
			indicator_snp.push_back(0);
			continue;
		}

		//read genotypes
		// c counts all individuals in file order, c_idv only the kept ones
		c=0; maf=0.0; n_miss=0; n_0=0; n_1=0; n_2=0;
		c_idv=0; gsl_vector_set_zero (genotype_miss);
		for (size_t i=0; i<n_bit; ++i) {
			infile.read(ch,1);
			b=ch[0];
			for (size_t j=0; j<4; ++j) {                //minor allele homozygous: 2.0; major: 0.0;
				// the last byte may be only partly used
				if ((i==(n_bit-1)) && c==ni_total) {break;}
				if (indicator_idv[c]==0) {c++; continue;}
				c++;

				// bit pair (b[2j], b[2j+1]): 00 -> 2, 01 -> 1, 11 -> 0, 10 -> missing
				if (b[2*j]==0) {
					if (b[2*j+1]==0) {gsl_vector_set(genotype, c_idv, 2.0); maf+=2.0; n_2++;}
					else {gsl_vector_set(genotype, c_idv, 1.0); maf+=1.0; n_1++;}
				}
				else {
					if (b[2*j+1]==1) {gsl_vector_set(genotype, c_idv, 0.0); maf+=0.0; n_0++;}
					else {gsl_vector_set(genotype_miss, c_idv, 1); n_miss++; }
				}
				c_idv++;
			}
		}
		// allele frequency over the observed genotypes
		maf/=2.0*(double)(ni_test-n_miss);

		snpInfo[t].n_miss=n_miss;
		snpInfo[t].missingness=(double)n_miss/(double)ni_test;
		snpInfo[t].maf=maf;
		snpInfo[t].n_idv=ni_test-n_miss;
		snpInfo[t].n_nb=0;
		snpInfo[t].file_position=t;

		// filters, applied in this order; the first failing one flags the SNP 0
		if ( (double)n_miss/(double)ni_test > miss_level) {indicator_snp.push_back(0); continue;}

		if ( (maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1 ) {indicator_snp.push_back(0); continue;}

		// fewer than two genotype classes observed
		if ( (n_0+n_1)==0 || (n_1+n_2)==0 || (n_2+n_0)==0) {indicator_snp.push_back(0); continue;}

		if (hwe_level!=0 && maf_level!=-1) {
			if (CalcHWE(n_0, n_2, n_1)<hwe_level) {indicator_snp.push_back(0); continue;}
		}

		//filter SNP if it is correlated with W
		//unless W has only one column, of 1s
		// missing genotypes are first replaced by the mean genotype (2*maf)
		for (size_t i=0; i<genotype->size; ++i) {
			if (gsl_vector_get (genotype_miss, i)==1) {geno=maf*2.0; gsl_vector_set (genotype, i, geno);}
		}

		// v_x = x'x, v_w = x'W (W'W)^{-1} W'x
		gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
		gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
		gsl_blas_ddot (genotype, genotype, &v_x);
		gsl_blas_ddot (Wtx, WtWiWtx, &v_w);

		// NOTE(review): ReadFile_geno uses '>=' for the same test.
		if (W->size2!=1 && v_w/v_x > r2_level) {indicator_snp.push_back(0); continue;}

		indicator_snp.push_back(1);
		ns_test++;
	}

	gsl_vector_free (genotype);
	gsl_vector_free (genotype_miss);
	gsl_matrix_free (WtW);
	gsl_matrix_free (WtWi);
	gsl_vector_free (Wtx);
	gsl_vector_free (WtWiWtx);
	gsl_permutation_free (pmt);

	infile.close();
	infile.clear();

	return true;
}





//read the genotype for one SNP; remember to read empty lines
//geno stores original genotypes without centering
//missing values are replaced by mean
//   inc           - number of lines to skip before the line that is parsed
//   indicator_idv - one entry per individual in the file; 0 = skip
//   infile        - already opened genotype stream, read from its current position
//   geno          - receives the genotypes of the kept individuals, in file order
//   geno_mean     - receives the mean over the non-missing kept genotypes
//returns false when the end of the file is reached, or (after printing a
//message) when the line has too few genotypes; true otherwise
bool Bimbam_ReadOneSNP (const size_t inc, const vector<int> &indicator_idv, igzstream &infile, gsl_vector *geno, double &geno_mean)
{
  size_t ni_total=indicator_idv.size();

  string line;
  char *ch_ptr;

  // skip the lines in front of the requested SNP
  for (size_t i=0; i<inc; i++) {
    safeGetline(infile, line);
  }

  if (safeGetline(infile, line).eof()) {return false;}

  // the first three fields are not genotypes
  ch_ptr=strtok (&line[0], " , \t");
  ch_ptr=strtok (NULL, " , \t");
  ch_ptr=strtok (NULL, " , \t");

  geno_mean=0.0;
  double d;
  size_t c_idv=0;
  vector<size_t> geno_miss;

  for (size_t i=0; i<ni_total; ++i) {
    ch_ptr=strtok (NULL, " , \t");
    if (indicator_idv[i]==0) {continue;}

    // line ends before all individuals were read; strcmp(NULL, ...) is undefined
    if (ch_ptr==NULL) {
      cout<<"error! a line of the genotype file has fewer genotypes than individuals."<<endl;
      return false;
    }

    if (strcmp(ch_ptr, "NA")==0) {
      geno_miss.push_back(c_idv);
    } else {
      d=atof(ch_ptr);
      gsl_vector_set (geno, c_idv, d);
      geno_mean+=d;
    }
    c_idv++;
  }

  geno_mean/=(double)(c_idv-geno_miss.size() );

  // fill the missing entries with the mean
  for (size_t i=0; i<geno_miss.size(); ++i) {
    gsl_vector_set(geno, geno_miss[i], geno_mean);
  }

  return true;
}


//for plink, store SNPs as double too
//   pos           - 0-based index of the SNP inside the .bed file
//   indicator_idv - one entry per individual in the file; 0 = skip
//   infile        - opened .bed stream; it is repositioned with seekg
//   geno          - receives the genotypes of the kept individuals
//   geno_mean     - receives the mean over the non-missing kept genotypes
//missing genotypes are replaced by geno_mean
void Plink_ReadOneSNP (const int pos, const vector<int> &indicator_idv, ifstream &infile, gsl_vector *geno, double &geno_mean)
{
  const size_t n_idv=indicator_idv.size();
  // four genotypes per byte, rounded up
  const size_t n_byte=(n_idv%4==0) ? n_idv/4 : n_idv/4+1;
  infile.seekg(pos*n_byte+3); //n_byte per SNP, and 3 is the number of magic numbers

  //read genotypes
  char raw[1];
  bitset<8> bits;

  geno_mean=0.0;
  size_t seen=0, kept=0;	// individuals visited / individuals stored so far
  vector<size_t> missing;

  for (size_t k=0; k<n_byte; ++k) {
    infile.read(raw,1);
    bits=raw[0];
    for (size_t slot=0; slot<4; ++slot) { //minor allele homozygous: 2.0; major: 0.0;
      // the last byte may be only partly used
      if (k==n_byte-1 && seen==n_idv) {break;}

      const bool use=(indicator_idv[seen]!=0);
      seen++;
      if (!use) {continue;}

      // bit pair (lo, hi): 00 -> 2, 01 -> 1, 11 -> 0, 10 -> missing
      const bool lo=bits[2*slot];
      const bool hi=bits[2*slot+1];
      if (!lo && !hi) {
        gsl_vector_set (geno, kept, 2.0);
        geno_mean+=2.0;
      } else if (!lo) {
        gsl_vector_set (geno, kept, 1.0);
        geno_mean+=1.0;
      } else if (hi) {
        gsl_vector_set (geno, kept, 0.0);
      } else {
        missing.push_back(kept);
      }

      kept++;
    }
  }

  geno_mean/=(double)(kept-missing.size());

  for (vector<size_t>::const_iterator it=missing.begin(); it!=missing.end(); ++it) {
    gsl_vector_set(geno, *it, geno_mean);
  }

  return;
}





// Read a kinship matrix into G (G is zeroed first).
//   file_kin      - path of the kinship file (opened through igzstream)
//   indicator_idv - one entry per individual; only those with value 1 are stored
//   mapID2num     - individual id -> index into indicator_idv (used when k_mode!=1)
//   k_mode        - 1: the file is a full matrix, one row per line;
//                   otherwise: every line is "id1 id2 value"
//   error         - set to true when the file cannot be opened or its layout
//                   does not match indicator_idv; never reset to false here
//   G             - output; assumed to be sized by the number of kept
//                   individuals - TODO confirm against callers
void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv, map<string, int> &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G)
{
	igzstream infile (file_kin.c_str(), igzstream::in);
	if (!infile) {cout<<"error! fail to open kinship file: "<<file_kin<<endl; error=true; return;}

	size_t ni_total=indicator_idv.size();

	gsl_matrix_set_zero (G);

	string line;
	char *ch_ptr;
	double d;

	if (k_mode==1) {
		// *_total index all individuals, *_test only the kept ones
		size_t i_test=0, i_total=0, j_test=0, j_total=0;
		while (getline(infile, line)) {
			// stop here: going on would index indicator_idv past its end
			if (i_total==ni_total) {cout<<"error! number of rows in the kinship file is larger than the number of phentypes."<<endl; error=true; break;}

			if (indicator_idv[i_total]==0) {i_total++; continue;}

			j_total=0; j_test=0;
			ch_ptr=strtok (&line[0], " , \t");
			while (ch_ptr!=NULL) {
				// stop this row: going on would index indicator_idv past its end
				if (j_total==ni_total) {cout<<"error! number of columns in the kinship file is larger than the number of phentypes for row = "<<i_total<<endl; error=true; break;}

				d=atof(ch_ptr);
				if (indicator_idv[j_total]==1) {gsl_matrix_set (G, i_test, j_test, d); j_test++;}
				j_total++;

				ch_ptr=strtok (NULL, " , \t");
			}
			if (j_total!=ni_total) {cout<<"error! number of columns in the kinship file do not match the number of phentypes for row = "<<i_total<<endl; error=true;}
			i_total++; i_test++;
		}
		if (i_total!=ni_total) {cout<<"error! number of rows in the kinship file do not match the number of phentypes."<<endl; error=true;}
	}
	else {
		// position of every kept individual inside G
		map<size_t, size_t> mapID2ID;
		size_t c=0;
		for (size_t i=0; i<indicator_idv.size(); i++) {
			if (indicator_idv[i]==1) {mapID2ID[i]=c; c++;}
		}

		string id1, id2;
		double Cov_d;
		size_t n_id1, n_id2;

		while (getline(infile, line)) {
			ch_ptr=strtok (&line[0], " , \t");
			if (ch_ptr==NULL) {continue;}	// blank line
			id1=ch_ptr;

			// the two remaining fields must both be present
			ch_ptr=strtok (NULL, " , \t");
			if (ch_ptr!=NULL) {id2=ch_ptr; ch_ptr=strtok (NULL, " , \t");}
			if (ch_ptr==NULL) {
				cout<<"error! a line in the kinship file has fewer than three columns, for id1 = "<<id1<<endl;
				error=true;
				continue;
			}
			d=atof(ch_ptr);

			// pairs with an unknown or excluded individual are ignored
			if (mapID2num.count(id1)==0 || mapID2num.count(id2)==0) {continue;}
			if (indicator_idv[mapID2num[id1]]==0 || indicator_idv[mapID2num[id2]]==0) {continue;}

			n_id1=mapID2ID[mapID2num[id1]];
			n_id2=mapID2ID[mapID2num[id2]];

			Cov_d=gsl_matrix_get(G, n_id1, n_id2);
			if (Cov_d!=0 && Cov_d!=d) {cout<<"error! redundant and unequal terms in the kinship file, for id1 = "<<id1<<" and id2 = "<<id2<<endl;}
			else {
				gsl_matrix_set(G, n_id1, n_id2, d);
				gsl_matrix_set(G, n_id2, n_id1, d);
			}
		}
	}

	infile.close();
	infile.clear();

	return;
}


void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv, map<string, int> &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G)
{
	igzstream infile (file_mk.c_str(), igzstream::in);
	if (!infile) {cout<<"error! fail to open file: "<<file_mk<<endl; error=true; return;}

	string file_kin, line;

	size_t i=0;
	while (getline(infile, line)) {
	  file_kin=line.c_str();
	  gsl_matrix_view G_sub=gsl_matrix_submatrix(G, 0, i*G->size1, G->size1, G->size1);
	  ReadFile_kin (file_kin, indicator_idv, mapID2num, k_mode, error, &G_sub.matrix);
	  i++;
	}

	infile.close();
	infile.clear();
	return;
}


void ReadFile_eigenU (const string &file_ku, bool &error, gsl_matrix *U)
{
	igzstream infile (file_ku.c_str(), igzstream::in);
//	ifstream infile (file_ku.c_str(), ifstream::in);
	if (!infile) {cout<<"error! fail to open the U file: "<<file_ku<<endl; error=true; return;}

	size_t n_row=U->size1, n_col=U->size2, i_row=0, i_col=0;

	gsl_matrix_set_zero (U);

	string line;
	char *ch_ptr;
	double d;

	while (getline(infile, line)) {
		if (i_row==n_row) {cout<<"error! number of rows in the U file is larger than expected."<<endl; error=true;}

		i_col=0;
		ch_ptr=strtok ((char *)line.c_str(), " , \t");
		while (ch_ptr!=NULL) {
			if (i_col==n_col) {cout<<"error! number of columns in the U file is larger than expected, for row = "<<i_row<<endl; error=true;}

			d=atof(ch_ptr);
			gsl_matrix_set (U, i_row, i_col, d);
			i_col++;

			ch_ptr=strtok (NULL, " , \t");
		}

		i_row++;
	}

	infile.close();
	infile.clear();

	return;
}




void ReadFile_eigenD (const string &file_kd, bool &error, gsl_vector *eval)
{
	igzstream infile (file_kd.c_str(), igzstream::in);
//	ifstream infile (file_kd.c_str(), ifstream::in);
	if (!infile) {cout<<"error! fail to open the D file: "<<file_kd<<endl; error=true; return;}

	size_t n_row=eval->size, i_row=0;

	gsl_vector_set_zero (eval);

	string line;
	char *ch_ptr;
	double d;

	while (getline(infile, line)) {
		if (i_row==n_row) {cout<<"error! number of rows in the D file is larger than expected."<<endl; error=true;}

		ch_ptr=strtok ((char *)line.c_str(), " , \t");
		d=atof(ch_ptr);

		ch_ptr=strtok (NULL, " , \t");
		if (ch_ptr!=NULL) {cout<<"error! number of columns in the D file is larger than expected, for row = "<<i_row<<endl; error=true;}

		gsl_vector_set (eval, i_row, d);

		i_row++;
	}

	infile.close();
	infile.clear();

	return;
}



//read bimbam mean genotype file and calculate kinship matrix
// One line is read per entry of `indicator_snp`; lines whose flag is 0 are
// skipped. For each remaining SNP the genotypes (after three leading fields)
// are mean-imputed and centered, divided by the SNP's standard deviation when
// k_mode==2, and accumulated as X*X^T into `matrix_kin` in batches of 10000
// SNPs. At the end matrix_kin is divided by the number of SNPs used and its
// upper triangle is copied to the lower triangle.
//   file_geno     - path of the genotype file (opened through igzstream)
//   indicator_snp - 0/1 flag per line of the file
//   k_mode        - 2 = standardize each SNP; any other value = center only
//   display_pace  - the progress bar is refreshed every display_pace lines
//   matrix_kin    - square output matrix; results are added to its current
//                   contents, so it presumably has to be zeroed by the caller - TODO confirm
// Returns false (after printing a message) when the file cannot be opened or a
// selected line has too few genotypes; matrix_kin is then left incomplete.
bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin)
{
	igzstream infile (file_geno.c_str(), igzstream::in);
	if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}

	string line;
	char *ch_ptr;

	size_t n_miss;
	double d, geno_mean, geno_var;

	size_t ni_total=matrix_kin->size1;
	gsl_vector *geno=gsl_vector_alloc (ni_total);
	// geno_miss[i]: 1 = genotype observed, 0 = missing
	gsl_vector *geno_miss=gsl_vector_alloc (ni_total);

	//create a large matrix
	size_t msize=10000;
	gsl_matrix *Xlarge=gsl_matrix_alloc (ni_total, msize);
	gsl_matrix_set_zero(Xlarge);

	// cleared when a selected line is truncated; the loop then stops so that
	// the buffers are still released before returning
	bool flag_ok=true;

	size_t ns_test=0;
	for (size_t t=0; t<indicator_snp.size(); ++t) {
		safeGetline(infile, line);
		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);}
		if (indicator_snp[t]==0) {continue;}

		// the first three fields are not genotypes
		ch_ptr=strtok (&line[0], " , \t");
		ch_ptr=strtok (NULL, " , \t");
		ch_ptr=strtok (NULL, " , \t");

		geno_mean=0.0; n_miss=0; geno_var=0.0;
		gsl_vector_set_all(geno_miss, 0);
		for (size_t i=0; i<ni_total; ++i) {
			ch_ptr=strtok (NULL, " , \t");
			// line (or file) ends early; strcmp(NULL, ...) would be undefined
			if (ch_ptr==NULL) {flag_ok=false; break;}

			if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(geno_miss, i, 0); n_miss++;}
			else {
				d=atof(ch_ptr);
				gsl_vector_set (geno, i, d);
				gsl_vector_set (geno_miss, i, 1);
				geno_mean+=d;
				geno_var+=d*d;
			}
		}
		if (!flag_ok) {
			cout<<endl<<"error! line "<<t+1<<" of genotype file "<<file_geno<<" has too few genotypes."<<endl;
			break;
		}

		// variance of the mean-imputed genotypes: E[x^2] - mean^2
		geno_mean/=(double)(ni_total-n_miss);
		geno_var+=geno_mean*geno_mean*(double)n_miss;
		geno_var/=(double)ni_total;
		geno_var-=geno_mean*geno_mean;

		for (size_t i=0; i<ni_total; ++i) {
			if (gsl_vector_get (geno_miss, i)==0) {gsl_vector_set(geno, i, geno_mean);}
		}

		gsl_vector_add_constant (geno, -1.0*geno_mean);

		if (k_mode==2 && geno_var!=0) {gsl_vector_scale (geno, 1.0/sqrt(geno_var));}
		gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, ns_test%msize);
		gsl_vector_memcpy (&Xlarge_col.vector, geno);

		ns_test++;

		// batch is full: add X*X^T to the kinship matrix and start a new batch
		if (ns_test%msize==0) {
		  eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
		  gsl_matrix_set_zero(Xlarge);
		}
	}

	if (flag_ok) {
		// last, partially filled batch (unused columns are still zero)
		if (ns_test%msize!=0) {
		  eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
		}
		cout<<endl;

		// with no selected SNP the factor would be 1/0, turning the matrix into NaN
		if (ns_test!=0) {gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test);}

		// copy the upper triangle to the lower triangle
		for (size_t i=0; i<ni_total; ++i) {
			for (size_t j=0; j<i; ++j) {
				d=gsl_matrix_get (matrix_kin, j, i);
				gsl_matrix_set (matrix_kin, i, j, d);
			}
		}
	}

	gsl_vector_free (geno);
	gsl_vector_free (geno_miss);
	gsl_matrix_free (Xlarge);

	infile.close();
	infile.clear();

	return flag_ok;
}







//Accumulate a kinship matrix from the genotypes stored in a PLINK bed file.
//  file_bed       path of the binary genotype file
//  indicator_snp  per-SNP flag; SNPs flagged 0 are skipped
//  k_mode         when 2 (and the SNP variance is non-zero) each centered SNP is
//                 additionally divided by the square root of its variance
//  display_pace   the progress bar is refreshed every display_pace SNPs
//  matrix_kin     square matrix receiving the result; its size1 is taken as the
//                 number of individuals stored in the file
//Returns false if the file cannot be opened, true otherwise.
bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin)
{
	ifstream infile (file_bed.c_str(), ios::binary);
	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}

	const size_t ni_total=matrix_kin->size1;
	const size_t n_snps=indicator_snp.size();

	//centered SNPs are buffered as columns and folded into matrix_kin one batch at a time
	const size_t msize=10000;
	gsl_vector *geno=gsl_vector_alloc (ni_total);
	gsl_matrix *Xlarge=gsl_matrix_alloc (ni_total, msize);
	gsl_matrix_set_zero(Xlarge);

	//bytes per SNP: four genotypes per byte, rounded up
	const int n_bit=(int)((ni_total+3)/4);

	//step over the three magic-number bytes
	char ch[1];
	for (int k=3; k>0; --k) {infile.read(ch,1);}

	size_t ns_test=0;
	double d;

	for (size_t t=0; t<n_snps; ++t) {
		if (t%display_pace==0 || t==(n_snps-1)) {ProgressBar ("Reading SNPs  ", t, n_snps-1);}
		if (indicator_snp[t]==0) {continue;}

		//jump to the record of SNP t: 3 magic bytes, then n_bit bytes per SNP
		infile.seekg(t*n_bit+3);

		double geno_mean=0.0, geno_var=0.0;
		size_t n_miss=0, ci_total=0;

		for (int i=0; i<n_bit; ++i) {
			infile.read(ch,1);
			const unsigned char packed=(unsigned char)ch[0];

			for (size_t j=0; j<4; ++j) {
				//the last byte may carry padding beyond the final individual
				if ((i==(n_bit-1)) && ci_total==ni_total) {break;}

				//two bits per individual, lowest bits first
				switch ((packed>>(2*j))&3u) {
					case 0u:	//both bits clear: coded 2.0
						gsl_vector_set(geno, ci_total, 2.0);
						geno_mean+=2.0;
						geno_var+=4.0;
						break;
					case 2u:	//low bit clear, high bit set: coded 1.0
						gsl_vector_set(geno, ci_total, 1.0);
						geno_mean+=1.0;
						geno_var+=1.0;
						break;
					case 3u:	//both bits set: coded 0.0
						gsl_vector_set(geno, ci_total, 0.0);
						break;
					default:	//low bit set, high bit clear: missing
						gsl_vector_set(geno, ci_total, -9.0);
						n_miss++;
						break;
				}

				ci_total++;
			}
		}

		//mean over observed genotypes; variance with missing entries counted at the mean
		geno_mean/=(double)(ni_total-n_miss);
		geno_var+=geno_mean*geno_mean*(double)n_miss;
		geno_var/=(double)ni_total;
		geno_var-=geno_mean*geno_mean;

		//impute missing genotypes with the mean, then center
		for (size_t i=0; i<ni_total; ++i) {
			if (gsl_vector_get(geno,i)==-9.0) {gsl_vector_set(geno, i, geno_mean);}
		}
		gsl_vector_add_constant (geno, -1.0*geno_mean);

		if (k_mode==2 && geno_var!=0) {gsl_vector_scale (geno, 1.0/sqrt(geno_var));}

		//store the SNP in the next free column of the batch buffer
		gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, ns_test%msize);
		gsl_vector_memcpy (&Xlarge_col.vector, geno);

		ns_test++;

		//buffer full: add Xlarge * Xlarge^T to matrix_kin and start a new batch
		if (ns_test%msize==0) {
			eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
			gsl_matrix_set_zero(Xlarge);
		}
	}

	//fold in the final, partially filled batch
	if (ns_test%msize!=0) {
		eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
	}

	cout<<endl;

	gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test);

	//mirror the upper triangle into the lower triangle
	for (size_t col=1; col<ni_total; ++col) {
		for (size_t row=0; row<col; ++row) {
			d=gsl_matrix_get (matrix_kin, row, col);
			gsl_matrix_set (matrix_kin, col, row, d);
		}
	}

	gsl_vector_free (geno);
	gsl_matrix_free (Xlarge);

	infile.close();
	infile.clear();

	return true;
}





//Read bimbam mean genotype file, the second time, recode "mean" genotype and calculate K
//  file_geno      path of the genotype file (opened through igzstream); each row
//                 holds three leading fields that are not used here, followed by
//                 one value (or "NA") per individual
//  indicator_idv  per-individual flag; individuals flagged 0 are dropped
//  indicator_snp  per-row flag; rows flagged 0 are read but skipped
//  UtX            output; column c receives the mean-centered genotypes of the
//                 c-th retained SNP, with missing values set to 0
//  K              output when calc_K is true: sum of outer products of the
//                 centered genotype vectors divided by UtX->size2
//  calc_K         whether K is computed
//Returns false if the file cannot be opened or a retained row has too few fields.
bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K)
{
	igzstream infile (file_geno.c_str(), igzstream::in);
	if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}

	string line;
	char *ch_ptr;

	if (calc_K==true) {gsl_matrix_set_zero (K);}

	gsl_vector *genotype=gsl_vector_alloc (UtX->size1);
	gsl_vector *genotype_miss=gsl_vector_alloc (UtX->size1);
	double geno, geno_mean;
	size_t n_miss;

	int ni_total=(int)indicator_idv.size();
	int ns_total=(int)indicator_snp.size();
	int ni_test=UtX->size1;
	int ns_test=UtX->size2;

	int c_idv=0, c_snp=0;

	for (int i=0; i<ns_total; ++i) {
		//every row is consumed so that skipped SNPs keep the file position in step
		safeGetline(infile, line);
		if (indicator_snp[i]==0) {continue;}

		//strtok edits its buffer, so hand it the string's own writable storage;
		//the first three fields are discarded
		ch_ptr=strtok (&line[0], " , \t");
		ch_ptr=strtok (NULL, " , \t");
		ch_ptr=strtok (NULL, " , \t");

		c_idv=0; geno_mean=0; n_miss=0;
		gsl_vector_set_zero (genotype_miss);
		for (int j=0; j<ni_total; ++j) {
			ch_ptr=strtok (NULL, " , \t");
			if (indicator_idv[j]==0) {continue;}

			//a short row would otherwise hand NULL to strcmp/atof
			if (ch_ptr==NULL) {
				cout<<"error! too few fields in genotype file "<<file_geno<<" at row "<<i+1<<endl;
				gsl_vector_free (genotype);
				gsl_vector_free (genotype_miss);
				return false;
			}

			if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set (genotype_miss, c_idv, 1); n_miss++;}
			else {
				geno=atof(ch_ptr);
				gsl_vector_set (genotype, c_idv, geno);
				geno_mean+=geno;
			}
			c_idv++;
		}

		//mean over the observed (non-missing) genotypes
		geno_mean/=(double)(ni_test-n_miss);

		//center observed values; missing values become 0, i.e. the mean after centering
		for (size_t k=0; k<genotype->size; ++k) {
			if (gsl_vector_get (genotype_miss, k)==1) {geno=0;}
			else {geno=gsl_vector_get (genotype, k); geno-=geno_mean;}

			gsl_vector_set (genotype, k, geno);
			gsl_matrix_set (UtX, k, c_snp, geno);
		}

		//rank-one update of the upper triangle of K
		if (calc_K==true) {gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);}

		c_snp++;
	}

	if (calc_K==true) {
		gsl_matrix_scale (K, 1.0/(double)ns_test);

		//mirror the upper triangle into the lower triangle
		for (size_t r=0; r<genotype->size; ++r) {
			for (size_t c=0; c<r; ++c) {
				geno=gsl_matrix_get (K, c, r);
				gsl_matrix_set (K, r, c, geno);
			}
		}
	}

	gsl_vector_free (genotype);
	gsl_vector_free (genotype_miss);

	infile.clear();
	infile.close();

	return true;
}



//compact version of the above function, using uchar instead of gsl_matrix
//  file_geno      path of the genotype file (opened through igzstream); each row
//                 holds three leading fields that are not used here, followed by
//                 one value (or "NA") per individual
//  indicator_idv  per-individual flag; individuals flagged 0 are dropped
//  indicator_snp  per-row flag; rows flagged 0 are read but skipped
//  Xt             output (cleared first); one row per retained SNP holding the
//                 genotypes passed through Double02ToUchar, missing values being
//                 replaced by the SNP mean beforehand
//  K              output when calc_K is true: sum of outer products of the
//                 centered genotype vectors divided by ns_test
//  calc_K         whether K is computed
//  ni_test        number of retained individuals (length of each Xt row)
//  ns_test        divisor used to scale K
//Returns false if the file cannot be opened or a retained row has too few fields.
bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test)
{
	igzstream infile (file_geno.c_str(), igzstream::in);
	if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}

	Xt.clear();
	vector<unsigned char> Xt_row(ni_test, 0);

	string line;
	char *ch_ptr;

	if (calc_K==true) {gsl_matrix_set_zero (K);}

	gsl_vector *genotype=gsl_vector_alloc (ni_test);
	gsl_vector *genotype_miss=gsl_vector_alloc (ni_test);
	double geno, geno_mean;
	size_t n_miss;

	size_t ni_total= indicator_idv.size();
	size_t ns_total= indicator_snp.size();

	size_t c_idv=0;

	for (size_t i=0; i<ns_total; ++i) {
		//every row is consumed so that skipped SNPs keep the file position in step
		safeGetline(infile, line);
		if (indicator_snp[i]==0) {continue;}

		//strtok edits its buffer, so hand it the string's own writable storage;
		//the first three fields are discarded
		ch_ptr=strtok (&line[0], " , \t");
		ch_ptr=strtok (NULL, " , \t");
		ch_ptr=strtok (NULL, " , \t");

		c_idv=0; geno_mean=0; n_miss=0;
		gsl_vector_set_zero (genotype_miss);
		for (size_t j=0; j<ni_total; ++j) {
			ch_ptr=strtok (NULL, " , \t");
			if (indicator_idv[j]==0) {continue;}

			//a short row would otherwise hand NULL to strcmp/atof
			if (ch_ptr==NULL) {
				cout<<"error! too few fields in genotype file "<<file_geno<<" at row "<<i+1<<endl;
				gsl_vector_free (genotype);
				gsl_vector_free (genotype_miss);
				return false;
			}

			if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set (genotype_miss, c_idv, 1); n_miss++;} else {
				geno=atof(ch_ptr);
				gsl_vector_set (genotype, c_idv, geno);
				geno_mean+=geno;
			}
			c_idv++;
		}

		//mean over the observed (non-missing) genotypes
		geno_mean/=(double)(ni_test-n_miss);

		for (size_t j=0; j<genotype->size; ++j) {
			//missing values are stored as the SNP mean
			if (gsl_vector_get (genotype_miss, j)==1) {
			  geno=geno_mean;
			} else {
			  geno=gsl_vector_get (genotype, j);
			}

			//Xt keeps the uncentered value; the centered value is only used for K
			Xt_row[j]=Double02ToUchar(geno);
			gsl_vector_set (genotype, j, (geno-geno_mean));
		}
		Xt.push_back(Xt_row);

		//rank-one update of the upper triangle of K
		if (calc_K==true) {gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);}
	}

	if (calc_K==true) {
		gsl_matrix_scale (K, 1.0/(double)ns_test);

		//mirror the upper triangle into the lower triangle
		for (size_t r=0; r<genotype->size; ++r) {
			for (size_t c=0; c<r; ++c) {
				geno=gsl_matrix_get (K, c, r);
				gsl_matrix_set (K, r, c, geno);
			}
		}
	}

	gsl_vector_free (genotype);
	gsl_vector_free (genotype_miss);

	infile.clear();
	infile.close();

	return true;
}




//Read PLINK bed genotype file, the second time: mean-center each retained SNP and optionally calculate K
//  file_bed       path of the binary genotype file
//  indicator_idv  per-individual flag; individuals flagged 0 are dropped
//  indicator_snp  per-SNP flag; SNPs flagged 0 are skipped
//  UtX            output; column c receives the centered genotypes of the c-th
//                 retained SNP, with missing genotypes set to 0
//  K              output when calc_K is true: sum of outer products of the
//                 centered genotype vectors divided by UtX->size2
//  calc_K         whether K is computed
//Returns false if the file cannot be opened, true otherwise.
bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K)
{
	ifstream infile (file_bed.c_str(), ios::binary);
	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}

	const size_t ni_total=indicator_idv.size();
	const size_t ns_total=indicator_snp.size();
	const size_t ni_test=UtX->size1;
	const size_t ns_test=UtX->size2;

	//bytes per SNP: four individuals per byte, rounded up
	const int n_bit=(int)((ni_total+3)/4);

	//step over the three magic-number bytes
	char ch[1];
	for (int k=3; k>0; --k) {infile.read(ch,1);}

	if (calc_K==true) {gsl_matrix_set_zero (K);}

	gsl_vector *genotype=gsl_vector_alloc (UtX->size1);
	double geno;

	size_t c_snp=0;
	for (size_t t=0; t<ns_total; ++t) {
		if (indicator_snp[t]==0) {continue;}

		//jump to the record of SNP t: 3 magic bytes, then n_bit bytes per SNP
		infile.seekg(t*n_bit+3);

		size_t c=0;		//position among all individuals in the file
		size_t c_idv=0;		//position among retained individuals
		size_t n_miss=0;
		double geno_mean=0.0;

		for (int i=0; i<n_bit; ++i) {
			infile.read(ch,1);
			const unsigned char packed=(unsigned char)ch[0];

			for (size_t j=0; j<4; ++j) {
				//the last byte may carry padding beyond the final individual
				if ((i==(n_bit-1)) && c==ni_total) {break;}

				const bool keep=(indicator_idv[c]!=0);
				c++;
				if (!keep) {continue;}

				//two bits per individual, lowest bits first
				switch ((packed>>(2*j))&3u) {
					case 0u:	//both bits clear: coded 2.0
						gsl_vector_set(genotype, c_idv, 2.0);
						geno_mean+=2.0;
						break;
					case 2u:	//low bit clear, high bit set: coded 1.0
						gsl_vector_set(genotype, c_idv, 1.0);
						geno_mean+=1.0;
						break;
					case 3u:	//both bits set: coded 0.0
						gsl_vector_set(genotype, c_idv, 0.0);
						break;
					default:	//low bit set, high bit clear: missing
						gsl_vector_set(genotype, c_idv, -9.0);
						n_miss++;
						break;
				}
				c_idv++;
			}
		}

		//mean over the observed (non-missing) genotypes
		geno_mean/=(double)(ni_test-n_miss);

		//center observed values; missing values become 0, i.e. the mean after centering
		for (size_t k=0; k<genotype->size; ++k) {
			geno=gsl_vector_get (genotype, k);
			if (geno==-9) {
				geno=0;
			} else {
				geno-=geno_mean;
			}

			gsl_vector_set (genotype, k, geno);
			gsl_matrix_set (UtX, k, c_snp, geno);
		}

		//rank-one update of the upper triangle of K
		if (calc_K==true) {gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);}

		c_snp++;
	}

	if (calc_K==true) {
		gsl_matrix_scale (K, 1.0/(double)ns_test);

		//mirror the upper triangle into the lower triangle
		for (size_t col=1; col<genotype->size; ++col) {
			for (size_t row=0; row<col; ++row) {
				geno=gsl_matrix_get (K, row, col);
				gsl_matrix_set (K, col, row, geno);
			}
		}
	}

	gsl_vector_free (genotype);
	infile.clear();
	infile.close();

	return true;
}




//compact version of the above function, using uchar instead of gsl_matrix
//  file_bed       path of the binary genotype file
//  indicator_idv  per-individual flag; individuals flagged 0 are dropped
//  indicator_snp  per-SNP flag; SNPs flagged 0 are skipped
//  Xt             output (cleared first); one row per retained SNP holding the
//                 genotypes passed through Double02ToUchar, missing genotypes
//                 being replaced by the SNP mean beforehand
//  K              output when calc_K is true: sum of outer products of the
//                 centered genotype vectors divided by ns_test
//  calc_K         whether K is computed
//  ni_test        number of retained individuals (length of each Xt row)
//  ns_test        divisor used to scale K
//Returns false if the file cannot be opened, true otherwise.
bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test)
{
	ifstream infile (file_bed.c_str(), ios::binary);
	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}

	Xt.clear();
	vector<unsigned char> Xt_row(ni_test, 0);

	const size_t ni_total=indicator_idv.size();
	const size_t ns_total=indicator_snp.size();

	//bytes per SNP: four individuals per byte, rounded up
	const int n_bit=(int)((ni_total+3)/4);

	//step over the three magic-number bytes
	char ch[1];
	for (int k=3; k>0; --k) {infile.read(ch,1);}

	if (calc_K==true) {gsl_matrix_set_zero (K);}

	gsl_vector *genotype=gsl_vector_alloc (ni_test);
	double geno;

	for (size_t t=0; t<ns_total; ++t) {
		if (indicator_snp[t]==0) {continue;}

		//jump to the record of SNP t: 3 magic bytes, then n_bit bytes per SNP
		infile.seekg(t*n_bit+3);

		size_t c=0;		//position among all individuals in the file
		size_t c_idv=0;		//position among retained individuals
		size_t n_miss=0;
		double geno_mean=0.0;

		for (int i=0; i<n_bit; ++i) {
			infile.read(ch,1);
			const unsigned char packed=(unsigned char)ch[0];

			for (size_t j=0; j<4; ++j) {
				//the last byte may carry padding beyond the final individual
				if ((i==(n_bit-1)) && c==ni_total) {break;}

				const bool keep=(indicator_idv[c]!=0);
				c++;
				if (!keep) {continue;}

				//two bits per individual, lowest bits first
				switch ((packed>>(2*j))&3u) {
					case 0u:	//both bits clear: coded 2.0
						gsl_vector_set(genotype, c_idv, 2.0);
						geno_mean+=2.0;
						break;
					case 2u:	//low bit clear, high bit set: coded 1.0
						gsl_vector_set(genotype, c_idv, 1.0);
						geno_mean+=1.0;
						break;
					case 3u:	//both bits set: coded 0.0
						gsl_vector_set(genotype, c_idv, 0.0);
						break;
					default:	//low bit set, high bit clear: missing
						gsl_vector_set(genotype, c_idv, -9.0);
						n_miss++;
						break;
				}
				c_idv++;
			}
		}

		//mean over the observed (non-missing) genotypes
		geno_mean/=(double)(ni_test-n_miss);

		for (size_t k=0; k<genotype->size; ++k) {
			geno=gsl_vector_get (genotype, k);

			//missing genotypes are stored as the SNP mean
			if (geno==-9) {geno=geno_mean;}

			//Xt keeps the uncentered value; the centered value is only used for K
			Xt_row[k]=Double02ToUchar(geno);
			gsl_vector_set (genotype, k, geno-geno_mean);
		}
		Xt.push_back(Xt_row);

		//rank-one update of the upper triangle of K
		if (calc_K==true) {gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);}
	}

	if (calc_K==true) {
		gsl_matrix_scale (K, 1.0/(double)ns_test);

		//mirror the upper triangle into the lower triangle
		for (size_t col=1; col<genotype->size; ++col) {
			for (size_t row=0; row<col; ++row) {
				geno=gsl_matrix_get (K, row, col);
				gsl_matrix_set (K, col, row, geno);
			}
		}
	}

	gsl_vector_free (genotype);
	infile.clear();
	infile.close();

	return true;
}







//Read a table of per-SNP estimates and store alpha+beta*gamma for every SNP id.
//  file_est    text file, fields separated by spaces or tabs; the first line is
//              a header and is skipped
//  est_column  four 1-based column numbers, in the order: SNP id, alpha, beta,
//              gamma; a column number of 0 never matches, which leaves alpha
//              and beta at 0 and gamma at 1
//  mapRS2est   output (cleared first), SNP id -> alpha+beta*gamma
//Returns false if est_column has fewer than four entries, the file cannot be
//opened, a row has fewer columns than the highest requested column number, or
//the same SNP id occurs twice.
bool ReadFile_est (const string &file_est, const vector<size_t> &est_column, map<string, double> &mapRS2est)
{
	mapRS2est.clear();

	//four entries are indexed below; fewer would read past the end of the vector
	if (est_column.size()<4) {cout<<"error! four column numbers are required to read estimated parameter file: "<<file_est<<endl; return false;}

	ifstream infile (file_est.c_str(), ifstream::in);
	if (!infile) {cout<<"error opening estimated parameter file: "<<file_est<<endl; return false;}

	string line;
	char *ch_ptr;

	string rs;
	double alpha, beta, gamma, d;

	//header
	getline(infile, line);

	//highest requested column number = number of columns every row must provide
	size_t n=0;
	for (size_t k=0; k<est_column.size(); ++k) {
		if (est_column[k]>n) {n=est_column[k];}
	}

	while (getline(infile, line)) {
		//strtok edits its buffer, so hand it the string's own writable storage
		ch_ptr=strtok (&line[0], " \t");

		alpha=0.0; beta=0.0; gamma=1.0;
		for (size_t i=0; i<n; ++i) {
			//a short row would otherwise hand NULL to string assignment or atof
			if (ch_ptr==NULL) {cout<<"error! too few columns in estimated parameter file: "<<file_est<<endl; return false;}

			//token i is column i+1
			if (i+1==est_column[0]) {rs=ch_ptr;}
			if (i+1==est_column[1]) {alpha=atof(ch_ptr);}
			if (i+1==est_column[2]) {beta=atof(ch_ptr);}
			if (i+1==est_column[3]) {gamma=atof(ch_ptr);}

			ch_ptr=strtok (NULL, " \t");
		}

		d=alpha+beta*gamma;

		if (mapRS2est.count(rs)==0) {
			mapRS2est[rs]=d;
		}
		else {
			cout<<"the same SNP occurs more than once in estimated parameter file: "<<rs<<endl; return false;
		}
	}

	infile.clear();
	infile.close();
	return true;
}



//Count the newline characters in a file opened through igzstream.
//  file_input  path of the file to scan
//  n_lines     output, number of '\n' characters found
//Returns false if the file cannot be opened, true otherwise.
bool CountFileLines (const string &file_input, size_t &n_lines)
{
	igzstream infile (file_input.c_str(), igzstream::in);
	if (!infile) {cout<<"error! fail to open file: "<<file_input<<endl; return false;}

	//walk the stream buffer once, tallying line terminators
	size_t newlines=0;
	for (istreambuf_iterator<char> it(infile), last; it!=last; ++it) {
		if (*it=='\n') {++newlines;}
	}
	n_lines=newlines;

	infile.seekg (0, ios::beg);

	return true;
}



//Read gene expression file
//  file_gene  path of the expression table (opened through igzstream); the
//             first line is a header, every following line holds an id followed
//             by one value per individual
//  vec_read   output (cleared first), one running sum per individual
//  snpInfo    output, one SNPINFO record appended per data row (not cleared here)
//  ng_total   output, number of data rows read
//Returns false if the file cannot be opened, a row has no id, or a row's number
//of value columns differs from that of the first data row.
bool ReadFile_gene (const string &file_gene, vector<double> &vec_read, vector<SNPINFO> &snpInfo, size_t &ng_total)
{
	vec_read.clear();
	ng_total=0;

	igzstream infile (file_gene.c_str(), igzstream::in);
	if (!infile) {cout<<"error! fail to open gene expression file: "<<file_gene<<endl; return false;}

	string line;
	char *ch_ptr;
	string rs;

	size_t n_idv=0, t=0;

	//header
	getline(infile, line);

	while (getline(infile, line)) {
		//strtok edits its buffer, so hand it the string's own writable storage
		ch_ptr=strtok (&line[0], " , \t");

		//a blank row would otherwise assign NULL to a std::string
		if (ch_ptr==NULL) {cout<<"error! number of columns doesn't match in row: "<<ng_total<<endl; return false;}
		rs=ch_ptr;

		ch_ptr=strtok (NULL, " , \t");

		t=0;
		while (ch_ptr!=NULL) {
			if (ng_total==0) {
				//NOTE(review): the first data row only sizes vec_read; its values are
				//not added to the sums -- confirm this is intended
				vec_read.push_back(0);
				n_idv++;
			} else if (t<n_idv) {
				//columns beyond n_idv are counted but never written, so an
				//over-long row is reported below instead of writing out of bounds
				vec_read[t]+=atof(ch_ptr);
			}
			t++;

			ch_ptr=strtok (NULL, " , \t");
		}

		if (t!=n_idv) {cout<<"error! number of columns doesn't match in row: "<<ng_total<<endl; return false;}

		SNPINFO sInfo={"-9", rs, -9, -9, "-9", "-9", 0, -9, -9, 0, 0, 0};
		snpInfo.push_back(sInfo);

		ng_total++;
	}

	infile.close();
	infile.clear();

	return true;
}







// WJA Added
//Read Oxford sample file
//  file_sample      path of the sample file (opened through igzstream): a line of
//                   column names starting with ID_1 ID_2 missing, a line of
//                   column types starting with 0 0 0, then one row per individual
//  indicator_pheno  output (cleared first), per individual: one 1/0 flag per
//                   requested phenotype, 0 where the value is "NA"
//  pheno            output (cleared first), per individual: the requested
//                   phenotype values, -9 where the value is "NA"
//  p_column         1-based positions, counted among the P/B typed columns, of
//                   the phenotypes to extract
//  indicator_cvt    output (cleared first), per individual: 1 if no covariate is
//                   "NA", 0 otherwise
//  cvt              output, one covariate row appended per individual (not
//                   cleared here): C typed columns verbatim, D typed columns
//                   expanded to (number of levels - 1) 0/1 columns
//  n_cvt            output, number of covariates per row; only assigned when the
//                   type line declares at least one C or D column
//Returns false if the file cannot be opened, a header line is malformed, a row
//lacks a needed column, or complete rows disagree on the number of covariates.
bool ReadFile_sample(const string &file_sample, vector<vector<int> > &indicator_pheno, vector<vector<double> > &pheno, const vector<size_t> &p_column, vector<int> &indicator_cvt, vector<vector<double> > &cvt, size_t &n_cvt)
{
	indicator_pheno.clear();
	pheno.clear();
	indicator_cvt.clear();

	igzstream infile (file_sample.c_str(), igzstream::in);

	if (!infile) {cout<<"error! fail to open sample file: "<<file_sample<<endl; return false;}

	string line;
	char *ch_ptr;

	double p,d;

	vector<double> pheno_row;
	vector<int> ind_pheno_row;
	int flag_na=0;

	size_t num_cols=0;
	size_t num_cvt_in_file=0;

	//phenotype number (1-based, among P/B columns) -> slot in the output rows
	map<size_t, size_t> mapP2c;
	for (size_t i=0; i<p_column.size(); i++) {
		mapP2c[p_column[i]]=i;
		pheno_row.push_back(-9);
		ind_pheno_row.push_back(0);
	}

	// read header line1
	if(!safeGetline(infile, line).eof()) {
		//strtok edits its buffer, so hand it the string's own writable storage
		ch_ptr=strtok(&line[0], " \t");
		if(ch_ptr==NULL || strcmp(ch_ptr, "ID_1")!=0) {return false;}
		ch_ptr=strtok(NULL, " \t");
		if(ch_ptr==NULL || strcmp(ch_ptr, "ID_2")!=0) {return false;}
		ch_ptr=strtok(NULL, " \t");
		if(ch_ptr==NULL || strcmp(ch_ptr, "missing")!=0) {return false;}

		//the count starts on "missing" itself, hence the decrement afterwards
		while (ch_ptr!=NULL) {
			num_cols++;
			ch_ptr=strtok (NULL, " \t");
		}
		num_cols--;
	}

	//one level -> index map per D typed column, in file order
	vector<map<uint32_t, size_t> > cvt_factor_levels;

	//type code of every column after "missing"; 0 where the type line gives none
	vector<char> col_type(num_cols, 0);

	// read header line2
	if(!safeGetline(infile, line).eof()) {
		ch_ptr=strtok (&line[0], " \t");
		if(ch_ptr==NULL || strcmp(ch_ptr, "0")!=0) {return false;}
		ch_ptr=strtok(NULL, " \t");
		if(ch_ptr==NULL || strcmp(ch_ptr, "0")!=0) {return false;}
		ch_ptr=strtok(NULL, " \t");
		if(ch_ptr==NULL || strcmp(ch_ptr, "0")!=0) {return false;}

		size_t it=0;
		ch_ptr=strtok (NULL, " \t");
		while(ch_ptr!=NULL){
			//more type codes than column names would overrun col_type
			if (it>=num_cols) {cout<<"error! more column types than column names in sample file: "<<file_sample<<endl; return false;}

			col_type[it++]=ch_ptr[0];
			if(ch_ptr[0]=='D') {cvt_factor_levels.push_back(map<uint32_t, size_t>());num_cvt_in_file++;}
			if(ch_ptr[0]=='C') {num_cvt_in_file++;}
			ch_ptr=strtok(NULL, " \t");
		}
	}

	//first pass: phenotypes, and the set of levels of every D column
	while (!safeGetline(infile, line).eof()) {

		ch_ptr=strtok (&line[0], " \t");

		//advance to the first column after ID_1, ID_2 and missing
		for(int it=0;it<3;it++){ch_ptr=strtok(NULL, " \t");}

		size_t i=0;
		size_t p_i=0;
		size_t fac_cvt_i=0;

		while (i<num_cols) {

			if((col_type[i]=='P')||(col_type[i]=='B'))
			{
				if (mapP2c.count(p_i+1)!=0) {
					//a short row would otherwise hand NULL to strcmp/atof
					if (ch_ptr==NULL) {cout<<"error! too few columns in sample file: "<<file_sample<<endl; return false;}

					const size_t slot=mapP2c[p_i+1];
					if (strcmp(ch_ptr, "NA")==0) {ind_pheno_row[slot]=0; pheno_row[slot]=-9;}
					else {p=atof(ch_ptr); ind_pheno_row[slot]=1; pheno_row[slot]=p;}
				}
				p_i++;
			}
			if(col_type[i]=='D')
			{
				if (ch_ptr==NULL) {cout<<"error! too few columns in sample file: "<<file_sample<<endl; return false;}

				// NOTE THIS DOES NOT CHECK TO BE SURE LEVEL IS INTEGRAL i.e for atoi error
				if (strcmp(ch_ptr, "NA")!=0) {
					uint32_t level=atoi(ch_ptr);
					map<uint32_t, size_t> &levels=cvt_factor_levels[fac_cvt_i];
					if(levels.count(level) == 0) {
						//take the size before inserting: before C++17 the order of
						//"levels[level]=levels.size()" is unspecified, so the index
						//(and with it the reference level) varied between compilers.
						//Levels are numbered 0,1,2,... in order of first appearance.
						const size_t next_index=levels.size();
						levels[level]=next_index;
					}
				}
				fac_cvt_i++;
			}

			ch_ptr=strtok (NULL, " \t");
			i++;
		}


		indicator_pheno.push_back(ind_pheno_row);
		pheno.push_back(pheno_row);

	}
	// close and reopen the file
 	infile.close();
 	infile.clear();

	if(num_cvt_in_file>0)
	{
		igzstream infile2 (file_sample.c_str(), igzstream::in);

		if (!infile2) {cout<<"error! fail to open sample file: "<<file_sample<<endl; return false;}
		// skip header
		safeGetline(infile2, line);
		safeGetline(infile2, line);

		// pull in the covariates now we now the number of factor levels
		while (!safeGetline(infile2, line).eof()) {

			vector<double> v_d; flag_na=0;
			ch_ptr=strtok (&line[0], " \t");

			//advance to the first column after ID_1, ID_2 and missing
			for(int it=0;it<3;it++){ch_ptr=strtok(NULL, " \t");}


			size_t i=0;
			size_t fac_cvt_i=0;
			size_t num_fac_levels;
			while (i<num_cols) {

				if(col_type[i]=='C')
				{
					if (ch_ptr==NULL) {cout<<"error! too few columns in sample file: "<<file_sample<<endl; return false;}

					if (strcmp(ch_ptr, "NA")==0) {flag_na=1; d=-9;}
					else {d=atof(ch_ptr);}

					v_d.push_back(d);
				}


				if(col_type[i]=='D')
				{
					// NOTE THIS DOES NOT CHECK TO BE SURE LEVEL IS INTEGRAL i.e for atoi error
					num_fac_levels=cvt_factor_levels[fac_cvt_i].size();

					//a factor with a single level contributes no column
					if(num_fac_levels>1)
					{
						if (ch_ptr==NULL) {cout<<"error! too few columns in sample file: "<<file_sample<<endl; return false;}

						if (strcmp(ch_ptr, "NA")==0) {
							flag_na=1;
							for(size_t it=0;it<num_fac_levels-1; it++) {v_d.push_back(-9);}
						}
						else {
							//the level with index 0 is the reference (all zeros);
							//index k>0 sets the k-th indicator column
							uint32_t level=atoi(ch_ptr);
							const size_t level_index=cvt_factor_levels[fac_cvt_i][level];
							for(size_t it=0;it<num_fac_levels-1;it++) {v_d.push_back(level_index==it+1 ? 1.0 : 0.0);}
						}
					}
					fac_cvt_i++;
				}

				ch_ptr=strtok (NULL, " \t");
				i++;
			}

			if (flag_na==0) {indicator_cvt.push_back(1);} else {indicator_cvt.push_back(0);}
			cvt.push_back(v_d);


		}

		if (indicator_cvt.empty()) {n_cvt=0;}
		else {
			//every complete row must have as many covariates as the first complete row
			//NOTE(review): cvt[i] is paired with indicator_cvt[i]; this presumes cvt was empty on entry -- confirm against callers
			flag_na=0;
			for (vector<int>::size_type i=0; i<indicator_cvt.size(); ++i) {
				if (indicator_cvt[i]==0) {continue;}

				if (flag_na==0) {flag_na=1; n_cvt=cvt[i].size();}
				if (flag_na!=0 && n_cvt!=cvt[i].size()) {cout<<"error! number of covariates in row "<<i<<" do not match other rows."<<endl; return false;}
			}
		}

		infile2.close();
		infile2.clear();
	}
 	return true;
}



// WJA Added
//Read bgen file, the first time
#include <cstdint>
#include <assert.h>
bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, const gsl_matrix *W, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, size_t &ns_test)
{

	indicator_snp.clear();

	ifstream infile (file_bgen.c_str(), ios::binary);
	if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return false;}

	gsl_vector *genotype=gsl_vector_alloc (W->size1);
	gsl_vector *genotype_miss=gsl_vector_alloc (W->size1);
	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
	gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);

	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
	int sig;
	LUDecomp (WtW, pmt, &sig);
	LUInvert (WtW, pmt, WtWi);

	// read in header
	uint32_t bgen_snp_block_offset;
	uint32_t bgen_header_length;
	uint32_t bgen_nsamples;
	uint32_t bgen_nsnps;
	uint32_t bgen_flags;
	infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
	infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
	bgen_snp_block_offset-=4;
	infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
	bgen_snp_block_offset-=4;
	infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
	bgen_snp_block_offset-=4;
	infile.ignore(4+bgen_header_length-20);
	bgen_snp_block_offset-=4+bgen_header_length-20;
	infile.read(reinterpret_cast<char*>(&bgen_flags),4);
	bgen_snp_block_offset-=4;
	bool CompressedSNPBlocks=bgen_flags&0x1;
	bool LongIds=bgen_flags&0x4;

	if(!LongIds) {return false;}

	infile.ignore(bgen_snp_block_offset);

	ns_test=0;

	size_t ns_total=static_cast<size_t>(bgen_nsnps);

	snpInfo.clear();
	string rs;
	long int b_pos;
	string chr;
//	double cM;
	string major;
	string minor;
	string id;

	double v_x, v_w;
	int c_idv=0;


	double maf, geno, geno_old;
	size_t n_miss;
	size_t n_0, n_1, n_2;
	int flag_poly;

	double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss;


	size_t ni_total=indicator_idv.size();   // total number of samples in phenotype file
	size_t ni_test=0;   // number of samples to use in test

	uint32_t bgen_N;
	uint16_t bgen_LS;
	uint16_t bgen_LR;
	uint16_t bgen_LC;
	uint32_t bgen_SNP_pos;
	uint32_t bgen_LA;
	std::string bgen_A_allele;
	uint32_t bgen_LB;
	std::string bgen_B_allele;
	uint32_t bgen_P;
	size_t unzipped_data_size;

	for (size_t i=0; i<ni_total; ++i) {

		ni_test+=indicator_idv[i];
	}



//	ns_total=1;
	for (size_t t=0; t<ns_total; ++t) {

		id.clear();
		rs.clear();
		chr.clear();
		bgen_A_allele.clear();
		bgen_B_allele.clear();

		infile.read(reinterpret_cast<char*>(&bgen_N),4);
		infile.read(reinterpret_cast<char*>(&bgen_LS),2);

		id.resize(bgen_LS);
		infile.read(&id[0], bgen_LS);

		infile.read(reinterpret_cast<char*>(&bgen_LR),2);
		rs.resize(bgen_LR);
		infile.read(&rs[0], bgen_LR);

		infile.read(reinterpret_cast<char*>(&bgen_LC),2);
		chr.resize(bgen_LC);
		infile.read(&chr[0], bgen_LC);

		infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);

		infile.read(reinterpret_cast<char*>(&bgen_LA),4);
		bgen_A_allele.resize(bgen_LA);
		infile.read(&bgen_A_allele[0], bgen_LA);


		infile.read(reinterpret_cast<char*>(&bgen_LB),4);
		bgen_B_allele.resize(bgen_LB);
		infile.read(&bgen_B_allele[0], bgen_LB);


		// should we switch according to MAF?
		minor=bgen_B_allele;
		major=bgen_A_allele;
		b_pos=static_cast<long int>(bgen_SNP_pos);

		uint16_t unzipped_data[3*bgen_N];

		if (setSnps.size()!=0 && setSnps.count(rs)==0) {
		  SNPINFO sInfo={"-9", rs, -9, -9, minor, major, -9, -9, (long int) -9};
			snpInfo.push_back(sInfo);
			indicator_snp.push_back(0);
			if(CompressedSNPBlocks)
				infile.read(reinterpret_cast<char*>(&bgen_P),4);
			else
				bgen_P=6*bgen_N;

			infile.ignore(static_cast<size_t>(bgen_P));

			continue;
		}


		if(CompressedSNPBlocks)
		{
			infile.read(reinterpret_cast<char*>(&bgen_P),4);
			uint8_t zipped_data[bgen_P];

			unzipped_data_size=6*bgen_N;

			infile.read(reinterpret_cast<char*>(zipped_data),bgen_P);
			int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data), reinterpret_cast<uLongf*>(&unzipped_data_size), reinterpret_cast<Bytef*>(zipped_data), static_cast<uLong> (bgen_P));
			assert(result == Z_OK);

		}
		else
		{
			bgen_P=6*bgen_N;
			infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);

		}


		maf=0; n_miss=0; flag_poly=0; geno_old=-9;
		n_0=0; n_1=0; n_2=0;
		c_idv=0;
		gsl_vector_set_zero (genotype_miss);
		for (size_t i=0; i<bgen_N; ++i) {
			// CHECK this set correctly!
			if (indicator_idv[i]==0) {continue;}


			bgen_geno_prob_AA=static_cast<double>(unzipped_data[i*3])/32768.0;
			bgen_geno_prob_AB=static_cast<double>(unzipped_data[i*3+1])/32768.0;
			bgen_geno_prob_BB=static_cast<double>(unzipped_data[i*3+2])/32768.0;
			bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;

			//CHECK 0.1 OK
			if (bgen_geno_prob_non_miss<0.9) {gsl_vector_set (genotype_miss, c_idv, 1); n_miss++; c_idv++; continue;}


			bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
			bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
			bgen_geno_prob_BB/=bgen_geno_prob_non_miss;

			geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
			if (geno>=0 && geno<=0.5) {n_0++;}
			if (geno>0.5 && geno<1.5) {n_1++;}
			if (geno>=1.5 && geno<=2.0) {n_2++;}

			gsl_vector_set (genotype, c_idv, geno);

			// CHECK WHAT THIS DOES
			if (flag_poly==0) {geno_old=geno; flag_poly=2;}
			if (flag_poly==2 && geno!=geno_old) {flag_poly=1;}

			maf+=geno;

			c_idv++;
		}

		maf/=2.0*static_cast<double>(ni_test-n_miss);

		SNPINFO sInfo={chr, rs, -9, b_pos, minor, major, n_miss, (double)n_miss/(double)ni_test, maf};
		snpInfo.push_back(sInfo);

		if ( (double)n_miss/(double)ni_test > miss_level) {indicator_snp.push_back(0); continue;}

		if ( (maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1 ) {indicator_snp.push_back(0); continue;}

		if (flag_poly!=1) {indicator_snp.push_back(0); continue;}

		if (hwe_level!=0 && maf_level!=-1) {
			if (CalcHWE(n_0, n_2, n_1)<hwe_level) {indicator_snp.push_back(0); continue;}
		}

		//filter SNP if it is correlated with W
		//unless W has only one column, of 1s
		for (size_t i=0; i<genotype->size; ++i) {
			if (gsl_vector_get (genotype_miss, i)==1) {geno=maf*2.0; gsl_vector_set (genotype, i, geno);}
		}

		gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
		gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
		gsl_blas_ddot (genotype, genotype, &v_x);
		gsl_blas_ddot (Wtx, WtWiWtx, &v_w);

		if (W->size2!=1 && v_w/v_x >= r2_level) {indicator_snp.push_back(0); continue;}

		indicator_snp.push_back(1);
		ns_test++;

	}




	return true;

}


//read oxford genotype file and calculate kinship matrix
bool bgenKin (const string &file_oxford, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin)
{
	string file_bgen=file_oxford;
	ifstream infile (file_bgen.c_str(), ios::binary);
	if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return false;}


	// read in header
	uint32_t bgen_snp_block_offset;
	uint32_t bgen_header_length;
	uint32_t bgen_nsamples;
	uint32_t bgen_nsnps;
	uint32_t bgen_flags;
	infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
	infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
	bgen_snp_block_offset-=4;
	infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
	bgen_snp_block_offset-=4;
	infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
	bgen_snp_block_offset-=4;
	infile.ignore(4+bgen_header_length-20);
	bgen_snp_block_offset-=4+bgen_header_length-20;
	infile.read(reinterpret_cast<char*>(&bgen_flags),4);
	bgen_snp_block_offset-=4;
	bool CompressedSNPBlocks=bgen_flags&0x1;
//	bool LongIds=bgen_flags&0x4;

	infile.ignore(bgen_snp_block_offset);

	double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss;

	uint32_t bgen_N;
	uint16_t bgen_LS;
	uint16_t bgen_LR;
	uint16_t bgen_LC;
	uint32_t bgen_SNP_pos;
	uint32_t bgen_LA;
	std::string bgen_A_allele;
	uint32_t bgen_LB;
	std::string bgen_B_allele;
	uint32_t bgen_P;
	size_t unzipped_data_size;
	string id;
	string rs;
	string chr;
	double genotype;


	size_t n_miss;
	double d, geno_mean, geno_var;

	size_t ni_total=matrix_kin->size1;
	gsl_vector *geno=gsl_vector_alloc (ni_total);
	gsl_vector *geno_miss=gsl_vector_alloc (ni_total);

	size_t ns_test=0;
	for (size_t t=0; t<indicator_snp.size(); ++t) {

		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);}

		id.clear();
		rs.clear();
		chr.clear();
		bgen_A_allele.clear();
		bgen_B_allele.clear();

		infile.read(reinterpret_cast<char*>(&bgen_N),4);
		infile.read(reinterpret_cast<char*>(&bgen_LS),2);

		id.resize(bgen_LS);
		infile.read(&id[0], bgen_LS);

		infile.read(reinterpret_cast<char*>(&bgen_LR),2);
		rs.resize(bgen_LR);
		infile.read(&rs[0], bgen_LR);

		infile.read(reinterpret_cast<char*>(&bgen_LC),2);
		chr.resize(bgen_LC);
		infile.read(&chr[0], bgen_LC);

		infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);

		infile.read(reinterpret_cast<char*>(&bgen_LA),4);
		bgen_A_allele.resize(bgen_LA);
		infile.read(&bgen_A_allele[0], bgen_LA);


		infile.read(reinterpret_cast<char*>(&bgen_LB),4);
		bgen_B_allele.resize(bgen_LB);
		infile.read(&bgen_B_allele[0], bgen_LB);




		uint16_t unzipped_data[3*bgen_N];

		if (indicator_snp[t]==0) {
			if(CompressedSNPBlocks)
				infile.read(reinterpret_cast<char*>(&bgen_P),4);
			else
				bgen_P=6*bgen_N;

			infile.ignore(static_cast<size_t>(bgen_P));

			continue;
		}



		if(CompressedSNPBlocks)
		{


			infile.read(reinterpret_cast<char*>(&bgen_P),4);
			uint8_t zipped_data[bgen_P];

			unzipped_data_size=6*bgen_N;

			infile.read(reinterpret_cast<char*>(zipped_data),bgen_P);

			int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data), reinterpret_cast<uLongf*>(&unzipped_data_size), reinterpret_cast<Bytef*>(zipped_data), static_cast<uLong> (bgen_P));
			assert(result == Z_OK);

		}
		else
		{

			bgen_P=6*bgen_N;
			infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
		}



		geno_mean=0.0; n_miss=0; geno_var=0.0;
		gsl_vector_set_all(geno_miss, 0);

		for (size_t i=0; i<bgen_N; ++i) {


				bgen_geno_prob_AA=static_cast<double>(unzipped_data[i*3])/32768.0;
				bgen_geno_prob_AB=static_cast<double>(unzipped_data[i*3+1])/32768.0;
				bgen_geno_prob_BB=static_cast<double>(unzipped_data[i*3+2])/32768.0;
				// WJA
				bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
				if (bgen_geno_prob_non_miss<0.9) {gsl_vector_set(geno_miss, i, 0.0); n_miss++;}
				else {

					bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
					bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
					bgen_geno_prob_BB/=bgen_geno_prob_non_miss;

					genotype=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;

					gsl_vector_set(geno, i, genotype);
					gsl_vector_set(geno_miss, i, 1.0);
					geno_mean+=genotype;
					geno_var+=genotype*genotype;
				}

		}


		geno_mean/=(double)(ni_total-n_miss);
		geno_var+=geno_mean*geno_mean*(double)n_miss;
		geno_var/=(double)ni_total;
		geno_var-=geno_mean*geno_mean;
//		geno_var=geno_mean*(1-geno_mean*0.5);

		for (size_t i=0; i<ni_total; ++i) {
			if (gsl_vector_get (geno_miss, i)==0) {gsl_vector_set(geno, i, geno_mean);}
		}

		gsl_vector_add_constant (geno, -1.0*geno_mean);

		if (geno_var!=0) {
			if (k_mode==1) {gsl_blas_dsyr (CblasUpper, 1.0, geno, matrix_kin);}
			else if (k_mode==2) {gsl_blas_dsyr (CblasUpper, 1.0/geno_var, geno, matrix_kin);}
			else {cout<<"Unknown kinship mode."<<endl;}
		}

		ns_test++;
    }
	cout<<endl;

	gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test);

	for (size_t i=0; i<ni_total; ++i) {
		for (size_t j=0; j<i; ++j) {
			d=gsl_matrix_get (matrix_kin, j, i);
			gsl_matrix_set (matrix_kin, i, j, d);
		}
	}

	gsl_vector_free (geno);
	gsl_vector_free (geno_miss);

	infile.close();
	infile.clear();

	return true;
}























//read header to determine which column contains which item
bool ReadHeader (const string &line, HEADER &header)
{
  string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID","rsid","RSID","MarkerName"};
  set<string> rs_set(rs_ptr, rs_ptr+11);
  string chr_ptr[]={"chr","CHR"};
  set<string> chr_set(chr_ptr, chr_ptr+2);
  string pos_ptr[]={"ps","PS","pos","POS","base_position","BASE_POSITION", "bp", "BP"};
  set<string> pos_set(pos_ptr, pos_ptr+8);
  string cm_ptr[]={"cm","CM"};
  set<string> cm_set(cm_ptr, cm_ptr+2);
  string a1_ptr[]={"a1","A1","allele1","ALLELE1","Allele1","INC_ALLELE"};
  set<string> a1_set(a1_ptr, a1_ptr+5);
  string a0_ptr[]={"a0","A0","allele0","ALLELE0","Allele0","a2","A2","allele2","ALLELE2","Allele2","DEC_ALLELE"};
  set<string> a0_set(a0_ptr, a0_ptr+10);

  string z_ptr[]={"z","Z","z_score","Z_SCORE","zscore","ZSCORE"};
  set<string> z_set(z_ptr, z_ptr+6);
  string beta_ptr[]={"beta","BETA","b","B"};
  set<string> beta_set(beta_ptr, beta_ptr+4);
  string sebeta_ptr[]={"se_beta","SE_BETA","se","SE"};
  set<string> sebeta_set(sebeta_ptr, sebeta_ptr+4);
  string chisq_ptr[]={"chisq","CHISQ","chisquare","CHISQUARE"};
  set<string> chisq_set(chisq_ptr, chisq_ptr+4);
  string p_ptr[]={"p","P","pvalue","PVALUE","p-value","P-VALUE"};
  set<string> p_set(p_ptr, p_ptr+6);

  string n_ptr[]={"n","N","ntotal","NTOTAL","n_total","N_TOTAL"};
  set<string> n_set(n_ptr, n_ptr+6);
  string nmis_ptr[]={"nmis","NMIS","n_mis","N_MIS","n_miss","N_MISS"};
  set<string> nmis_set(nmis_ptr, nmis_ptr+6);
  string nobs_ptr[]={"nobs","NOBS","n_obs","N_OBS"};
  set<string> nobs_set(nobs_ptr, nobs_ptr+4);
  string ncase_ptr[]={"ncase","NCASE","n_case","N_CASE"};
  set<string> ncase_set(ncase_ptr, ncase_ptr+4);
  string ncontrol_ptr[]={"ncontrol","NCONTROL","n_control","N_CONTROL"};
  set<string> ncontrol_set(ncontrol_ptr, ncontrol_ptr+4);

  string af_ptr[]={"af","AF","maf","MAF","f","F","allele_freq","ALLELE_FREQ","allele_frequency","ALLELE_FREQUENCY","Freq.Allele1.HapMapCEU","FreqAllele1HapMapCEU", "Freq1.Hapmap"};
  set<string> af_set(af_ptr, af_ptr+13);
  string var_ptr[]={"var","VAR"};
  set<string> var_set(var_ptr, var_ptr+2);

  string ws_ptr[]={"window_size","WINDOW_SIZE","ws","WS"};
  set<string> ws_set(ws_ptr, ws_ptr+4);
  string cor_ptr[]={"cor","COR","r","R"};
  set<string> cor_set(cor_ptr, cor_ptr+4);

  header.rs_col=0; header.chr_col=0; header.pos_col=0;  header.cm_col=0; header.a1_col=0; header.a0_col=0; header.z_col=0; header.beta_col=0; header.sebeta_col=0; header.chisq_col=0; header.p_col=0; header.n_col=0; header.nmis_col=0; header.nobs_col=0; header.ncase_col=0; header.ncontrol_col=0; header.af_col=0; header.var_col=0; header.ws_col=0; header.cor_col=0; header.coln=0;

  char *ch_ptr;
  string type;
  size_t n_error=0;

  ch_ptr=strtok ((char *)line.c_str(), " , \t");
  while (ch_ptr!=NULL) {
    type=ch_ptr;
    if (rs_set.count(type)!=0) {
      if (header.rs_col==0) {header.rs_col=header.coln+1;} else {cout<<"error! more than two rs columns in the file."<<endl; n_error++;}
    } else if (chr_set.count(type)!=0) {
      if (header.chr_col==0) {header.chr_col=header.coln+1;} else {cout<<"error! more than two chr columns in the file."<<endl; n_error++;}
    } else if (pos_set.count(type)!=0) {
      if (header.pos_col==0) {header.pos_col=header.coln+1;} else {cout<<"error! more than two pos columns in the file."<<endl; n_error++;}
    } else if (cm_set.count(type)!=0) {
      if (header.cm_col==0) {header.cm_col=header.coln+1;} else {cout<<"error! more than two cm columns in the file."<<endl; n_error++;}
    } else if (a1_set.count(type)!=0) {
      if (header.a1_col==0) {header.a1_col=header.coln+1;} else {cout<<"error! more than two allele1 columns in the file."<<endl; n_error++;}
    } else if (a0_set.count(type)!=0) {
      if (header.a0_col==0) {header.a0_col=header.coln+1;} else {cout<<"error! more than two allele0 columns in the file."<<endl; n_error++;}
    } else if (z_set.count(type)!=0) {
      if (header.z_col==0) {header.z_col=header.coln+1;} else {cout<<"error! more than two z columns in the file."<<endl; n_error++;}
    } else if (beta_set.count(type)!=0) {
      if (header.beta_col==0) {header.beta_col=header.coln+1;} else {cout<<"error! more than two beta columns in the file."<<endl; n_error++;}
    } else if (sebeta_set.count(type)!=0) {
      if (header.sebeta_col==0) {header.sebeta_col=header.coln+1;} else {cout<<"error! more than two se_beta columns in the file."<<endl; n_error++;}
    } else if (chisq_set.count(type)!=0) {
      if (header.chisq_col==0) {header.chisq_col=header.coln+1;} else {cout<<"error! more than two z columns in the file."<<endl; n_error++;}
    } else if (p_set.count(type)!=0) {
      if (header.p_col==0) {header.p_col=header.coln+1;} else {cout<<"error! more than two p columns in the file."<<endl; n_error++;}
    } else if (n_set.count(type)!=0) {
      if (header.n_col==0) {header.n_col=header.coln+1;} else {cout<<"error! more than two n_total columns in the file."<<endl; n_error++;}
    } else if (nmis_set.count(type)!=0) {
      if (header.nmis_col==0) {header.nmis_col=header.coln+1;} else {cout<<"error! more than two n_mis columns in the file."<<endl; n_error++;}
    } else if (nobs_set.count(type)!=0) {
      if (header.nobs_col==0) {header.nobs_col=header.coln+1;} else {cout<<"error! more than two n_obs columns in the file."<<endl; n_error++;}
    } else if (ncase_set.count(type)!=0) {
      if (header.ncase_col==0) {header.ncase_col=header.coln+1;} else {cout<<"error! more than two n_case columns in the file."<<endl; n_error++;}
    } else if (ncontrol_set.count(type)!=0) {
      if (header.ncontrol_col==0) {header.ncontrol_col=header.coln+1;} else {cout<<"error! more than two n_control columns in the file."<<endl; n_error++;}
    } else if (ws_set.count(type)!=0) {
      if (header.ws_col==0) {header.ws_col=header.coln+1;} else {cout<<"error! more than two window_size columns in the file."<<endl; n_error++;}
    } else if (af_set.count(type)!=0) {
      if (header.af_col==0) {header.af_col=header.coln+1;} else {cout<<"error! more than two af columns in the file."<<endl; n_error++;}
    } else if (cor_set.count(type)!=0) {
      if (header.cor_col==0) {header.cor_col=header.coln+1;} else {cout<<"error! more than two cor columns in the file."<<endl; n_error++;}
    } else {
      string str = ch_ptr;
      string cat = str.substr(str.size()-2, 2);
      // continuous
      if(cat == "_c" || cat =="_C"){
	header.catc_col.insert(header.coln+1);
      } else { //discrete
	header.catd_col.insert(header.coln+1);
      }
    }
    
    ch_ptr=strtok (NULL, " , \t");
    header.coln++;
  }

  if (header.cor_col!=0 && header.cor_col!=header.coln) {cout<<"error! the cor column should be the last column."<<endl; n_error++;}

  if (header.rs_col==0) {
    if (header.chr_col!=0 && header.pos_col!=0) {
      cout<<"missing an rs column. rs id will be replaced by chr:pos"<<endl;
    } else {
      cout<<"error! missing an rs column."<<endl; n_error++;
    }
  }

  if (n_error==0) {return true;} else {return false;}
}




//read category file, record mapRS2in
//the category file does not contain a null category
//so if a snp has 0 for all categories, then it is not included in the analysis
bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat, size_t &n_vc)
{
  mapRS2cat.clear();

  igzstream infile (file_cat.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open category file: "<<file_cat<<endl; return false;}

  string line;
  char *ch_ptr;

  string rs, chr, a1, a0, pos, cm;
  size_t i_cat;// ns_vc=0;

  //read header
  HEADER header;
  !safeGetline(infile, line).eof();
  ReadHeader (line, header);

  //use the header to count the number of categories
  n_vc=header.coln;
  if (header.rs_col!=0) {n_vc--;}
  if (header.chr_col!=0) {n_vc--;}
  if (header.pos_col!=0) {n_vc--;}
  if (header.cm_col!=0) {n_vc--;}
  if (header.a1_col!=0) {n_vc--;}
  if (header.a0_col!=0) {n_vc--;}

  //read the following lines to record mapRS2cat
  while (!safeGetline(infile, line).eof()) {
    ch_ptr=strtok ((char *)line.c_str(), " , \t");

    i_cat=0;
    for (size_t i=0; i<header.coln; i++) {
      if (header.rs_col!=0 && header.rs_col==i+1) {
	rs=ch_ptr;
      } else if (header.chr_col!=0 && header.chr_col==i+1) {
	chr=ch_ptr;
      } else if (header.pos_col!=0 && header.pos_col==i+1) {
	pos=ch_ptr;
      } else if (header.cm_col!=0 && header.cm_col==i+1) {
	cm=ch_ptr;
      } else if (header.a1_col!=0 && header.a1_col==i+1) {
	a1=ch_ptr;
      } else if (header.a0_col!=0 && header.a0_col==i+1) {
	a0=ch_ptr;
      } else if (atoi(ch_ptr)==1 || atoi(ch_ptr)==0) {
	if (i_cat==0) {
	  if (header.rs_col==0) {
	    rs=chr+":"+pos;
	  }
	}

	if (atoi(ch_ptr)==1 && mapRS2cat.count(rs)==0) {mapRS2cat[rs]=i_cat;}
	i_cat++;
      } else {}

      ch_ptr=strtok (NULL, " , \t");
    }

    //if (mapRS2cat.count(rs)==0) {mapRS2cat[rs]=n_vc+1; ns_vc++;}
  }

  //if (ns_vc>0) {n_vc++;}

  infile.clear();
  infile.close();

  return true;
}




//read several category files, whose names are listed one per line in file_mcat
//mapRS2cat: cleared, then filled with the union of the maps read by
//           ReadFile_cat; for an snp present in several files the entry from
//           the first file is kept
//n_vc:      the largest category count reported for any listed file; left
//           untouched if no file is listed
//returns false if file_mcat cannot be opened or if ReadFile_cat fails on a
//listed file; true otherwise
bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat, size_t &n_vc)
{
  mapRS2cat.clear();

  igzstream infile (file_mcat.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open mcategory file: "<<file_mcat<<endl; return false;}

  string file_name;
  map<string, size_t> mapRS2cat_tmp;
  size_t n_vc_tmp=0;
  bool first_file=true;

  while (!safeGetline(infile, file_name).eof()) {
    //an empty line does not name a file
    if (file_name.empty()) {continue;}

    //stop on failure: n_vc_tmp and the temporary map are not meaningful then
    if (!ReadFile_cat (file_name, mapRS2cat_tmp, n_vc_tmp)) {return false;}

    //insert() does not overwrite ids that are already present
    mapRS2cat.insert(mapRS2cat_tmp.begin(), mapRS2cat_tmp.end());
    if (first_file) {n_vc=n_vc_tmp; first_file=false;} else {n_vc=max(n_vc, n_vc_tmp);}
  }

  return true;
}





/*
//read the continuous category file, record mapR2catc
bool ReadFile_catc (const string &file_cat, map<string, vector<double> > &mapRS2catc, size_t &n_cat)
{
  mapRS2catc.clear();

  igzstream infile (file_cat.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open category file: "<<file_cat<<endl; return false;}

  string line;
  char *ch_ptr;

  string rs, chr, a1, a0, pos, cm;
  size_t i_cat;// ns_vc=0;

  //read header
  HEADER header;
  !safeGetline(infile, line).eof();
  ReadHeader (line, header);

  //use the header to count the number of categories
  n_cat=header.coln;
  if (header.rs_col!=0) {n_cat--;}
  if (header.chr_col!=0) {n_cat--;}
  if (header.pos_col!=0) {n_cat--;}
  if (header.cm_col!=0) {n_cat--;}
  if (header.a1_col!=0) {n_cat--;}
  if (header.a0_col!=0) {n_cat--;}

  //set up continous category
  vector<double> catc;
  for (size_t i=0; i<n_cat; i++) {
    catc.push_back(0);
  }

  //read the following lines to record mapRS2cat
  while (!safeGetline(infile, line).eof()) {
    ch_ptr=strtok ((char *)line.c_str(), " , \t");

    i_cat=0;
    if (header.rs_col==0) {
      rs=chr+":"+pos;
    }

    for (size_t i=0; i<header.coln; i++) {
      if (header.rs_col!=0 && header.rs_col==i+1) {
	rs=ch_ptr;
      } else if (header.chr_col!=0 && header.chr_col==i+1) {
	chr=ch_ptr;
      } else if (header.pos_col!=0 && header.pos_col==i+1) {
	pos=ch_ptr;
      } else if (header.cm_col!=0 && header.cm_col==i+1) {
	cm=ch_ptr;
      } else if (header.a1_col!=0 && header.a1_col==i+1) {
	a1=ch_ptr;
      } else if (header.a0_col!=0 && header.a0_col==i+1) {
	a0=ch_ptr;
      } else {
	catc[i_cat]=atof(ch_ptr);
	i_cat++;
      }

      ch_ptr=strtok (NULL, " , \t");
    }

    if (mapRS2catc.count(rs)==0) {mapRS2catc[rs]=catc;}

    //if (mapRS2cat.count(rs)==0) {mapRS2cat[rs]=n_vc+1; ns_vc++;}
  }

  //if (ns_vc>0) {n_vc++;}

  infile.clear();
  infile.close();

  return true;
}




bool ReadFile_mcatc (const string &file_mcat, map<string, vector<double> > &mapRS2catc, size_t &n_cat)
{
  mapRS2catc.clear();

  igzstream infile (file_mcat.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open mcategory file: "<<file_mcat<<endl; return false;}

  string file_name;
  map<string, vector<double> > mapRS2catc_tmp;
  size_t n_cat_tmp, t=0;

  while (!safeGetline(infile, file_name).eof()) {
    mapRS2catc_tmp.clear();
    ReadFile_catc (file_name, mapRS2catc_tmp, n_cat_tmp);
    mapRS2catc.insert(mapRS2catc_tmp.begin(), mapRS2catc_tmp.end());
    if (t==0) {n_cat=n_cat_tmp;}
    if (n_cat!=n_cat_tmp) {cout<<"number of category differs in different mcatc files."<<endl;;}

    t++;
  }

  return true;
}
*/




//read bimbam mean genotype file and calculate kinship matrix; this time, the kinship matrix is not centered, and can contain multiple K matrix
//file_geno:     genotype file, one line per entry of indicator_snp; the first
//               three fields of a line are skipped and the following fields
//               are read as one genotype per entry of indicator_idv ("NA" = missing)
//indicator_idv: individuals with a 0 entry are excluded
//indicator_snp: snps with a 0 entry are skipped
//mapRS2weight:  if not empty, only snps present in it are used, scaled by their weight
//mapRS2cat:     snp id -> category index; used when matrix_kin holds several blocks
//snpInfo:       snpInfo[t].rs_number is the id of the snp on line t
//W:             covariates; their least-squares fit is removed from every snp
//matrix_kin:    ni_test x (ni_test*n_vc); block t receives the sum over the
//               snps of category t of x*x'/var(x) (times the weight, if any),
//               divided by the number of such snps; contributions are added
//               to the content found on entry
//vector_ns:     receives the number of snps used for each block
//returns false if the file cannot be opened or a line has too few genotype
//fields; true otherwise
bool BimbamKin (const string &file_geno, const int display_pace, const vector<int> &indicator_idv, const vector<int> &indicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, const vector<SNPINFO> &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns)
{
	igzstream infile (file_geno.c_str(), igzstream::in);
	//ifstream infile (file_geno.c_str(), ifstream::in);
	if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}

	string line;
	char *ch_ptr;

	size_t n_miss;
	double d, geno_mean, geno_var;

	size_t ni_test=matrix_kin->size1;
	gsl_vector *geno=gsl_vector_alloc (ni_test);
	gsl_vector *geno_miss=gsl_vector_alloc (ni_test);

	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
	gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);

	//WtWi=(W'W)^{-1}, used to project the covariates out of each snp
	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
	int sig;
	LUDecomp (WtW, pmt, &sig);
	LUInvert (WtW, pmt, WtWi);

	size_t n_vc=matrix_kin->size2/ni_test, i_vc;
	string rs;
	//number of snps accumulated for each kinship block
	vector<size_t> ns_vec(n_vc, 0);

	//create a large matrix: snps are buffered as columns, msize per block,
	//and multiplied into matrix_kin one full buffer at a time
	size_t msize=10000;
	gsl_matrix *Xlarge=gsl_matrix_alloc (ni_test, msize*n_vc);
	gsl_matrix_set_zero(Xlarge);

	bool flag_ok=true;
	for (size_t t=0; t<indicator_snp.size(); ++t) {
		safeGetline(infile, line);
		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);}
		if (indicator_snp[t]==0) {continue;}

		//skip the first three fields (presumably snp id and the two alleles)
		ch_ptr=strtok ((char *)line.c_str(), " , \t");
		ch_ptr=strtok (NULL, " , \t");
		ch_ptr=strtok (NULL, " , \t");

		rs=snpInfo[t].rs_number;//this line is new

		geno_mean=0.0; n_miss=0; geno_var=0.0;
		gsl_vector_set_all(geno_miss, 0);

		//i runs over all individuals in the file, j over the included ones
		size_t j=0;
		for (size_t i=0; i<indicator_idv.size(); ++i) {
			//the field has to be consumed for excluded individuals too,
			//otherwise every later genotype shifts to the wrong individual
			ch_ptr=strtok (NULL, " , \t");
			if (ch_ptr==NULL) {
				cout<<endl<<"error! too few genotype fields for snp "<<rs<<" in file: "<<file_geno<<endl;
				flag_ok=false;
				break;
			}
			if (indicator_idv[i]==0) {continue;}

			//geno and geno_miss have ni_test entries and are indexed by j
			if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(geno_miss, j, 0); n_miss++;}
			else {
				d=atof(ch_ptr);
				gsl_vector_set (geno, j, d);
				gsl_vector_set (geno_miss, j, 1);
				geno_mean+=d;
				geno_var+=d*d;
			}
			j++;
		}
		if (!flag_ok) {break;}

		geno_mean/=(double)(ni_test-n_miss);
		geno_var+=geno_mean*geno_mean*(double)n_miss;
		geno_var/=(double)ni_test;
		geno_var-=geno_mean*geno_mean;
//		geno_var=geno_mean*(1-geno_mean*0.5);

		//missing genotypes are replaced by the mean of the observed ones
		for (size_t i=0; i<ni_test; ++i) {
			if (gsl_vector_get (geno_miss, i)==0) {gsl_vector_set(geno, i, geno_mean);}
		}

		gsl_vector_add_constant (geno, -1.0*geno_mean);

		//geno <- geno - W (W'W)^{-1} W' geno, then recompute the variance
		//from the residual
		gsl_blas_dgemv (CblasTrans, 1.0, W, geno, 0.0, Wtx);
		gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
		gsl_blas_dgemv (CblasNoTrans, -1.0, W, WtWiWtx, 1.0, geno);
		gsl_blas_ddot (geno, geno, &geno_var);
		geno_var/=(double)ni_test;

		if (geno_var!=0 && (mapRS2weight.size()==0 || mapRS2weight.count(rs)!=0) ) {
		  if (mapRS2weight.size()==0) {
		    d=1.0/geno_var;
		  } else {
		    d=mapRS2weight.at(rs)/geno_var;
		  }

		  //scaling by sqrt(d) makes x*x' carry the factor d
		  gsl_vector_scale (geno, sqrt(d));
		  if (n_vc==1 || mapRS2cat.size()==0 ) {
		    gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, ns_vec[0]%msize);
		    gsl_vector_memcpy (&Xlarge_col.vector, geno);
		    ns_vec[0]++;

		    //buffer full: add X*X' to the kinship matrix and start over
		    if (ns_vec[0]%msize==0) {
		      eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
		      gsl_matrix_set_zero(Xlarge);
		    }
		  } else if (mapRS2cat.count(rs)!=0) {
		    i_vc=mapRS2cat.at(rs);

		    gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, msize*i_vc+ns_vec[i_vc]%msize);
		    gsl_vector_memcpy (&Xlarge_col.vector, geno);

		    ns_vec[i_vc]++;

		    if (ns_vec[i_vc]%msize==0) {
		      gsl_matrix_view X_sub=gsl_matrix_submatrix(Xlarge, 0, msize*i_vc, ni_test, msize);
		      gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test, ni_test);
		      eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0, &kin_sub.matrix);

		      gsl_matrix_set_zero(&X_sub.matrix);
		    }
		  }

		}
	}

	if (flag_ok) {
		//add the snps that are still buffered (unused buffer columns are zero)
		for (size_t v=0; v<n_vc; v++) {
		  if (ns_vec[v]%msize!=0) {
		    gsl_matrix_view X_sub=gsl_matrix_submatrix(Xlarge, 0, msize*v, ni_test, msize);
		    gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*v, ni_test, ni_test);
		    eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0, &kin_sub.matrix);
		  }
		}

		cout<<endl;

		//divide every block by its snp count and mirror the upper triangle
		for (size_t t=0; t<n_vc; t++) {
		  gsl_vector_set(vector_ns, t, ns_vec[t]);

		  for (size_t i=0; i<ni_test; ++i) {
		    for (size_t j=0; j<=i; ++j) {
		      d=gsl_matrix_get (matrix_kin, j, i+ni_test*t);
		      d/=(double)ns_vec[t];
		      gsl_matrix_set (matrix_kin, i, j+ni_test*t, d);
		      gsl_matrix_set (matrix_kin, j, i+ni_test*t, d);
		    }
		  }
		}
	}

	gsl_vector_free (geno);
	gsl_vector_free (geno_miss);

	gsl_vector_free (Wtx);
	gsl_matrix_free (WtW);
	gsl_matrix_free (WtWi);
	gsl_vector_free (WtWiWtx);
	gsl_permutation_free (pmt);

	gsl_matrix_free (Xlarge);

	infile.close();
	infile.clear();

	return flag_ok;
}







//read a plink bed file and calculate kinship matrix; same computation as the
//bimbam version with the same signature, but genotypes come from the bed file
//file_bed:      bed file; three leading bytes are followed by
//               ceil(ni_total/4) bytes per snp, four individuals per byte
//indicator_idv: one entry per individual in the file; 0 = excluded
//indicator_snp: one entry per snp in the file; 0 = skipped
//mapRS2weight:  if not empty, only snps present in it are used, scaled by their weight
//mapRS2cat:     snp id -> category index; used when matrix_kin holds several blocks
//snpInfo:       snpInfo[t].rs_number is the id of snp t
//W:             covariates; their least-squares fit is removed from every snp
//matrix_kin:    ni_test x (ni_test*n_vc); block t receives the sum over the
//               snps of category t of x*x'/var(x) (times the weight, if any),
//               divided by the number of such snps; contributions are added
//               to the content found on entry
//vector_ns:     receives the number of snps used for each block
//returns false if the file cannot be opened or ends before all requested
//genotypes have been read; true otherwise
bool PlinkKin (const string &file_bed, const int display_pace, const vector<int> &indicator_idv, const vector<int> &indicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, const vector<SNPINFO> &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns)
{
	ifstream infile (file_bed.c_str(), ios::binary);
	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}

	//read past the first three magic numbers; a file without them is unusable
	char magic[3];
	infile.read(magic,3);
	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}

	char ch[1];
	bitset<8> b;

	size_t n_miss, ci_total, ci_test;
	double d, geno_mean, geno_var;

	size_t ni_test=matrix_kin->size1;
	size_t ni_total=indicator_idv.size();
	gsl_vector *geno=gsl_vector_alloc (ni_test);

	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
	gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);

	//WtWi=(W'W)^{-1}, used to project the covariates out of each snp
	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
	int sig;
	LUDecomp (WtW, pmt, &sig);
	LUInvert (WtW, pmt, WtWi);

	int n_bit;

	size_t n_vc=matrix_kin->size2/ni_test, i_vc;
	string rs;
	//number of snps accumulated for each kinship block
	vector<size_t> ns_vec(n_vc, 0);

	//create a large matrix: snps are buffered as columns, msize per block,
	//and multiplied into matrix_kin one full buffer at a time
	size_t msize=10000;
	gsl_matrix *Xlarge=gsl_matrix_alloc (ni_test, msize*n_vc);
	gsl_matrix_set_zero(Xlarge);

	//calculate n_bit, the number of bytes for each snp
	if (ni_total%4==0) {n_bit=ni_total/4;}
	else {n_bit=ni_total/4+1; }

	bool flag_ok=true;
	for (size_t t=0; t<indicator_snp.size(); ++t) {
		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);}
		if (indicator_snp[t]==0) {continue;}

		infile.seekg(t*n_bit+3);		//n_bit, and 3 is the number of magic numbers

		rs=snpInfo[t].rs_number;//this line is new

		//read genotypes
		geno_mean=0.0;	n_miss=0; ci_total=0; geno_var=0.0; ci_test=0;
		for (int i=0; i<n_bit; ++i) {
			infile.read(ch,1);
			//once a read fails the stream stays failed and ch keeps its old
			//value, so stop instead of decoding stale data
			if (!infile) {flag_ok=false; break;}
			b=ch[0];
			for (size_t j=0; j<4; ++j) {                //minor allele homozygous: 2.0; major: 0.0;
				//the last byte may be only partly used
				if ((i==(n_bit-1)) && ci_total==ni_total) {break;}
				if (indicator_idv[ci_total]==0) {ci_total++; continue;}

				//bit pair (b[2j], b[2j+1]): 00 -> 2, 01 -> 1, 11 -> 0, 10 -> missing
				if (b[2*j]==0) {
					if (b[2*j+1]==0) {gsl_vector_set(geno, ci_test, 2.0); geno_mean+=2.0; geno_var+=4.0; }
					else {gsl_vector_set(geno, ci_test, 1.0); geno_mean+=1.0; geno_var+=1.0;}
				}
				else {
					if (b[2*j+1]==1) {gsl_vector_set(geno, ci_test, 0.0); }
					else {gsl_vector_set(geno, ci_test, -9.0); n_miss++; }
				}

				ci_test++;
				ci_total++;
			}
		}
		if (!flag_ok) {
			cout<<endl<<"error! bed file ends before the genotypes of snp "<<rs<<": "<<file_bed<<endl;
			break;
		}

		geno_mean/=(double)(ni_test-n_miss);
		geno_var+=geno_mean*geno_mean*(double)n_miss;
		geno_var/=(double)ni_test;
		geno_var-=geno_mean*geno_mean;
//		geno_var=geno_mean*(1-geno_mean*0.5);

		//missing genotypes (marked -9) are replaced by the mean of the observed ones
		for (size_t i=0; i<ni_test; ++i) {
			d=gsl_vector_get(geno,i);
			if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);}
		}

		gsl_vector_add_constant (geno, -1.0*geno_mean);

		//geno <- geno - W (W'W)^{-1} W' geno, then recompute the variance
		//from the residual
		gsl_blas_dgemv (CblasTrans, 1.0, W, geno, 0.0, Wtx);
		gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
		gsl_blas_dgemv (CblasNoTrans, -1.0, W, WtWiWtx, 1.0, geno);
		gsl_blas_ddot (geno, geno, &geno_var);
		geno_var/=(double)ni_test;

		if (geno_var!=0 && (mapRS2weight.size()==0 || mapRS2weight.count(rs)!=0) ) {
		  if (mapRS2weight.size()==0) {
		    d=1.0/geno_var;
		  } else {
		    d=mapRS2weight.at(rs)/geno_var;
		  }

		  //scaling by sqrt(d) makes x*x' carry the factor d
		  gsl_vector_scale (geno, sqrt(d));
		  if (n_vc==1 || mapRS2cat.size()==0 ) {
		    gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, ns_vec[0]%msize);
		    gsl_vector_memcpy (&Xlarge_col.vector, geno);
		    ns_vec[0]++;

		    //buffer full: add X*X' to the kinship matrix and start over
		    if (ns_vec[0]%msize==0) {
		      eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
		      gsl_matrix_set_zero(Xlarge);
		    }
		  } else if (mapRS2cat.count(rs)!=0) {
		    i_vc=mapRS2cat.at(rs);

		    gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, msize*i_vc+ns_vec[i_vc]%msize);
		    gsl_vector_memcpy (&Xlarge_col.vector, geno);

		    ns_vec[i_vc]++;

		    if (ns_vec[i_vc]%msize==0) {
		      gsl_matrix_view X_sub=gsl_matrix_submatrix(Xlarge, 0, msize*i_vc, ni_test, msize);
		      gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test, ni_test);
		      eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0, &kin_sub.matrix);

		      gsl_matrix_set_zero(&X_sub.matrix);
		    }
		  }

		}
	}

	if (flag_ok) {
		//add the snps that are still buffered (unused buffer columns are zero)
		for (size_t v=0; v<n_vc; v++) {
		  if (ns_vec[v]%msize!=0) {
		    gsl_matrix_view X_sub=gsl_matrix_submatrix(Xlarge, 0, msize*v, ni_test, msize);
		    gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*v, ni_test, ni_test);
		    eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0, &kin_sub.matrix);
		  }
		}

		cout<<endl;

		//divide every block by its snp count and mirror the upper triangle
		for (size_t t=0; t<n_vc; t++) {
		  gsl_vector_set(vector_ns, t, ns_vec[t]);

		  for (size_t i=0; i<ni_test; ++i) {
		    for (size_t j=0; j<=i; ++j) {
		      d=gsl_matrix_get (matrix_kin, j, i+ni_test*t);
		      d/=(double)ns_vec[t];
		      gsl_matrix_set (matrix_kin, i, j+ni_test*t, d);
		      gsl_matrix_set (matrix_kin, j, i+ni_test*t, d);
		    }
		  }
		}
	}

	gsl_vector_free (geno);

	gsl_vector_free (Wtx);
	gsl_matrix_free (WtW);
	gsl_matrix_free (WtWi);
	gsl_vector_free (WtWiWtx);
	gsl_permutation_free (pmt);

	gsl_matrix_free (Xlarge);

	infile.close();
	infile.clear();

	return flag_ok;
}



bool MFILEKin (const size_t mfile_mode, const string &file_mfile, const int display_pace, const vector<int> &indicator_idv, const vector<vector<int> > &mindicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, const vector<vector<SNPINFO> > &msnpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns)
{
  size_t n_vc=vector_ns->size, ni_test=matrix_kin->size1;
  gsl_matrix_set_zero(matrix_kin);
  gsl_vector_set_zero(vector_ns);

  igzstream infile (file_mfile.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open mfile file: "<<file_mfile<<endl; return false;}

  string file_name;

  gsl_matrix *kin_tmp=gsl_matrix_alloc (matrix_kin->size1, matrix_kin->size2);
  gsl_vector *ns_tmp=gsl_vector_alloc (vector_ns->size);

  size_t l=0;
  double d;
  while (!safeGetline(infile, file_name).eof()) {
    gsl_matrix_set_zero(kin_tmp);
    gsl_vector_set_zero(ns_tmp);

    if (mfile_mode==1) {
      file_name+=".bed";
      PlinkKin (file_name, display_pace, indicator_idv, mindicator_snp[l], mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp);
    } else {
      BimbamKin (file_name, display_pace, indicator_idv, mindicator_snp[l], mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp);
    }

    //add ns
    gsl_vector_add(vector_ns, ns_tmp);

    //add kin
    for (size_t t=0; t<n_vc; t++) {
      for (size_t i=0; i<ni_test; ++i) {
	for (size_t j=0; j<=i; ++j) {
	  d=gsl_matrix_get (matrix_kin, j, i+ni_test*t)+gsl_matrix_get (kin_tmp, j, i+ni_test*t)*gsl_vector_get(ns_tmp, t);

	  gsl_matrix_set (matrix_kin, i, j+ni_test*t, d);
	  gsl_matrix_set (matrix_kin, j, i+ni_test*t, d);
	}
      }
    }
    l++;
  }

  //renormalize kin
  for (size_t t=0; t<n_vc; t++) {
    for (size_t i=0; i<ni_test; ++i) {
      for (size_t j=0; j<=i; ++j) {
	d=gsl_matrix_get (matrix_kin, j, i+ni_test*t)/gsl_vector_get(vector_ns, t);

	gsl_matrix_set (matrix_kin, i, j+ni_test*t, d);
	gsl_matrix_set (matrix_kin, j, i+ni_test*t, d);

      }
    }
  }
  cout<<endl;

  infile.close();
  infile.clear();

  gsl_matrix_free(kin_tmp);
  gsl_vector_free(ns_tmp);

  return true;
}




//read var file, store mapRS2wsnp
bool ReadFile_wsnp (const string &file_wsnp, map<string, double> &mapRS2weight)
{
  mapRS2weight.clear();

  igzstream infile (file_wsnp.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open snp weight file: "<<file_wsnp<<endl; return false;}

  char *ch_ptr;
  string line, rs;
  double weight;

  while (!safeGetline(infile, line).eof()) {
    ch_ptr=strtok ((char *)line.c_str(), " , \t");
    rs=ch_ptr;
    ch_ptr=strtok (NULL, " , \t");
    weight=atof(ch_ptr);
    mapRS2weight[rs]=weight;
  }

  return true;
}

bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc, map<string, vector<double> > &mapRS2wvector)
{
  mapRS2wvector.clear();

  igzstream infile (file_wcat.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open snp weight file: "<<file_wcat<<endl; return false;}

  char *ch_ptr;
  vector<double> weight;
  for (size_t i=0; i<n_vc; i++) {
    weight.push_back(0.0);
  }

  string line, rs, chr, a1, a0, pos, cm;
  //double af=0, var_x=0;
  //size_t n_total=0, n_mis=0, n_obs=0, n_case=0, n_control=0;

  //read header
  HEADER header;
  !safeGetline(infile, line).eof();
  ReadHeader (line, header);

  while (!safeGetline(infile, line).eof()) {
    if (isBlankLine(line)) {continue;}
    ch_ptr=strtok ((char *)line.c_str(), " , \t");

    //n_total=0; n_mis=0; n_obs=0; n_case=0; n_control=0; n_case=0; af=0; var_x=0;
    size_t t=0;
    for (size_t i=0; i<header.coln; i++) {
      if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
      else if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr; }
      else if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr; }
      else if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr; }
      else if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr; }
      else if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr; }
      //else if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr); }
      //else if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr); }
      //else if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr); }
      //else if (header.ncase_col!=0 && header.ncase_col==i+1) {n_case=atoi(ch_ptr); }
      //else if (header.ncontrol_col!=0 && header.ncontrol_col==i+1) {n_control=atoi(ch_ptr); }
      //else if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr); }
      //else if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr); }
      else {
	weight[t]=atof(ch_ptr); t++;
	if (t>n_vc) {cout<<"error! Number of columns in the wcat file does not match that of cat file."; return false;}
      }

      ch_ptr=strtok (NULL, " , \t");
    }

    if (t!=n_vc) {cout<<"error! Number of columns in the wcat file does not match that of cat file."; return false;}

    if (header.rs_col==0) {
      rs=chr+":"+pos;
    }

    mapRS2wvector[rs]=weight;
  }

  return true;
}








//read the beta file, save snp z scores in to z2_score, and save category into indicator_snp based on mapRS2var and set, and indicator_snp record the category number (from 1 to n_vc), and provide var if maf/var is not provided in the beta file
//notice that indicator_snp contains ns_test snps, instead of ns_total snps
//read the beta file for the second time, compute q, and Vq based on block jackknife
//use the mapRS2var to select snps, calculate q
//do a block-wise jackknife, and compute Vq
void ReadFile_beta (const string &file_beta, const map<string, size_t> &mapRS2cat, const map<string, double> &mapRS2wA, vector<size_t> &vec_cat, vector<size_t> &vec_ni, vector<double> &vec_weight, vector<double> &vec_z2, size_t &ni_total, size_t &ns_total, size_t &ns_test)
{
  vec_cat.clear(); vec_ni.clear(); vec_weight.clear(); vec_z2.clear();
  ni_total=0; ns_total=0; ns_test=0;

  igzstream infile (file_beta.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open beta file: "<<file_beta<<endl; return;}

  string line;
  char *ch_ptr;
  string type;

  string rs, chr, a1, a0, pos, cm;
  double z=0, beta=0, se_beta=0, chisq=0, pvalue=0, zsquare=0, af=0, var_x=0;
  size_t n_total=0, n_mis=0, n_obs=0, n_case=0, n_control=0;

  //read header
  HEADER header;
  !safeGetline(infile, line).eof();
  ReadHeader (line, header);

  if (header.n_col==0 ) {
    if ( (header.nobs_col==0 && header.nmis_col==0) && (header.ncase_col==0 && header.ncontrol_col==0) ) {
      cout<<"error! missing sample size in the beta file."<<endl;
    } else {
      cout<<"total sample size will be replaced by obs/mis sample size."<<endl;
    }
  }

  if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0) && header.chisq_col==0 && header.p_col==0) {
    cout<<"error! missing z scores in the beta file."<<endl;
  }
  /*
  if (header.af_col==0 && header.var_col==0) {
    cout<<"error! missing allele frequency in the beta file."<<endl;
  }
  */
  while (!safeGetline(infile, line).eof()) {
    if (isBlankLine(line)) {continue;}
    ch_ptr=strtok ((char *)line.c_str(), " , \t");

    z=0; beta=0; se_beta=0; chisq=0; pvalue=0;
    n_total=0; n_mis=0; n_obs=0; n_case=0; n_control=0; af=0; var_x=0;
    for (size_t i=0; i<header.coln; i++) {
      if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
      if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
      if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr;}
      if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr;}
      if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;}
      if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;}

      if (header.z_col!=0 && header.z_col==i+1) {z=atof(ch_ptr);}
      if (header.beta_col!=0 && header.beta_col==i+1) {beta=atof(ch_ptr);}
      if (header.sebeta_col!=0 && header.sebeta_col==i+1) {se_beta=atof(ch_ptr);}
      if (header.chisq_col!=0 && header.chisq_col==i+1) {chisq=atof(ch_ptr);}
      if (header.p_col!=0 && header.p_col==i+1) {pvalue=atof(ch_ptr);}

      if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);}
      if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
      if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
      if (header.ncase_col!=0 && header.ncase_col==i+1) {n_case=atoi(ch_ptr);}
      if (header.ncontrol_col!=0 && header.ncontrol_col==i+1) {n_control=atoi(ch_ptr);}

      if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);}
      if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);}

      ch_ptr=strtok (NULL, " , \t");
    }

    if (header.rs_col==0) {
      rs=chr+":"+pos;
    }

    if (header.n_col==0) {
      if (header.nmis_col!=0 && header.nobs_col!=0) {
	n_total=n_mis+n_obs;
      } else {
	n_total=n_case+n_control;
      }
    }

    //both z values and beta/se_beta have directions, while chisq/pvalue do not
    if (header.z_col!=0) {
      zsquare=z*z;
    } else if (header.beta_col!=0 && header.sebeta_col!=0) {
      z=beta/se_beta;
      zsquare=z*z;
    } else if (header.chisq_col!=0) {
      zsquare=chisq;
    } else if (header.p_col!=0) {
      zsquare=gsl_cdf_chisq_Qinv (pvalue, 1);
    } else {zsquare=0;}

    //obtain var_x
    if (header.var_col==0 && header.af_col!=0) {
      var_x=2.0*af*(1.0-af);
    }

    //if the snp is also present in cor file, then do calculations
    if ( (mapRS2wA.size()==0 || mapRS2wA.count(rs)!=0) && (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) && zsquare!=0) {
      if (mapRS2cat.size()!=0) {
	vec_cat.push_back(mapRS2cat.at(rs));
      } else {
	vec_cat.push_back(0);
      }
      vec_ni.push_back(n_total);
      if (mapRS2wA.size()==0) {
	vec_weight.push_back(1);
      } else {
	vec_weight.push_back(mapRS2wA.at(rs));
      }
      vec_z2.push_back(zsquare);

      ni_total=max(ni_total, n_total);
      ns_test++;
    }

    ns_total++;
  }

  infile.clear();
  infile.close();

  return;
}






//Read the beta file and record, for every SNP, its signed z score and its a1
//allele string.
//  file_beta : path of the beta file (opened through igzstream); the first
//              line is a header that is parsed with ReadHeader
//  mapRS2wA  : SNP id -> weight; when non-empty only SNPs present in it are kept
//  mapRS2A1  : output, SNP id -> content of the a1 column (empty string when
//              the file has no a1 column)
//  mapRS2z   : output, SNP id -> z score, taken from the z column or computed
//              as beta/se_beta; 0 when neither is available
//Both output maps are cleared on entry. If the file cannot be opened an error
//is printed and the maps are left empty.
void ReadFile_beta (const string &file_beta, const map<string, double> &mapRS2wA, map<string, string> &mapRS2A1, map<string, double> &mapRS2z)
{
  mapRS2A1.clear(); mapRS2z.clear();

  igzstream infile (file_beta.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open beta file: "<<file_beta<<endl; return;}

  string line;
  char *ch_ptr;

  string rs, chr, pos, a1;
  double z=0, beta=0, se_beta=0;

  //read header
  HEADER header;
  safeGetline(infile, line);
  ReadHeader (line, header);

  if (header.n_col==0 ) {
    if ( (header.nobs_col==0 && header.nmis_col==0) && (header.ncase_col==0 && header.ncontrol_col==0) ) {
      cout<<"error! missing sample size in the beta file."<<endl;
    } else {
      cout<<"total sample size will be replaced by obs/mis sample size."<<endl;
    }
  }

  if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0)) {
    cout<<"error! missing z scores in the beta file."<<endl;
  }

  while (!safeGetline(infile, line).eof()) {
    if (isBlankLine(line)) {continue;}
    ch_ptr=strtok ((char *)line.c_str(), " , \t");

    z=0; beta=0; se_beta=0;

    //only the columns that feed the two output maps are parsed; column
    //numbers are 1-based and 0 means "absent"
    bool truncated=false;
    for (size_t i=0; i<header.coln; i++) {
      //strtok returns NULL once the line runs out of tokens; passing that to
      //atof or assigning it to a string is undefined behaviour
      if (ch_ptr==NULL) {truncated=true; break;}
      const size_t col=i+1;

      if (header.rs_col==col) {rs=ch_ptr;}
      if (header.chr_col==col) {chr=ch_ptr;}
      if (header.pos_col==col) {pos=ch_ptr;}
      if (header.a1_col==col) {a1=ch_ptr;}

      if (header.z_col==col) {z=atof(ch_ptr);}
      if (header.beta_col==col) {beta=atof(ch_ptr);}
      if (header.sebeta_col==col) {se_beta=atof(ch_ptr);}

      ch_ptr=strtok (NULL, " , \t");
    }

    if (truncated) {
      cout<<"error! a line in the beta file has fewer columns than the header; the line is skipped."<<endl;
      continue;
    }

    //without an rs column the SNP is identified by chr:pos
    if (header.rs_col==0) {
      rs=chr+":"+pos;
    }

    //both z values and beta/se_beta have directions, while chisq/pvalue do not
    if (header.z_col==0) {
      //a zero standard error (including an unparsable field, which atof maps
      //to 0) would give inf or NaN, so it is treated like a missing z score
      if (header.beta_col!=0 && header.sebeta_col!=0 && se_beta!=0) {
        z=beta/se_beta;
      } else {
        z=0;
      }
    }

    //keep the snp only if it is present in the weight map (when supplied)
    if (mapRS2wA.size()==0 || mapRS2wA.count(rs)!=0) {
      mapRS2z[rs]=z;
      mapRS2A1[rs]=a1;
    }
  }

  infile.clear();
  infile.close();

  return;
}



//Compute the per-category statistic q, the per-category weight sums s, and a
//block-jackknife estimate Vq of the (co)variance of q.
//  n_block    : number of jackknife blocks
//  vec_cat    : category of each SNP, used directly as an index into q/s
//  vec_ni     : sample size of each SNP
//  vec_weight : weight of each SNP
//  vec_z2     : squared z score of each SNP
//  Vq, q, s   : outputs, all zeroed on entry; q and s hold one entry per
//               category (q->size entries are written), Vq is indexed by
//               pairs of categories
//NOTE(review): the four input vectors are read in parallel up to
//vec_cat.size(), and every category is used as an index without a range
//check - presumably callers guarantee equal lengths and cat < q->size; confirm.
//NOTE(review): n_block==0 and a zero sample size are not guarded against
//(both are used as divisors below) - confirm callers never pass them.
void Calcq (const size_t n_block, const vector<size_t> &vec_cat, const vector<size_t> &vec_ni, const vector<double> &vec_weight, const vector<double> &vec_z2, gsl_matrix *Vq, gsl_vector *q, gsl_vector *s)
{
  gsl_matrix_set_zero (Vq);
  gsl_vector_set_zero (q);
  gsl_vector_set_zero (s);

  size_t cat, n_total;
  double w, zsquare;

  //per-category accumulators: weighted sum, weight total and SNP count
  vector<double> vec_q, vec_s, n_snps;
  for (size_t i=0; i<q->size; i++) {
    vec_q.push_back(0.0);
    vec_s.push_back(0.0);
    n_snps.push_back(0.0);
  }

  //per-block, per-category accumulators (n_block rows of q->size zeros)
  vector<vector<double> > mat_q, mat_s;
  for (size_t i=0; i<n_block; i++) {
    mat_q.push_back(vec_q);
    mat_s.push_back(vec_s);
  }

  //compute q and s
  for (size_t i=0; i<vec_cat.size(); i++) {
    //extract quantities
    cat=vec_cat[i];
    n_total=vec_ni[i];
    w=vec_weight[i];
    zsquare=vec_z2[i];

    //compute q and s
    vec_q[cat]+=(zsquare-1.0)*w/(double)n_total;
    vec_s[cat]+=w;
    n_snps[cat]++;
  }

  //update q; vec_q is used again for computing Vq below
  //categories with a zero weight total keep q at 0
  for (size_t i=0; i<q->size; i++) {
    if (vec_s[i]!=0) {
      gsl_vector_set(q, i, vec_q[i]/vec_s[i]);
    }
    gsl_vector_set(s, i, vec_s[i]);
  }

  //compute Vq; divide SNPs in each category into evenly distributed blocks
  //t counts SNPs of category l inside the current block, b is the current
  //block index and n_snp the number of category-l SNPs per block
  size_t t=0, b=0, n_snp=0;
  double d, m, n;
  for (size_t l=0; l<q->size; l++) {
    n_snp=floor(n_snps[l]/n_block); t=0; b=0;
    //fewer category-l SNPs than blocks: this category adds nothing to Vq
    if (n_snp==0) {continue;}

    //initiate everything to zero
    for (size_t i=0; i<n_block; i++) {
      for (size_t j=0; j<q->size; j++) {
	mat_q[i][j]=0;
	mat_s[i][j]=0;
      }
    }

    //record values
    //SNPs of every category are added to the current block; the block
    //boundaries themselves are driven only by SNPs of category l
    for (size_t i=0; i<vec_cat.size(); i++) {
      //extract quantities
      cat=vec_cat[i];
      n_total=vec_ni[i];
      w=vec_weight[i];
      zsquare=vec_z2[i];

      //save quantities for computing Vq (which is not divided by n_total)
      //NOTE(review): vec_q above does include the 1/n_total factor while
      //mat_q does not, yet the two are subtracted below - confirm intended
      mat_q[b][cat]+=(zsquare-1.0)*w;
      mat_s[b][cat]+=w;

      //move on to the next block after n_snp SNPs of category l; the last
      //block absorbs whatever remains
      if (cat==l) {
	if (b<n_block-1) {
	  if (t<n_snp-1) {t++;}  else {b++; t=0;}
	} else {
	  t++;
	}
      }
    }

    //center mat_q
    //each block entry is first replaced by the estimate obtained with that
    //block removed (total minus block, over total weight minus block weight);
    //blocks with no weight, or holding all of the weight, are left untouched
    for (size_t i=0; i<q->size; i++) {
      m=0; n=0;
      for (size_t k=0; k<n_block; k++) {
	if (mat_s[k][i]!=0 && vec_s[i]!=mat_s[k][i]) {
	  d=(vec_q[i]-mat_q[k][i])/(vec_s[i]-mat_s[k][i]);
	  mat_q[k][i]=d;
	  m+=d;
	  n++;
	}
      }
      if (n!=0) {m/=n;}

      //subtract the mean m; entries that are exactly zero are skipped
      for (size_t k=0; k<n_block; k++) {
	if (mat_q[k][i]!=0) {
	  mat_q[k][i]-=m;
	}
      }
    }

    //compute Vq for l'th row and l'th column only
    //sum of cross products over blocks where both entries are non-zero,
    //scaled by (n-1)/n, then added to what is already stored in Vq
    for (size_t i=0; i<q->size; i++) {
      d=0; n=0;
      for (size_t k=0; k<n_block; k++) {
	if (mat_q[k][l]!=0 && mat_q[k][i]!=0) {
	  d+=mat_q[k][l]*mat_q[k][i];
	  n++;
	}
      }
      if (n!=0) {
	d/=n;
	d*=n-1;
      }
      d+=gsl_matrix_get(Vq, i, l);
      gsl_matrix_set(Vq, i, l, d);
      if (i!=l) {gsl_matrix_set(Vq, l, i, d);}
    }

  }

  //divide the off diagonal elements of Vq by 2
  //(an off-diagonal entry (i,j) is accumulated both in the pass for
  //category i and in the pass for category j)
  for (size_t i=0; i<q->size; i++) {
    for (size_t j=i; j<q->size; j++) {
      if (i==j) {continue;}
      d=gsl_matrix_get(Vq, i, j);
      gsl_matrix_set(Vq, i, j, d/2);
      gsl_matrix_set(Vq, j, i, d/2);
    }
  }

  return;
}




//Read a vector file: one value per line, vec->size lines in total. Only the
//first token of each line (delimiters: space, comma, tab) is used.
//  file_vec : path of the file (opened through igzstream)
//  vec      : pre-allocated output vector; its size decides how many lines are read
//If the file cannot be opened, or runs out of values early, an error is
//printed and the remaining entries of vec are left unchanged.
void ReadFile_vector (const string &file_vec, gsl_vector *vec)
{
  igzstream infile (file_vec.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open vector file: "<<file_vec<<endl; return;}

  string line;
  char *ch_ptr;

  for (size_t i=0; i<vec->size; i++) {
    safeGetline(infile, line);
    ch_ptr=strtok ((char *)line.c_str(), " , \t");
    //strtok returns NULL for an empty line (e.g. past the end of the file);
    //atof must not be called on it
    if (ch_ptr==NULL) {
      cout<<"error! vector file has fewer entries than expected: "<<file_vec<<endl;
      break;
    }
    gsl_vector_set(vec, i, atof(ch_ptr));
  }

  infile.clear();
  infile.close();

  return;
}


//Read a matrix file: mat->size1 lines with mat->size2 values each
//(delimiters: space, comma, tab).
//  file_mat : path of the file (opened through igzstream)
//  mat      : pre-allocated output matrix; its dimensions decide how much is read
//If the file cannot be opened, or a row runs out of values, an error is
//printed and the entries not yet read are left unchanged.
void ReadFile_matrix (const string &file_mat, gsl_matrix *mat)
{
  igzstream infile (file_mat.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open matrix file: "<<file_mat<<endl; return;}

  string line;
  char *ch_ptr;

  bool complete=true;
  for (size_t i=0; i<mat->size1 && complete; i++) {
    safeGetline(infile, line);
    ch_ptr=strtok ((char *)line.c_str(), " , \t");
    for (size_t j=0; j<mat->size2; j++) {
      //strtok returns NULL when the row has no more tokens; atof must not
      //be called on it
      if (ch_ptr==NULL) {complete=false; break;}
      gsl_matrix_set(mat, i, j, atof(ch_ptr));
      ch_ptr=strtok (NULL, " , \t");
    }
  }

  if (!complete) {
    cout<<"error! matrix file has fewer entries than expected: "<<file_mat<<endl;
  }

  infile.clear();
  infile.close();

  return;
}

//Read two matrices stored one after the other in the same file: first
//mat1->size1 lines with mat1->size2 values each, then mat2->size1 lines with
//mat2->size2 values each (delimiters: space, comma, tab).
//  file_mat   : path of the file (opened through igzstream)
//  mat1, mat2 : pre-allocated output matrices; their dimensions decide how
//               much is read
//If the file cannot be opened, or a row runs out of values, an error is
//printed and the entries not yet read are left unchanged.
void ReadFile_matrix (const string &file_mat, gsl_matrix *mat1, gsl_matrix *mat2)
{
  igzstream infile (file_mat.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open matrix file: "<<file_mat<<endl; return;}

  string line;
  char *ch_ptr;

  //cleared as soon as a row turns out to be too short; nothing further is read
  bool complete=true;

  for (size_t i=0; i<mat1->size1 && complete; i++) {
    safeGetline(infile, line);
    ch_ptr=strtok ((char *)line.c_str(), " , \t");
    for (size_t j=0; j<mat1->size2; j++) {
      //strtok returns NULL when the row has no more tokens; atof must not
      //be called on it
      if (ch_ptr==NULL) {complete=false; break;}
      gsl_matrix_set(mat1, i, j, atof(ch_ptr));
      ch_ptr=strtok (NULL, " , \t");
    }
  }

  for (size_t i=0; i<mat2->size1 && complete; i++) {
    safeGetline(infile, line);
    ch_ptr=strtok ((char *)line.c_str(), " , \t");
    for (size_t j=0; j<mat2->size2; j++) {
      if (ch_ptr==NULL) {complete=false; break;}
      gsl_matrix_set(mat2, i, j, atof(ch_ptr));
      ch_ptr=strtok (NULL, " , \t");
    }
  }

  if (!complete) {
    cout<<"error! matrix file has fewer entries than expected: "<<file_mat<<endl;
  }

  infile.clear();
  infile.close();

  return;
}



//read study file
void ReadFile_study (const string &file_study, gsl_matrix *Vq_mat, gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni)
{
  string Vqfile=file_study+".Vq.txt";
  string sfile=file_study+".size.txt";
  string qfile=file_study+".q.txt";

  gsl_vector *s=gsl_vector_alloc (s_vec->size+1);

  ReadFile_matrix(Vqfile, Vq_mat);
  ReadFile_vector(sfile, s);
  ReadFile_vector(qfile, q_vec);

  double d;
  for (size_t i=0; i<s_vec->size; i++) {
    d=gsl_vector_get (s, i);
    gsl_vector_set (s_vec, i, d);
  }
  ni=gsl_vector_get (s, s_vec->size);

  gsl_vector_free(s);

  return;
}


//read reference file
void ReadFile_ref (const string &file_ref, gsl_matrix *S_mat, gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni)
{
  string sfile=file_ref+".size.txt";
  string Sfile=file_ref+".S.txt";
  //string Vfile=file_ref+".V.txt";

  gsl_vector *s=gsl_vector_alloc (s_vec->size+1);

  ReadFile_vector(sfile, s);
  ReadFile_matrix(Sfile, S_mat, Svar_mat);
  //ReadFile_matrix(Vfile, V_mat);

  double d;
  for (size_t i=0; i<s_vec->size; i++) {
    d=gsl_vector_get (s, i);
    gsl_vector_set (s_vec, i, d);
  }
  ni=gsl_vector_get (s, s_vec->size);

  gsl_vector_free(s);

  return;
}


//read mstudy file
void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat, gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni)
{
  gsl_matrix_set_zero(Vq_mat);
  gsl_vector_set_zero(q_vec);
  gsl_vector_set_zero(s_vec);
  ni=0;

  gsl_matrix *Vq_sub=gsl_matrix_alloc(Vq_mat->size1, Vq_mat->size2);
  gsl_vector *q_sub=gsl_vector_alloc(q_vec->size);
  gsl_vector *s=gsl_vector_alloc (s_vec->size+1);

  igzstream infile (file_mstudy.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open mstudy file: "<<file_mstudy<<endl; return;}

  string file_name;
  double d1, d2, d;

  while (!safeGetline(infile, file_name).eof()) {
    string Vqfile=file_name+".Vq.txt";
    string sfile=file_name+".size.txt";
    string qfile=file_name+".q.txt";

    ReadFile_matrix(Vqfile, Vq_sub);
    ReadFile_vector(sfile, s);
    ReadFile_vector(qfile, q_sub);

    ni=max(ni, (size_t)gsl_vector_get (s, s_vec->size));

    for (size_t i=0; i<s_vec->size; i++) {
      d1=gsl_vector_get (s, i);
      if (d1==0) {continue;}

      d=gsl_vector_get(q_vec, i)+gsl_vector_get(q_sub, i)*d1;
      gsl_vector_set(q_vec, i, d);

      d=gsl_vector_get(s_vec, i)+d1;
      gsl_vector_set(s_vec, i, d);

      for (size_t j=i; j<s_vec->size; j++) {
	d2=gsl_vector_get (s, j);
	if (d2==0) {continue;}

	d=gsl_matrix_get(Vq_mat, i, j)+gsl_matrix_get(Vq_sub, i, j)*d1*d2;
	gsl_matrix_set(Vq_mat, i, j, d);
	if (i!=j) {gsl_matrix_set(Vq_mat, j, i, d);}
      }
    }
  }

  for (size_t i=0; i<s_vec->size; i++) {
    d1=gsl_vector_get (s_vec, i);
    if (d1==0) {continue;}

    d=gsl_vector_get (q_vec, i);
    gsl_vector_set (q_vec, i, d/d1);

    for (size_t j=i; j<s_vec->size; j++) {
      d2=gsl_vector_get (s_vec, j);
      if (d2==0) {continue;}

      d=gsl_matrix_get (Vq_mat, i, j)/(d1*d2);
      gsl_matrix_set (Vq_mat, i, j, d);
      if (i!=j) {gsl_matrix_set(Vq_mat, j, i, d);}
    }
  }

  gsl_matrix_free(Vq_sub);
  gsl_vector_free(q_sub);
  gsl_vector_free(s);

  return;
}


//copied from lmm.cpp; is used in the following function compKtoV
//Map an unordered pair (a, b), each in 1..n_cvt+2, to its position in the
//row-major upper triangle (diagonal included) of an (n_cvt+2) x (n_cvt+2)
//matrix, i.e. to an index between 0 and [(n_c+2)^2+(n_c+2)]/2-1.
//Prints an error and returns 0 when a or b is outside 1..n_cvt+2.
size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) {
	const size_t dim=n_cvt+2;

	const bool a_valid=(a>=1 && a<=dim);
	const bool b_valid=(b>=1 && b<=dim);
	if (!(a_valid && b_valid)) {cout<<"error in GetabIndex."<<endl; return 0;}

	//order the pair so that lo<=hi; the index is symmetric in a and b
	const size_t lo=(b>a) ? a : b;
	const size_t hi=(b>a) ? b : a;

	//entries in the lo-1 rows above row lo, plus the offset inside row lo
	return (2*dim-lo+2)*(lo-1)/2+hi-lo;
}

//read reference file
void ReadFile_mref (const string &file_mref, gsl_matrix *S_mat, gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni)
{
  gsl_matrix_set_zero(S_mat);
  gsl_matrix_set_zero(Svar_mat);
  //  gsl_matrix_set_zero(V_mat);
  gsl_vector_set_zero(s_vec);
  ni=0;

  //size_t n_vc=S_mat->size1;
  gsl_matrix *S_sub=gsl_matrix_alloc (S_mat->size1, S_mat->size2);
  gsl_matrix *Svar_sub=gsl_matrix_alloc (Svar_mat->size1, Svar_mat->size2);
  //gsl_matrix *V_sub=gsl_matrix_alloc (V_mat->size1, V_mat->size2);
  gsl_vector *s=gsl_vector_alloc (s_vec->size+1);

  igzstream infile (file_mref.c_str(), igzstream::in);
  if (!infile) {cout<<"error! fail to open mref file: "<<file_mref<<endl; return;}

  string file_name;
  double d1, d2, d;
  //size_t t_ij;

  while (!safeGetline(infile, file_name).eof()) {
    string sfile=file_name+".size.txt";
    string Sfile=file_name+".S.txt";
    //string Vfile=file_name+".V.txt";

    ReadFile_vector(sfile, s);
    ReadFile_matrix(Sfile, S_sub, Svar_sub);
    //ReadFile_matrix(Vfile, V_sub);

    //update s_vec and ni
    for (size_t i=0; i<s_vec->size; i++) {
      d=gsl_vector_get (s, i)+gsl_vector_get (s_vec, i);
      gsl_vector_set (s_vec, i, d);
    }
    ni=max(ni, (size_t)gsl_vector_get (s, s_vec->size));

    //update S and Svar from each file
    for (size_t i=0; i<S_mat->size1; i++) {
      d1=gsl_vector_get(s, i);
      for (size_t j=0; j<S_mat->size2; j++) {
	d2=gsl_vector_get(s, j);

	d=gsl_matrix_get(S_sub, i, j)*d1*d2;
	gsl_matrix_set(S_sub, i, j, d);
	d=gsl_matrix_get(Svar_sub, i, j)*d1*d2*d1*d2;
	gsl_matrix_set(Svar_sub, i, j, d);
      }
    }

    gsl_matrix_add (S_mat, S_sub);
    gsl_matrix_add (Svar_mat, Svar_sub);
    /*
    //update V from each file
    for (size_t i=0; i<n_vc; i++) {
      d1=gsl_vector_get(s, i);
      for (size_t j=i; j<n_vc; j++) {
	d2=gsl_vector_get(s, j);
	t_ij=GetabIndex (i+1, j+1, n_vc-2);
	for (size_t l=0; l<n_vc+1; l++) {
	  if (l==n_vc) {d3=1;} else {d3=gsl_vector_get(s, l);}
	  for (size_t m=0; m<n_vc+1; m++) {
	    if (m==n_vc) {d4=1;} else {d4=gsl_vector_get(s, m);}

	    d=gsl_matrix_get (V_sub, l, t_ij*(n_vc+1)+m)*d1*d2*d3*d4;
	    gsl_matrix_set (V_sub, l, t_ij*(n_vc+1)+m, d);
	  }
	}
      }
    }

    gsl_matrix_add (V_mat, V_sub);
    */
  }

  //final: update S and Svar
  for (size_t i=0; i<S_mat->size1; i++) {
    d1=gsl_vector_get(s_vec, i);
    if (d1==0) {continue;}
    for (size_t j=i; j<S_mat->size2; j++) {
      d2=gsl_vector_get(s_vec, j);
      if (d2==0) {continue;}

      d=gsl_matrix_get(S_mat, i, j)/(d1*d2);
      gsl_matrix_set(S_mat, i, j, d);
      if (i!=j) {gsl_matrix_set(S_mat, j, i, d);}

      d=gsl_matrix_get(Svar_mat, i, j)/(d1*d2*d1*d2);
      gsl_matrix_set(Svar_mat, i, j, d);
      if (i!=j) {gsl_matrix_set(Svar_mat, j, i, d);}
    }
  }
  /*
  //final: update V
  for (size_t i=0; i<n_vc; i++) {
    d1=gsl_vector_get(s_vec, i);
    if (d1==0) {continue;}
    for (size_t j=i; j<n_vc; j++) {
      d2=gsl_vector_get(s_vec, j);
      if (d2==0) {continue;}
      t_ij=GetabIndex (i+1, j+1, n_vc-2);
	for (size_t l=0; l<n_vc+1; l++) {
	  if (l==n_vc) {d3=1;} else {d3=gsl_vector_get(s_vec, l);}
	  if (d3==0) {continue;}
	  for (size_t m=0; m<n_vc+1; m++) {
	    if (m==n_vc) {d4=1;} else {d4=gsl_vector_get(s_vec, m);}
	    if (d4==0) {continue;}

	    d=gsl_matrix_get (V_mat, l, t_ij*(n_vc+1)+m)/(d1*d2*d3*d4);
	    gsl_matrix_set (V_mat, l, t_ij*(n_vc+1)+m, d);
	  }
	}
      }
    }
  */
  //free matrices
  gsl_matrix_free(S_sub);
  gsl_matrix_free(Svar_sub);
  //gsl_matrix_free(V_sub);
  gsl_vector_free(s);

  return;
}