/* Genome-wide Efficient Mixed Model Association (GEMMA) Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" #include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" #include "gsl/gsl_cdf.h" #include "lapack.h" #include "gzstream.h" #include "mathfunc.h" #include "eigenlib.h" #include "io.h" using namespace std; // Print progress bar. void ProgressBar (string str, double p, double total) { double progress = (100.0 * p / total); int barsize = (int) (progress / 2.0); char bar[51]; cout<sbumpc(); switch (c) { case '\n': return is; case '\r': if(sb->sgetc() == '\n') sb->sbumpc(); return is; case EOF: // Also handle the case when the last line has no line // ending. if(t.empty()) is.setstate(std::ios::eofbit); return is; default: t += (char)c; } } } // Read SNP file. bool ReadFile_snps (const string &file_snps, set &setSnps) { setSnps.clear(); igzstream infile (file_snps.c_str(), igzstream::in); if (!infile) { cout << "error! 
fail to open snps file: " << file_snps << endl; return false; } string line; char *ch_ptr; while (getline(infile, line)) { ch_ptr=strtok ((char *)line.c_str(), " , \t"); setSnps.insert(ch_ptr); } infile.close(); infile.clear(); return true; } bool ReadFile_snps_header (const string &file_snps, set &setSnps) { setSnps.clear(); igzstream infile (file_snps.c_str(), igzstream::in); if (!infile) { cout << "error! fail to open snps file: " << file_snps << endl; return false; } string line, rs, chr, pos; char *ch_ptr; // Read header. HEADER header; !safeGetline(infile, line).eof(); ReadHeader_io (line, header); if (header.rs_col==0 && (header.chr_col==0 || header.pos_col==0) ) { cout<<"missing rs id in the hearder"< &mapRS2chr, map &mapRS2bp, map &mapRS2cM) { mapRS2chr.clear(); mapRS2bp.clear(); ifstream infile (file_anno.c_str(), ifstream::in); if (!infile) { cout << "error opening annotation file: " << file_anno << endl; return false; } string line; char *ch_ptr; string rs; long int b_pos; string chr; double cM; while (!safeGetline(infile, line).eof()) { ch_ptr=strtok ((char *)line.c_str(), " , \t"); rs=ch_ptr; ch_ptr=strtok (NULL, " , \t"); if (strcmp(ch_ptr, "NA")==0) { b_pos=-9; } else { b_pos=atol(ch_ptr); } ch_ptr=strtok (NULL, " , \t"); if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) { chr="-9"; } else { chr=ch_ptr; } ch_ptr=strtok (NULL, " , \t"); if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) { cM=-9; } else { cM=atof(ch_ptr); } mapRS2chr[rs]=chr; mapRS2bp[rs]=b_pos; mapRS2cM[rs]=cM; } infile.close(); infile.clear(); return true; } // Read 1 column of phenotype. bool ReadFile_column (const string &file_pheno, vector &indicator_idv, vector &pheno, const int &p_column) { indicator_idv.clear(); pheno.clear(); igzstream infile (file_pheno.c_str(), igzstream::in); if (!infile) { cout << "error! 
fail to open phenotype file: " << file_pheno << endl; return false; } string line; char *ch_ptr; string id; double p; while (!safeGetline(infile, line).eof()) { ch_ptr=strtok ((char *)line.c_str(), " , \t"); for (int i=0; i<(p_column-1); ++i) { ch_ptr=strtok (NULL, " , \t"); } if (strcmp(ch_ptr, "NA")==0) { indicator_idv.push_back(0); pheno.push_back(-9); } else { // Pheno is different from pimass2. p=atof(ch_ptr); indicator_idv.push_back(1); pheno.push_back(p); } } infile.close(); infile.clear(); return true; } // Read bimbam phenotype file, p_column=1, 2,... bool ReadFile_pheno (const string &file_pheno, vector > &indicator_pheno, vector > &pheno, const vector &p_column) { indicator_pheno.clear(); pheno.clear(); igzstream infile (file_pheno.c_str(), igzstream::in); if (!infile) { cout << "error! fail to open phenotype file: " << file_pheno << endl; return false; } string line; char *ch_ptr; string id; double p; vector pheno_row; vector ind_pheno_row; size_t p_max=*max_element(p_column.begin(), p_column.end() ); map mapP2c; for (size_t i=0; i &indicator_cvt, vector > &cvt, size_t &n_cvt) { indicator_cvt.clear(); ifstream infile (file_cvt.c_str(), ifstream::in); if (!infile) { cout << "error! 
fail to open covariates file: " << file_cvt << endl; return false; } string line; char *ch_ptr; double d; int flag_na=0; while (!safeGetline(infile, line).eof()) { vector v_d; flag_na=0; ch_ptr=strtok ((char *)line.c_str(), " , \t"); while (ch_ptr!=NULL) { if (strcmp(ch_ptr, "NA")==0) {flag_na=1; d=-9;} else {d=atof(ch_ptr);} v_d.push_back(d); ch_ptr=strtok (NULL, " , \t"); } if (flag_na==0) { indicator_cvt.push_back(1); } else { indicator_cvt.push_back(0); } cvt.push_back(v_d); } if (indicator_cvt.empty()) {n_cvt=0;} else { flag_na=0; for (vector::size_type i=0; i &snpInfo) { snpInfo.clear(); ifstream infile (file_bim.c_str(), ifstream::in); if (!infile) { cout << "error opening .bim file: " << file_bim << endl; return false; } string line; char *ch_ptr; string rs; long int b_pos; string chr; double cM; string major; string minor; while (getline(infile, line)) { ch_ptr=strtok ((char *)line.c_str(), " \t"); chr=ch_ptr; ch_ptr=strtok (NULL, " \t"); rs=ch_ptr; ch_ptr=strtok (NULL, " \t"); cM=atof(ch_ptr); ch_ptr=strtok (NULL, " \t"); b_pos=atol(ch_ptr); ch_ptr=strtok (NULL, " \t"); minor=ch_ptr; ch_ptr=strtok (NULL, " \t"); major=ch_ptr; SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, 0, -9, -9, 0, 0, 0}; snpInfo.push_back(sInfo); } infile.close(); infile.clear(); return true; } // Read .fam file. 
// ReadFile_fam: reads a .fam file through igzstream. On entry it clears
// indicator_pheno, pheno and mapID2num, and takes p_max as the largest entry
// of p_column.
//
// NOTE(review): this span is damaged -- text between some '<' and '>'
// characters has been lost. Template arguments are missing from the
// signatures, and everything from the middle of ReadFile_fam's first loop up
// to the middle of the NEXT function's parameter list is gone. The end of
// ReadFile_fam and the name and leading parameter(s) of the following function
// are therefore not visible; that function opens file_geno and is presumably
// ReadFile_geno -- confirm against the upstream file. All code tokens below
// are kept exactly as found; restore the file before building.
bool ReadFile_fam (const string &file_fam, vector > &indicator_pheno,
                   vector > &pheno, map &mapID2num,
                   const vector &p_column) {
  // Start from empty outputs.
  indicator_pheno.clear();
  pheno.clear();
  mapID2num.clear();

  igzstream infile (file_fam.c_str(), igzstream::in);
  // NOTE(review): the tail of this open-failure branch and the declarations
  // that followed it were lost; "pheno_row;" is what remains of one of them.
  if (!infile) { cout<<"error opening .fam file: "< pheno_row;
  vector ind_pheno_row;

  // Largest requested phenotype column.
  size_t p_max=*max_element(p_column.begin(), p_column.end() );
  map mapP2c;

  // NOTE(review): the loop below is cut off after "i"; the rest of
  // ReadFile_fam and the start of the next function's signature are missing.
  // From "&setSnps" onwards the text is the parameter list of the
  // genotype-file reader:
  //   setSnps        not referenced in the surviving text -- presumably an
  //                  optional SNP whitelist; confirm.
  //   W              covariate matrix (size1 rows, size2 columns).
  //   indicator_idv  per-individual flags; its size is ni_total.
  //   indicator_snp  output, cleared on entry; one 0/1 entry pushed per SNP
  //                  (1 = passes every filter).
  //   maf_level, miss_level, hwe_level, r2_level   filter thresholds.
  //   mapRS2chr, mapRS2bp, mapRS2cM   not referenced in the surviving text.
  //   snpInfo        output, cleared on entry; one SNPINFO pushed per SNP.
  //   ns_test        incremented once per SNP that passes every filter.
  for (size_t i=0; i &setSnps, const gsl_matrix *W,
      vector &indicator_idv, vector &indicator_snp,
      const double &maf_level, const double &miss_level,
      const double &hwe_level, const double &r2_level,
      map &mapRS2chr, map &mapRS2bp, map &mapRS2cM,
      vector &snpInfo, size_t &ns_test) {
  indicator_snp.clear();
  snpInfo.clear();

  igzstream infile (file_geno.c_str(), igzstream::in);
  // NOTE(review): the rest of the open-failure branch was lost. The trailing
  // "size1);" is what remains of a later statement -- presumably the
  // allocation of `genotype`, which is used and freed below.
  if (!infile) { cout<<"error reading genotype file:"<size1);
  gsl_vector *genotype_miss=gsl_vector_alloc (W->size1);

  // Workspace for regressing each SNP on the covariates in W.
  gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
  gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
  gsl_vector *Wtx=gsl_vector_alloc (W->size2);
  gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
  gsl_permutation * pmt=gsl_permutation_alloc (W->size2);

  // WtW = W'W; WtWi is then filled by the project's LUDecomp/LUInvert helpers
  // (presumably the inverse of W'W obtained from an LU factorisation).
  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
  int sig;
  LUDecomp (WtW, pmt, &sig);
  LUInvert (WtW, pmt, WtWi);

  double v_x, v_w;
  int c_idv=0;

  string line;
  char *ch_ptr;

  string rs;
  long int b_pos;
  string chr;
  string major;
  string minor;
  double cM;
  size_t file_pos;

  double maf, geno, geno_old;
  size_t n_miss;
  size_t n_0, n_1, n_2;
  int flag_poly;

  int ni_total=indicator_idv.size();
  int ni_test=0;
  // NOTE(review): the loop header below is cut off after "i". The lost text
  // covers the ni_test computation, the per-line read loop and the start of
  // the per-individual genotype parsing. What survives resumes inside the
  // per-individual loop with the tail of a comparison on geno.
  //
  // Genotype classes: [0, 0.5] counts towards n_0, (0.5, 1.5) towards n_1,
  // [1.5, 2.0] towards n_2.
  for (int i=0; i=0 && geno<=0.5) {n_0++;}
      if (geno>0.5 && geno<1.5) {n_1++;}
      if (geno>=1.5 && geno<=2.0) {n_2++;}

      gsl_vector_set (genotype, c_idv, geno);

      // flag_poly tracks whether the SNP varies: the first value seen is
      // remembered in geno_old (state 2); any later differing value moves it
      // to state 1. Assumes flag_poly is reset to 0 per SNP in the lost text.
      if (flag_poly==0) {geno_old=geno; flag_poly=2;}
      if (flag_poly==2 && geno!=geno_old) {flag_poly=1;}

      maf+=geno;

      c_idv++;
    }
    // maf was accumulating genotype values; dividing by twice the number of
    // non-missing individuals turns the sum into a frequency.
    maf/=2.0*(double)(ni_test-n_miss);

    // Record the SNP whether or not it passes the filters below. The
    // initialiser carries, in order after the allele fields: n_miss, the
    // missing fraction, maf, the non-missing count, 0 and file_pos.
    SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, n_miss,
                   (double)n_miss/(double)ni_test, maf, ni_test-n_miss,
                   0, file_pos};
    snpInfo.push_back(sInfo);
    file_pos++;

    // Filter 1: fraction of missing genotypes above miss_level.
    if ( (double)n_miss/(double)ni_test > miss_level) {
      indicator_snp.push_back(0);
      continue;
    }

    // Filter 2: allele-frequency test, skipped when maf_level is -1.
    // NOTE(review): the comparison operators inside the condition were lost.
    if ((maf (1.0-maf_level)) && maf_level!=-1) {
      indicator_snp.push_back(0);
      continue;
    }

    // Filter 3: drop SNPs for which no two distinct genotype values were seen.
    if (flag_poly!=1) {indicator_snp.push_back(0); continue;}

    // Filter 4: CalcHWE-based test, only when hwe_level != 0 and
    // maf_level != -1.
    // NOTE(review): the comparison against the threshold, the rejection
    // branch and the header of the following loop were lost; the surviving
    // loop body replaces genotypes flagged in genotype_miss with 2*maf.
    if (hwe_level!=0 && maf_level!=-1) {
      if (CalcHWE(n_0, n_2, n_1)size; ++i) {
      if (gsl_vector_get (genotype_miss, i)==1) {
        geno=maf*2.0;
        gsl_vector_set (genotype, i, geno);
      }
    }

    // Filter 5: correlation with the covariates.
    //   Wtx = W'x, WtWiWtx = WtWi * W'x, v_x = x'x, v_w = (W'x)' WtWi (W'x).
    gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
    gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
    gsl_blas_ddot (genotype, genotype, &v_x);
    gsl_blas_ddot (Wtx, WtWiWtx, &v_w);

    // Only applied when W has more than one column; note the >= here.
    if (W->size2!=1 && v_w/v_x >= r2_level) {
      indicator_snp.push_back(0);
      continue;
    }

    // The SNP passed every filter.
    indicator_snp.push_back(1);
    ns_test++;
  }

  // Release the GSL workspace and the input stream.
  gsl_vector_free (genotype);
  gsl_vector_free (genotype_miss);
  gsl_matrix_free (WtW);
  gsl_matrix_free (WtWi);
  gsl_vector_free (Wtx);
  gsl_vector_free (WtWiWtx);
  gsl_permutation_free (pmt);

  infile.close();
  infile.clear();

  return true;
}
// Read bed file, the first time.
// ReadFile_bed: first pass over a binary .bed genotype file. For every SNP it
// pushes one entry onto indicator_snp -- 0 when the SNP fails a filter, 1 when
// it passes all of them -- and increments ns_test for each passing SNP.
//
// NOTE(review): this copy of the function is damaged -- text between some '<'
// and '>' characters has been lost. Missing pieces are: the template
// arguments of set/vector, the tail of the open-failure branch, the per-SNP
// loop header and its genotype-decoding body, and parts of the MAF and HWE
// tests. All code tokens below are kept exactly as found; restore the
// function from the upstream file before relying on it.
//
// Parameters (as far as the surviving code shows):
//   file_bed       path of the .bed file, opened in binary mode.
//   setSnps        not referenced in the surviving text -- presumably an
//                  optional SNP whitelist; confirm against upstream.
//   W              covariate matrix; W->size1 sizes the genotype vectors and
//                  W->size2 sizes the W'W workspace.
//   indicator_idv  per-individual flags; its size is ni_total.
//   indicator_snp  output, cleared on entry.
//   snpInfo        per-SNP records; its size on entry is ns_total.
//   maf_level, miss_level, hwe_level, r2_level   filter thresholds.
//   ns_test        output counter of passing SNPs (its reset, if any, is in
//                  the lost text).
// Returns true once the file has been processed; the failure return is in the
// lost text.
bool ReadFile_bed (const string &file_bed, const set &setSnps,
                   const gsl_matrix *W, vector &indicator_idv,
                   vector &indicator_snp, vector &snpInfo,
                   const double &maf_level, const double &miss_level,
                   const double &hwe_level, const double &r2_level,
                   size_t &ns_test) {
  indicator_snp.clear();
  size_t ns_total=snpInfo.size();

  ifstream infile (file_bed.c_str(), ios::binary);
  // NOTE(review): the rest of the open-failure branch was lost. The trailing
  // "size1);" is what remains of a later statement -- presumably the
  // allocation of `genotype`, which is used and freed below.
  if (!infile) { cout<<"error reading bed file:"<size1);
  gsl_vector *genotype_miss=gsl_vector_alloc (W->size1);

  // Workspace for regressing each SNP on the covariates in W.
  gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
  gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
  gsl_vector *Wtx=gsl_vector_alloc (W->size2);
  gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
  gsl_permutation * pmt=gsl_permutation_alloc (W->size2);

  // WtW = W'W; WtWi is then filled by the project's LUDecomp/LUInvert helpers
  // (presumably the inverse of W'W obtained from an LU factorisation).
  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
  int sig;
  LUDecomp (WtW, pmt, &sig);
  LUInvert (WtW, pmt, WtWi);

  double v_x, v_w, geno;
  size_t c_idv=0;

  // One-byte read buffer and its bit view for the packed genotype bytes.
  char ch[1];
  bitset<8> b;

  size_t ni_total=indicator_idv.size();
  size_t ni_test=0;
  // NOTE(review): the loop header below is cut off after "i". The lost text
  // covers the ni_test computation, the per-SNP loop, the genotype decoding
  // and the declarations of maf, n_miss, n_0, n_1 and n_2. What survives
  // resumes inside the per-SNP loop at the missingness filter.
  //
  // Filter 1: missingness above miss_level (left-hand side lost).
  for (size_t i=0; i miss_level) {
      indicator_snp.push_back(0);
      continue;
    }

    // Filter 2: allele-frequency test, skipped when maf_level is -1.
    // NOTE(review): the comparison operators inside the condition were lost.
    if ((maf (1.0-maf_level)) && maf_level!=-1) {
      indicator_snp.push_back(0);
      continue;
    }

    // Filter 3: drop the SNP when any two of the three genotype-class
    // counters sum to zero, i.e. at most one class is populated (assuming
    // the counters are non-negative counts).
    if ( (n_0+n_1)==0 || (n_1+n_2)==0 || (n_2+n_0)==0) {
      indicator_snp.push_back(0);
      continue;
    }

    // Filter 4: CalcHWE-based test, only when hwe_level != 0 and
    // maf_level != -1.
    // NOTE(review): the comparison against the threshold, the rejection
    // branch and the header of the following loop were lost; the surviving
    // loop body replaces genotypes flagged in genotype_miss with 2*maf.
    if (hwe_level!=0 && maf_level!=-1) {
      if (CalcHWE(n_0, n_2, n_1)size; ++i) {
      if (gsl_vector_get (genotype_miss, i)==1) {
        geno=maf*2.0;
        gsl_vector_set (genotype, i, geno);
      }
    }

    // Filter 5: correlation with the covariates.
    //   Wtx = W'x, WtWiWtx = WtWi * W'x, v_x = x'x, v_w = (W'x)' WtWi (W'x).
    gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
    gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
    gsl_blas_ddot (genotype, genotype, &v_x);
    gsl_blas_ddot (Wtx, WtWiWtx, &v_w);

    // Only applied when W has more than one column; the comparison is a
    // strict > here.
    if (W->size2!=1 && v_w/v_x > r2_level) {
      indicator_snp.push_back(0);
      continue;
    }

    // The SNP passed every filter.
    indicator_snp.push_back(1);
    ns_test++;
  }

  // Release the GSL workspace and the input stream.
  gsl_vector_free (genotype);
  gsl_vector_free (genotype_miss);
  gsl_matrix_free (WtW);
  gsl_matrix_free (WtWi);
  gsl_vector_free (Wtx);
  gsl_vector_free (WtWiWtx);
  gsl_permutation_free (pmt);

  infile.close();
  infile.clear();

  return true;
}
// Read the genotype for one SNP;
remember to read empty lines. // Geno stores original genotypes without centering. // Missing values are replaced by mean. bool Bimbam_ReadOneSNP (const size_t inc, const vector &indicator_idv, igzstream &infile, gsl_vector *geno, double &geno_mean) { size_t ni_total=indicator_idv.size(); string line; char *ch_ptr; bool flag=false; for (size_t i=0; i geno_miss; for (size_t i=0; i &indicator_idv, ifstream &infile, gsl_vector *geno, double &geno_mean) { size_t ni_total=indicator_idv.size(), n_bit; if (ni_total%4==0) {n_bit=ni_total/4;} else {n_bit=ni_total/4+1;} // n_bit, and 3 is the number of magic numbers. infile.seekg(pos*n_bit+3); // Read genotypes. char ch[1]; bitset<8> b; geno_mean=0.0; size_t c=0, c_idv=0; vector geno_miss; for (size_t i=0; i &indicator_idv, map &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G) { igzstream infile (file_kin.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open kinship file: "< mapID2ID; size_t c=0; for (size_t i=0; isize1, G->size1, G->size1); ReadFile_kin (file_kin, indicator_idv, mapID2num, k_mode, error, &G_sub.matrix); i++; } infile.close(); infile.clear(); return; } void ReadFile_eigenU (const string &file_ku, bool &error, gsl_matrix *U) { igzstream infile (file_ku.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open the U file: "<size1, n_col=U->size2, i_row=0, i_col=0; gsl_matrix_set_zero (U); string line; char *ch_ptr; double d; while (getline(infile, line)) { if (i_row==n_row) { cout<<"error! number of rows in the U file is larger "<< "than expected."<size, i_row=0; gsl_vector_set_zero (eval); string line; char *ch_ptr; double d; while (getline(infile, line)) { if (i_row==n_row) { cout<<"error! number of rows in the D file is larger "<< "than expected."<size1; gsl_vector *geno=gsl_vector_alloc (ni_total); gsl_vector *geno_miss=gsl_vector_alloc (ni_total); // Create a large matrix. 
size_t msize=10000; gsl_matrix *Xlarge=gsl_matrix_alloc (ni_total, msize); gsl_matrix_set_zero(Xlarge); size_t ns_test=0; for (size_t t=0; t &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin) { ifstream infile (file_bed.c_str(), ios::binary); if (!infile) { cout<<"error reading bed file:"< b; size_t n_miss, ci_total; double d, geno_mean, geno_var; size_t ni_total=matrix_kin->size1; gsl_vector *geno=gsl_vector_alloc (ni_total); size_t ns_test=0; int n_bit; // Create a large matrix. size_t msize=10000; gsl_matrix *Xlarge=gsl_matrix_alloc (ni_total, msize); gsl_matrix_set_zero(Xlarge); // Calculate n_bit and c, the number of bit for each snp. if (ni_total%4==0) {n_bit=ni_total/4;} else {n_bit=ni_total/4+1; } //print the first three magic numbers for (int i=0; i<3; ++i) { infile.read(ch,1); b=ch[0]; } for (size_t t=0; t &indicator_idv, vector &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) { igzstream infile (file_geno.c_str(), igzstream::in); if (!infile) { cout<<"error reading genotype file:"<size1); gsl_vector *genotype_miss=gsl_vector_alloc (UtX->size1); double geno, geno_mean; size_t n_miss; int ni_total=(int)indicator_idv.size(); int ns_total=(int)indicator_snp.size(); int ni_test=UtX->size1; int ns_test=UtX->size2; int c_idv=0, c_snp=0; for (int i=0; isize; ++i) { if (gsl_vector_get (genotype_miss, i)==1) { geno=0; } else { geno=gsl_vector_get (genotype, i); geno-=geno_mean; } gsl_vector_set (genotype, i, geno); gsl_matrix_set (UtX, i, c_snp, geno); } if (calc_K==true) { gsl_blas_dsyr (CblasUpper, 1.0, genotype, K); } c_snp++; } if (calc_K==true) { gsl_matrix_scale (K, 1.0/(double)ns_test); for (size_t i=0; isize; ++i) { for (size_t j=0; j &indicator_idv, vector &indicator_snp, vector > &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test) { igzstream infile (file_geno.c_str(), igzstream::in); if (!infile) { cout<<"error reading genotype file:"< Xt_row; for (size_t i=0; isize; ++j) { 
if (gsl_vector_get (genotype_miss, j)==1) { geno=geno_mean; } else { geno=gsl_vector_get (genotype, j); } Xt_row[j]=Double02ToUchar(geno); gsl_vector_set (genotype, j, (geno-geno_mean)); } Xt.push_back(Xt_row); if (calc_K==true) { gsl_blas_dsyr (CblasUpper, 1.0, genotype, K); } c_snp++; } if (calc_K==true) { gsl_matrix_scale (K, 1.0/(double)ns_test); for (size_t i=0; isize; ++i) { for (size_t j=0; j &indicator_idv, vector &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) { ifstream infile (file_bed.c_str(), ios::binary); if (!infile) { cout<<"error reading bed file:"< b; size_t ni_total=indicator_idv.size(); size_t ns_total=indicator_snp.size(); size_t ni_test=UtX->size1; size_t ns_test=UtX->size2; int n_bit; if (ni_total%4==0) {n_bit=ni_total/4;} else {n_bit=ni_total/4+1;} // Print the first three magic numbers. for (int i=0; i<3; ++i) { infile.read(ch,1); b=ch[0]; } if (calc_K==true) {gsl_matrix_set_zero (K);} gsl_vector *genotype=gsl_vector_alloc (UtX->size1); double geno, geno_mean; size_t n_miss; size_t c_idv=0, c_snp=0, c=0; // Start reading snps and doing association test. for (size_t t=0; tsize; ++i) { geno=gsl_vector_get (genotype, i); if (geno==-9) {geno=0;} else {geno-=geno_mean;} gsl_vector_set (genotype, i, geno); gsl_matrix_set (UtX, i, c_snp, geno); } if (calc_K==true) { gsl_blas_dsyr (CblasUpper, 1.0, genotype, K); } c_snp++; } if (calc_K==true) { gsl_matrix_scale (K, 1.0/(double)ns_test); for (size_t i=0; isize; ++i) { for (size_t j=0; j &indicator_idv, vector &indicator_snp, vector > &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test) { ifstream infile (file_bed.c_str(), ios::binary); if (!infile) { cout<<"error reading bed file:"< Xt_row; for (size_t i=0; i b; size_t ni_total=indicator_idv.size(); size_t ns_total=indicator_snp.size(); int n_bit; if (ni_total%4==0) {n_bit=ni_total/4;} else {n_bit=ni_total/4+1;} // Print the first three magic numbers. 
for (int i=0; i<3; ++i) { infile.read(ch,1); b=ch[0]; } if (calc_K==true) {gsl_matrix_set_zero (K);} gsl_vector *genotype=gsl_vector_alloc (ni_test); double geno, geno_mean; size_t n_miss; size_t c_idv=0, c_snp=0, c=0; // Start reading SNPs and doing association test. for (size_t t=0; tsize; ++i) { geno=gsl_vector_get (genotype, i); if (geno==-9) {geno=geno_mean;} Xt_row[i]=Double02ToUchar(geno); geno-=geno_mean; gsl_vector_set (genotype, i, geno); } Xt.push_back(Xt_row); if (calc_K==true) { gsl_blas_dsyr (CblasUpper, 1.0, genotype, K); } c_snp++; } if (calc_K==true) { gsl_matrix_scale (K, 1.0/(double)ns_test); for (size_t i=0; isize; ++i) { for (size_t j=0; j &est_column, map &mapRS2est) { mapRS2est.clear(); ifstream infile (file_est.c_str(), ifstream::in); if (!infile) { cout<<"error opening estimated parameter file: "<(infile), istreambuf_iterator(), '\n'); infile.seekg (0, ios::beg); return true; } // Read gene expression file. bool ReadFile_gene (const string &file_gene, vector &vec_read, vector &snpInfo, size_t &ng_total) { vec_read.clear(); ng_total=0; igzstream infile (file_gene.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open gene expression file: "< > &indicator_pheno, vector > &pheno, const vector &p_column, vector &indicator_cvt, vector > &cvt, size_t &n_cvt) { indicator_pheno.clear(); pheno.clear(); indicator_cvt.clear(); igzstream infile (file_sample.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open sample file: "< pheno_row; vector ind_pheno_row; int flag_na=0; size_t num_cols=0; size_t num_p_in_file=0; size_t num_cvt_in_file=0; map mapP2c; for (size_t i=0; i > cvt_factor_levels; char col_type[num_cols]; // Read header line2. 
if(!safeGetline(infile, line).eof()) { ch_ptr=strtok ((char *)line.c_str(), " \t"); if(strcmp(ch_ptr, "0")!=0) {return false;} ch_ptr=strtok(NULL, " \t"); if(strcmp(ch_ptr, "0")!=0) {return false;} ch_ptr=strtok(NULL, " \t"); if(strcmp(ch_ptr, "0")!=0) {return false;} size_t it=0; ch_ptr=strtok (NULL, " \t"); if(ch_ptr!=NULL) while(ch_ptr!=NULL){ col_type[it++]=ch_ptr[0]; if(ch_ptr[0]=='D') { cvt_factor_levels.push_back(map()); num_cvt_in_file++; } if(ch_ptr[0]=='C') {num_cvt_in_file++;} if((ch_ptr[0]=='P')||(ch_ptr[0]=='B')) { num_p_in_file++;} ch_ptr=strtok(NULL, " \t"); } } while (!safeGetline(infile, line).eof()) { ch_ptr=strtok ((char *)line.c_str(), " \t"); for(int it=0;it<3;it++){ch_ptr=strtok(NULL, " \t");} size_t i=0; size_t p_i=0; size_t fac_cvt_i=0; while (i0) { igzstream infile2 (file_sample.c_str(), igzstream::in); if (!infile2) { cout<<"error! fail to open sample file: "<< file_sample< v_d; flag_na=0; ch_ptr=strtok ((char *)line.c_str(), " \t"); for(int it=0;it<3;it++){ch_ptr=strtok(NULL, " \t");} size_t i=0; size_t fac_cvt_i=0; size_t num_fac_levels; while (i1) { if (strcmp(ch_ptr, "NA")==0) { flag_na=1; for(size_t it=0;it::size_type i=0; i &setSnps, const gsl_matrix *W, vector &indicator_idv, vector &indicator_snp, vector &snpInfo, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, size_t &ns_test) { indicator_snp.clear(); ifstream infile (file_bgen.c_str(), ios::binary); if (!infile) { cout<<"error reading bgen file:"<size1); gsl_vector *genotype_miss=gsl_vector_alloc (W->size1); gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2); gsl_vector *Wtx=gsl_vector_alloc (W->size2); gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2); gsl_permutation * pmt=gsl_permutation_alloc (W->size2); gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); int sig; LUDecomp (WtW, pmt, &sig); LUInvert (WtW, pmt, WtWi); // Read in header. 
uint32_t bgen_snp_block_offset; uint32_t bgen_header_length; uint32_t bgen_nsamples; uint32_t bgen_nsnps; uint32_t bgen_flags; infile.read(reinterpret_cast(&bgen_snp_block_offset),4); infile.read(reinterpret_cast(&bgen_header_length),4); bgen_snp_block_offset-=4; infile.read(reinterpret_cast(&bgen_nsnps),4); bgen_snp_block_offset-=4; infile.read(reinterpret_cast(&bgen_nsamples),4); bgen_snp_block_offset-=4; infile.ignore(4+bgen_header_length-20); bgen_snp_block_offset-=4+bgen_header_length-20; infile.read(reinterpret_cast(&bgen_flags),4); bgen_snp_block_offset-=4; bool CompressedSNPBlocks=bgen_flags&0x1; bool LongIds=bgen_flags&0x4; if(!LongIds) {return false;} infile.ignore(bgen_snp_block_offset); ns_test=0; size_t ns_total=static_cast(bgen_nsnps); snpInfo.clear(); string rs; long int b_pos; string chr; string major; string minor; string id; double v_x, v_w; int c_idv=0; double maf, geno, geno_old; size_t n_miss; size_t n_0, n_1, n_2; int flag_poly; double bgen_geno_prob_AA, bgen_geno_prob_AB; double bgen_geno_prob_BB, bgen_geno_prob_non_miss; // Total number of samples in phenotype file. size_t ni_total=indicator_idv.size(); // Number of samples to use in test. 
size_t ni_test=0; uint32_t bgen_N; uint16_t bgen_LS; uint16_t bgen_LR; uint16_t bgen_LC; uint32_t bgen_SNP_pos; uint32_t bgen_LA; std::string bgen_A_allele; uint32_t bgen_LB; std::string bgen_B_allele; uint32_t bgen_P; size_t unzipped_data_size; for (size_t i=0; i(&bgen_N),4); infile.read(reinterpret_cast(&bgen_LS),2); id.resize(bgen_LS); infile.read(&id[0], bgen_LS); infile.read(reinterpret_cast(&bgen_LR),2); rs.resize(bgen_LR); infile.read(&rs[0], bgen_LR); infile.read(reinterpret_cast(&bgen_LC),2); chr.resize(bgen_LC); infile.read(&chr[0], bgen_LC); infile.read(reinterpret_cast(&bgen_SNP_pos),4); infile.read(reinterpret_cast(&bgen_LA),4); bgen_A_allele.resize(bgen_LA); infile.read(&bgen_A_allele[0], bgen_LA); infile.read(reinterpret_cast(&bgen_LB),4); bgen_B_allele.resize(bgen_LB); infile.read(&bgen_B_allele[0], bgen_LB); // Should we switch according to MAF? minor=bgen_B_allele; major=bgen_A_allele; b_pos=static_cast(bgen_SNP_pos); uint16_t unzipped_data[3*bgen_N]; if (setSnps.size()!=0 && setSnps.count(rs)==0) { SNPINFO sInfo={"-9", rs, -9, -9, minor, major, static_cast(-9), -9, (long int) -9}; snpInfo.push_back(sInfo); indicator_snp.push_back(0); if(CompressedSNPBlocks) infile.read(reinterpret_cast(&bgen_P),4); else bgen_P=6*bgen_N; infile.ignore(static_cast(bgen_P)); continue; } if(CompressedSNPBlocks) { infile.read(reinterpret_cast(&bgen_P),4); uint8_t zipped_data[bgen_P]; unzipped_data_size=6*bgen_N; infile.read(reinterpret_cast(zipped_data), bgen_P); int result= uncompress(reinterpret_cast(unzipped_data), reinterpret_cast(&unzipped_data_size), reinterpret_cast(zipped_data), static_cast (bgen_P)); assert(result == Z_OK); } else { bgen_P=6*bgen_N; infile.read(reinterpret_cast(unzipped_data),bgen_P); } maf=0; n_miss=0; flag_poly=0; geno_old=-9; n_0=0; n_1=0; n_2=0; c_idv=0; gsl_vector_set_zero (genotype_miss); for (size_t i=0; i(unzipped_data[i*3])/32768.0; bgen_geno_prob_AB= static_cast(unzipped_data[i*3+1])/32768.0; bgen_geno_prob_BB= 
static_cast(unzipped_data[i*3+2])/32768.0; bgen_geno_prob_non_miss= bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB; //CHECK 0.1 OK. if (bgen_geno_prob_non_miss<0.9) { gsl_vector_set (genotype_miss, c_idv, 1); n_miss++; c_idv++; continue; } bgen_geno_prob_AA/=bgen_geno_prob_non_miss; bgen_geno_prob_AB/=bgen_geno_prob_non_miss; bgen_geno_prob_BB/=bgen_geno_prob_non_miss; geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB; if (geno>=0 && geno<=0.5) {n_0++;} if (geno>0.5 && geno<1.5) {n_1++;} if (geno>=1.5 && geno<=2.0) {n_2++;} gsl_vector_set (genotype, c_idv, geno); // CHECK WHAT THIS DOES. if (flag_poly==0) {geno_old=geno; flag_poly=2;} if (flag_poly==2 && geno!=geno_old) {flag_poly=1;} maf+=geno; c_idv++; } maf/=2.0*static_cast(ni_test-n_miss); SNPINFO sInfo={chr, rs, -9, b_pos, minor, major, n_miss, (double)n_miss/(double)ni_test, maf}; snpInfo.push_back(sInfo); if ( (double)n_miss/(double)ni_test > miss_level) { indicator_snp.push_back(0); continue; } if ((maf (1.0-maf_level)) && maf_level!=-1) { indicator_snp.push_back(0); continue; } if (flag_poly!=1) { indicator_snp.push_back(0); continue; } if (hwe_level!=0 && maf_level!=-1) { if (CalcHWE(n_0, n_2, n_1)size; ++i) { if (gsl_vector_get (genotype_miss, i)==1) { geno=maf*2.0; gsl_vector_set (genotype, i, geno); } } gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx); gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); gsl_blas_ddot (genotype, genotype, &v_x); gsl_blas_ddot (Wtx, WtWiWtx, &v_w); if (W->size2!=1 && v_w/v_x >= r2_level) { indicator_snp.push_back(0); continue;} indicator_snp.push_back(1); ns_test++; } return true; } // Read oxford genotype file and calculate kinship matrix. 
bool bgenKin (const string &file_oxford, vector &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin) { string file_bgen=file_oxford; ifstream infile (file_bgen.c_str(), ios::binary); if (!infile) { cout<<"error reading bgen file:"<(&bgen_snp_block_offset),4); infile.read(reinterpret_cast(&bgen_header_length),4); bgen_snp_block_offset-=4; infile.read(reinterpret_cast(&bgen_nsnps),4); bgen_snp_block_offset-=4; infile.read(reinterpret_cast(&bgen_nsamples),4); bgen_snp_block_offset-=4; infile.ignore(4+bgen_header_length-20); bgen_snp_block_offset-=4+bgen_header_length-20; infile.read(reinterpret_cast(&bgen_flags),4); bgen_snp_block_offset-=4; bool CompressedSNPBlocks=bgen_flags&0x1; infile.ignore(bgen_snp_block_offset); double bgen_geno_prob_AA, bgen_geno_prob_AB; double bgen_geno_prob_BB, bgen_geno_prob_non_miss; uint32_t bgen_N; uint16_t bgen_LS; uint16_t bgen_LR; uint16_t bgen_LC; uint32_t bgen_SNP_pos; uint32_t bgen_LA; std::string bgen_A_allele; uint32_t bgen_LB; std::string bgen_B_allele; uint32_t bgen_P; size_t unzipped_data_size; string id; string rs; string chr; double genotype; size_t n_miss; double d, geno_mean, geno_var; size_t ni_total=matrix_kin->size1; gsl_vector *geno=gsl_vector_alloc (ni_total); gsl_vector *geno_miss=gsl_vector_alloc (ni_total); size_t ns_test=0; for (size_t t=0; t(&bgen_N),4); infile.read(reinterpret_cast(&bgen_LS),2); id.resize(bgen_LS); infile.read(&id[0], bgen_LS); infile.read(reinterpret_cast(&bgen_LR),2); rs.resize(bgen_LR); infile.read(&rs[0], bgen_LR); infile.read(reinterpret_cast(&bgen_LC),2); chr.resize(bgen_LC); infile.read(&chr[0], bgen_LC); infile.read(reinterpret_cast(&bgen_SNP_pos),4); infile.read(reinterpret_cast(&bgen_LA),4); bgen_A_allele.resize(bgen_LA); infile.read(&bgen_A_allele[0], bgen_LA); infile.read(reinterpret_cast(&bgen_LB),4); bgen_B_allele.resize(bgen_LB); infile.read(&bgen_B_allele[0], bgen_LB); uint16_t unzipped_data[3*bgen_N]; if (indicator_snp[t]==0) { 
if(CompressedSNPBlocks) infile.read(reinterpret_cast(&bgen_P),4); else bgen_P=6*bgen_N; infile.ignore(static_cast(bgen_P)); continue; } if(CompressedSNPBlocks) { infile.read(reinterpret_cast(&bgen_P),4); uint8_t zipped_data[bgen_P]; unzipped_data_size=6*bgen_N; infile.read(reinterpret_cast(zipped_data),bgen_P); int result= uncompress(reinterpret_cast(unzipped_data), reinterpret_cast(&unzipped_data_size), reinterpret_cast(zipped_data), static_cast (bgen_P)); assert(result == Z_OK); } else { bgen_P=6*bgen_N; infile.read(reinterpret_cast(unzipped_data),bgen_P); } geno_mean=0.0; n_miss=0; geno_var=0.0; gsl_vector_set_all(geno_miss, 0); for (size_t i=0; i(unzipped_data[i*3])/32768.0; bgen_geno_prob_AB= static_cast(unzipped_data[i*3+1])/32768.0; bgen_geno_prob_BB= static_cast(unzipped_data[i*3+2])/32768.0; // WJA bgen_geno_prob_non_miss=bgen_geno_prob_AA + bgen_geno_prob_AB+bgen_geno_prob_BB; if (bgen_geno_prob_non_miss<0.9) { gsl_vector_set(geno_miss, i, 0.0); n_miss++; } else { bgen_geno_prob_AA/=bgen_geno_prob_non_miss; bgen_geno_prob_AB/=bgen_geno_prob_non_miss; bgen_geno_prob_BB/=bgen_geno_prob_non_miss; genotype=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB; gsl_vector_set(geno, i, genotype); gsl_vector_set(geno_miss, i, 1.0); geno_mean+=genotype; geno_var+=genotype*genotype; } } geno_mean/=(double)(ni_total-n_miss); geno_var+=geno_mean*geno_mean*(double)n_miss; geno_var/=(double)ni_total; geno_var-=geno_mean*geno_mean; for (size_t i=0; i rs_set(rs_ptr, rs_ptr+11); string chr_ptr[]={"chr","CHR"}; set chr_set(chr_ptr, chr_ptr+2); string pos_ptr[]={"ps","PS","pos","POS","base_position", "BASE_POSITION", "bp", "BP"}; set pos_set(pos_ptr, pos_ptr+8); string cm_ptr[]={"cm","CM"}; set cm_set(cm_ptr, cm_ptr+2); string a1_ptr[]={"a1","A1","allele1","ALLELE1","Allele1","INC_ALLELE"}; set a1_set(a1_ptr, a1_ptr+5); string a0_ptr[]={"a0","A0","allele0","ALLELE0","Allele0","a2","A2", "allele2","ALLELE2","Allele2","DEC_ALLELE"}; set a0_set(a0_ptr, a0_ptr+10); string 
z_ptr[]={"z","Z","z_score","Z_SCORE","zscore","ZSCORE"}; set z_set(z_ptr, z_ptr+6); string beta_ptr[]={"beta","BETA","b","B"}; set beta_set(beta_ptr, beta_ptr+4); string sebeta_ptr[]={"se_beta","SE_BETA","se","SE"}; set sebeta_set(sebeta_ptr, sebeta_ptr+4); string chisq_ptr[]={"chisq","CHISQ","chisquare","CHISQUARE"}; set chisq_set(chisq_ptr, chisq_ptr+4); string p_ptr[]={"p","P","pvalue","PVALUE","p-value","P-VALUE"}; set p_set(p_ptr, p_ptr+6); string n_ptr[]={"n","N","ntotal","NTOTAL","n_total","N_TOTAL"}; set n_set(n_ptr, n_ptr+6); string nmis_ptr[]={"nmis","NMIS","n_mis","N_MIS","n_miss","N_MISS"}; set nmis_set(nmis_ptr, nmis_ptr+6); string nobs_ptr[]={"nobs","NOBS","n_obs","N_OBS"}; set nobs_set(nobs_ptr, nobs_ptr+4); string ncase_ptr[]={"ncase","NCASE","n_case","N_CASE"}; set ncase_set(ncase_ptr, ncase_ptr+4); string ncontrol_ptr[]={"ncontrol","NCONTROL","n_control","N_CONTROL"}; set ncontrol_set(ncontrol_ptr, ncontrol_ptr+4); string af_ptr[]={"af","AF","maf","MAF","f","F","allele_freq", "ALLELE_FREQ","allele_frequency","ALLELE_FREQUENCY", "Freq.Allele1.HapMapCEU","FreqAllele1HapMapCEU", "Freq1.Hapmap"}; set af_set(af_ptr, af_ptr+13); string var_ptr[]={"var","VAR"}; set var_set(var_ptr, var_ptr+2); string ws_ptr[]={"window_size","WINDOW_SIZE","ws","WS"}; set ws_set(ws_ptr, ws_ptr+4); string cor_ptr[]={"cor","COR","r","R"}; set cor_set(cor_ptr, cor_ptr+4); header.rs_col=0; header.chr_col=0; header.pos_col=0; header.cm_col=0; header.a1_col=0; header.a0_col=0; header.z_col=0; header.beta_col=0; header.sebeta_col=0; header.chisq_col=0; header.p_col=0; header.n_col=0; header.nmis_col=0; header.nobs_col=0; header.ncase_col=0; header.ncontrol_col=0; header.af_col=0; header.var_col=0; header.ws_col=0; header.cor_col=0; header.coln=0; char *ch_ptr; string type; size_t n_error=0; ch_ptr=strtok ((char *)line.c_str(), " , \t"); while (ch_ptr!=NULL) { type=ch_ptr; if (rs_set.count(type)!=0) { if (header.rs_col==0) { header.rs_col=header.coln+1; } else { cout<<"error! 
more than two rs columns in the file."< &mapRS2cat, size_t &n_vc) { mapRS2cat.clear(); igzstream infile (file_cat.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open category file: "< &mapRS2cat, size_t &n_vc) { mapRS2cat.clear(); igzstream infile (file_mcat.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open mcategory file: "< mapRS2cat_tmp; size_t n_vc_tmp, t=0; while (!safeGetline(infile, file_name).eof()) { mapRS2cat_tmp.clear(); ReadFile_cat (file_name, mapRS2cat_tmp, n_vc_tmp); mapRS2cat.insert(mapRS2cat_tmp.begin(), mapRS2cat_tmp.end()); if (t==0) {n_vc=n_vc_tmp;} else {n_vc=max(n_vc, n_vc_tmp);} t++; } return true; } // Read bimbam mean genotype file and calculate kinship matrix; this // time, the kinship matrix is not centered, and can contain multiple // K matrix. bool BimbamKin (const string &file_geno, const int display_pace, const vector &indicator_idv, const vector &indicator_snp, const map &mapRS2weight, const map &mapRS2cat, const vector &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns) { igzstream infile (file_geno.c_str(), igzstream::in); if (!infile) { cout<<"error reading genotype file:"<size1; gsl_vector *geno=gsl_vector_alloc (ni_test); gsl_vector *geno_miss=gsl_vector_alloc (ni_test); gsl_vector *Wtx=gsl_vector_alloc (W->size2); gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2); gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2); gsl_permutation * pmt=gsl_permutation_alloc (W->size2); gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); int sig; LUDecomp (WtW, pmt, &sig); LUInvert (WtW, pmt, WtWi); size_t n_vc=matrix_kin->size2/ni_test, i_vc; string rs; vector ns_vec; for (size_t i=0; i &indicator_idv, const vector &indicator_snp, const map &mapRS2weight, const map &mapRS2cat, const vector &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns) { ifstream infile (file_bed.c_str(), ios::binary); if 
(!infile) { cout<<"error reading bed file:"< b; size_t n_miss, ci_total, ci_test; double d, geno_mean, geno_var; size_t ni_test=matrix_kin->size1; size_t ni_total=indicator_idv.size(); gsl_vector *geno=gsl_vector_alloc (ni_test); gsl_vector *Wtx=gsl_vector_alloc (W->size2); gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2); gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2); gsl_permutation * pmt=gsl_permutation_alloc (W->size2); gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); int sig; LUDecomp (WtW, pmt, &sig); LUInvert (WtW, pmt, WtWi); size_t ns_test=0; int n_bit; size_t n_vc=matrix_kin->size2/ni_test, i_vc; string rs; vector ns_vec; for (size_t i=0; i &indicator_idv, const vector > &mindicator_snp, const map &mapRS2weight, const map &mapRS2cat, const vector > &msnpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns) { size_t n_vc=vector_ns->size, ni_test=matrix_kin->size1; gsl_matrix_set_zero(matrix_kin); gsl_vector_set_zero(vector_ns); igzstream infile (file_mfile.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open mfile file: "<size1, matrix_kin->size2); gsl_vector *ns_tmp=gsl_vector_alloc (vector_ns->size); size_t l=0; double d; while (!safeGetline(infile, file_name).eof()) { gsl_matrix_set_zero(kin_tmp); gsl_vector_set_zero(ns_tmp); if (mfile_mode==1) { file_name+=".bed"; PlinkKin (file_name, display_pace, indicator_idv, mindicator_snp[l], mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp); } else { BimbamKin (file_name, display_pace, indicator_idv, mindicator_snp[l], mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp); } // Add ns. gsl_vector_add(vector_ns, ns_tmp); // Add kin. for (size_t t=0; t &mapRS2weight) { mapRS2weight.clear(); igzstream infile (file_wsnp.c_str(), igzstream::in); if (!infile) { cout<<"error! 
fail to open snp weight file: "< > &mapRS2wvector) { mapRS2wvector.clear(); igzstream infile (file_wcat.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open snp weight file: "< weight; for (size_t i=0; in_vc) { cout<<"error! Number of columns in the wcat file does not "<< "match that of cat file."; return false; } } ch_ptr=strtok (NULL, " , \t"); } if (t!=n_vc) { cout<<"error! Number of columns in the wcat file does not "<< "match that of cat file."; return false; } if (header.rs_col==0) { rs=chr+":"+pos; } mapRS2wvector[rs]=weight; } return true; } // Read the beta file, save snp z scores in to z2_score, and save // category into indicator_snp based on mapRS2var and set, and // indicator_snp record the category number (from 1 to n_vc), and // provide var if maf/var is not provided in the beta file notice that // indicator_snp contains ns_test snps, instead of ns_total snps read // the beta file for the second time, compute q, and Vq based on block // jacknife use the mapRS2var to select snps (and to ), calculate q do // a block-wise jacknife, and compute Vq void ReadFile_beta (const string &file_beta, const map &mapRS2cat, const map &mapRS2wA, vector &vec_cat, vector &vec_ni, vector &vec_weight, vector &vec_z2, size_t &ni_total, size_t &ns_total, size_t &ns_test) { vec_cat.clear(); vec_ni.clear(); vec_weight.clear(); vec_z2.clear(); ni_total=0; ns_total=0; ns_test=0; igzstream infile (file_beta.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open beta file: "< &mapRS2wA, map &mapRS2A1, map &mapRS2z) { mapRS2A1.clear(); mapRS2z.clear(); igzstream infile (file_beta.c_str(), igzstream::in); if (!infile) { cout<<"error! 
fail to open beta file: "< &vec_cat, const vector &vec_ni, const vector &vec_weight, const vector &vec_z2, gsl_matrix *Vq, gsl_vector *q, gsl_vector *s) { gsl_matrix_set_zero (Vq); gsl_vector_set_zero (q); gsl_vector_set_zero (s); size_t cat, n_total; double w, zsquare; vector vec_q, vec_s, n_snps; for (size_t i=0; isize; i++) { vec_q.push_back(0.0); vec_s.push_back(0.0); n_snps.push_back(0.0); } vector > mat_q, mat_s; for (size_t i=0; isize; i++) { if (vec_s[i]!=0) { gsl_vector_set(q, i, vec_q[i]/vec_s[i]); } gsl_vector_set(s, i, vec_s[i]); } // Compute Vq; divide SNPs in each category into evenly distributed // blocks. size_t t=0, b=0, n_snp=0; double d, m, n; for (size_t l=0; lsize; l++) { n_snp=floor(n_snps[l]/n_block); t=0; b=0; if (n_snp==0) {continue;} // Initiate everything to zero. for (size_t i=0; isize; j++) { mat_q[i][j]=0; mat_s[i][j]=0; } } // Record values. for (size_t i=0; isize; i++) { m=0; n=0; for (size_t k=0; ksize; i++) { d=0; n=0; for (size_t k=0; ksize; i++) { for (size_t j=i; jsize; j++) { if (i==j) {continue;} d=gsl_matrix_get(Vq, i, j); gsl_matrix_set(Vq, i, j, d/2); gsl_matrix_set(Vq, j, i, d/2); } } return; } // Read vector file. void ReadFile_vector (const string &file_vec, gsl_vector *vec) { igzstream infile (file_vec.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open vector file: "<size; i++) { !safeGetline(infile, line).eof(); ch_ptr=strtok ((char *)line.c_str(), " , \t"); gsl_vector_set(vec, i, atof(ch_ptr)); } infile.clear(); infile.close(); return; } void ReadFile_matrix (const string &file_mat, gsl_matrix *mat) { igzstream infile (file_mat.c_str(), igzstream::in); if (!infile) { cout<<"error! 
fail to open matrix file: "<size1; i++) { !safeGetline(infile, line).eof(); ch_ptr=strtok ((char *)line.c_str(), " , \t"); for (size_t j=0; jsize2; j++) { gsl_matrix_set(mat, i, j, atof(ch_ptr)); ch_ptr=strtok (NULL, " , \t"); } } infile.clear(); infile.close(); return; } void ReadFile_matrix (const string &file_mat, gsl_matrix *mat1, gsl_matrix *mat2) { igzstream infile (file_mat.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open matrix file: "<size1; i++) { !safeGetline(infile, line).eof(); ch_ptr=strtok ((char *)line.c_str(), " , \t"); for (size_t j=0; jsize2; j++) { gsl_matrix_set(mat1, i, j, atof(ch_ptr)); ch_ptr=strtok (NULL, " , \t"); } } for (size_t i=0; isize1; i++) { !safeGetline(infile, line).eof(); ch_ptr=strtok ((char *)line.c_str(), " , \t"); for (size_t j=0; jsize2; j++) { gsl_matrix_set(mat2, i, j, atof(ch_ptr)); ch_ptr=strtok (NULL, " , \t"); } } infile.clear(); infile.close(); return; } // Read study file. void ReadFile_study (const string &file_study, gsl_matrix *Vq_mat, gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) { string Vqfile=file_study+".Vq.txt"; string sfile=file_study+".size.txt"; string qfile=file_study+".q.txt"; gsl_vector *s=gsl_vector_alloc (s_vec->size+1); ReadFile_matrix(Vqfile, Vq_mat); ReadFile_vector(sfile, s); ReadFile_vector(qfile, q_vec); double d; for (size_t i=0; isize; i++) { d=gsl_vector_get (s, i); gsl_vector_set (s_vec, i, d); } ni=gsl_vector_get (s, s_vec->size); gsl_vector_free(s); return; } // Read reference file. void ReadFile_ref (const string &file_ref, gsl_matrix *S_mat, gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) { string sfile=file_ref+".size.txt"; string Sfile=file_ref+".S.txt"; gsl_vector *s=gsl_vector_alloc (s_vec->size+1); ReadFile_vector(sfile, s); ReadFile_matrix(Sfile, S_mat, Svar_mat); double d; for (size_t i=0; isize; i++) { d=gsl_vector_get (s, i); gsl_vector_set (s_vec, i, d); } ni=gsl_vector_get (s, s_vec->size); gsl_vector_free(s); return; } // Read mstudy file. 
void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat, gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) { gsl_matrix_set_zero(Vq_mat); gsl_vector_set_zero(q_vec); gsl_vector_set_zero(s_vec); ni=0; gsl_matrix *Vq_sub=gsl_matrix_alloc(Vq_mat->size1, Vq_mat->size2); gsl_vector *q_sub=gsl_vector_alloc(q_vec->size); gsl_vector *s=gsl_vector_alloc (s_vec->size+1); igzstream infile (file_mstudy.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open mstudy file: "<size)); for (size_t i=0; isize; i++) { d1=gsl_vector_get (s, i); if (d1==0) {continue;} d=gsl_vector_get(q_vec, i)+gsl_vector_get(q_sub, i)*d1; gsl_vector_set(q_vec, i, d); d=gsl_vector_get(s_vec, i)+d1; gsl_vector_set(s_vec, i, d); for (size_t j=i; jsize; j++) { d2=gsl_vector_get (s, j); if (d2==0) {continue;} d=gsl_matrix_get(Vq_mat, i, j)+gsl_matrix_get(Vq_sub, i, j)*d1*d2; gsl_matrix_set(Vq_mat, i, j, d); if (i!=j) {gsl_matrix_set(Vq_mat, j, i, d);} } } } for (size_t i=0; isize; i++) { d1=gsl_vector_get (s_vec, i); if (d1==0) {continue;} d=gsl_vector_get (q_vec, i); gsl_vector_set (q_vec, i, d/d1); for (size_t j=i; jsize; j++) { d2=gsl_vector_get (s_vec, j); if (d2==0) {continue;} d=gsl_matrix_get (Vq_mat, i, j)/(d1*d2); gsl_matrix_set (Vq_mat, i, j, d); if (i!=j) {gsl_matrix_set(Vq_mat, j, i, d);} } } gsl_matrix_free(Vq_sub); gsl_vector_free(q_sub); gsl_vector_free(s); return; } // Read reference file. void ReadFile_mref (const string &file_mref, gsl_matrix *S_mat, gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) { gsl_matrix_set_zero(S_mat); gsl_matrix_set_zero(Svar_mat); gsl_vector_set_zero(s_vec); ni=0; gsl_matrix *S_sub=gsl_matrix_alloc (S_mat->size1, S_mat->size2); gsl_matrix *Svar_sub=gsl_matrix_alloc (Svar_mat->size1, Svar_mat->size2); gsl_vector *s=gsl_vector_alloc (s_vec->size+1); igzstream infile (file_mref.c_str(), igzstream::in); if (!infile) { cout<<"error! 
fail to open mref file: "<size; i++) { d=gsl_vector_get (s, i)+gsl_vector_get (s_vec, i); gsl_vector_set (s_vec, i, d); } ni=max(ni, (size_t)gsl_vector_get (s, s_vec->size)); // Update S and Svar from each file. for (size_t i=0; isize1; i++) { d1=gsl_vector_get(s, i); for (size_t j=0; jsize2; j++) { d2=gsl_vector_get(s, j); d=gsl_matrix_get(S_sub, i, j)*d1*d2; gsl_matrix_set(S_sub, i, j, d); d=gsl_matrix_get(Svar_sub, i, j)*d1*d2*d1*d2; gsl_matrix_set(Svar_sub, i, j, d); } } gsl_matrix_add (S_mat, S_sub); gsl_matrix_add (Svar_mat, Svar_sub); } // Final: update S and Svar. for (size_t i=0; isize1; i++) { d1=gsl_vector_get(s_vec, i); if (d1==0) {continue;} for (size_t j=i; jsize2; j++) { d2=gsl_vector_get(s_vec, j); if (d2==0) {continue;} d=gsl_matrix_get(S_mat, i, j)/(d1*d2); gsl_matrix_set(S_mat, i, j, d); if (i!=j) {gsl_matrix_set(S_mat, j, i, d);} d=gsl_matrix_get(Svar_mat, i, j)/(d1*d2*d1*d2); gsl_matrix_set(Svar_mat, i, j, d); if (i!=j) {gsl_matrix_set(Svar_mat, j, i, d);} } } // Free matrices. gsl_matrix_free(S_sub); gsl_matrix_free(Svar_sub); gsl_vector_free(s); return; }