aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/io.cpp2042
1 files changed, 1161 insertions, 881 deletions
diff --git a/src/io.cpp b/src/io.cpp
index 40f1c16..3a4bc3c 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -27,8 +27,10 @@
#include <set>
#include <cstring>
#include <cmath>
+#include <cstdint>
#include <stdio.h>
#include <stdlib.h>
+#include <assert.h>
#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
@@ -310,8 +312,10 @@ bool ReadFile_column (const string &file_pheno, vector<int> &indicator_idv,
if (strcmp(ch_ptr, "NA")==0) {
indicator_idv.push_back(0);
pheno.push_back(-9);
- } // Pheno is different from pimass2.
+ }
else {
+
+ // Pheno is different from pimass2.
p=atof(ch_ptr);
indicator_idv.push_back(1);
pheno.push_back(p);
@@ -487,15 +491,18 @@ bool ReadFile_bim (const string &file_bim, vector<SNPINFO> &snpInfo) {
}
// Read .fam file.
-bool ReadFile_fam (const string &file_fam, vector<vector<int> > &indicator_pheno, vector<vector<double> > &pheno, map<string, int> &mapID2num, const vector<size_t> &p_column)
-{
+bool ReadFile_fam (const string &file_fam,
+ vector<vector<int> > &indicator_pheno,
+ vector<vector<double> > &pheno,
+ map<string, int> &mapID2num,
+ const vector<size_t> &p_column) {
indicator_pheno.clear();
pheno.clear();
mapID2num.clear();
igzstream infile (file_fam.c_str(), igzstream::in);
- //ifstream infile (file_fam.c_str(), ifstream::in);
- if (!infile) {cout<<"error opening .fam file: "<<file_fam<<endl; return false;}
+ if (!infile) {
+ cout<<"error opening .fam file: "<<file_fam<<endl; return false;}
string line;
char *ch_ptr;
@@ -528,12 +535,19 @@ bool ReadFile_fam (const string &file_fam, vector<vector<int> > &indicator_pheno
while (i<p_max ) {
if (mapP2c.count(i+1)!=0 ) {
if (strcmp(ch_ptr, "NA")==0) {
- ind_pheno_row[mapP2c[i+1]]=0; pheno_row[mapP2c[i+1]]=-9;
+ ind_pheno_row[mapP2c[i+1]]=0;
+ pheno_row[mapP2c[i+1]]=-9;
} else {
- p=atof(ch_ptr);
+ p=atof(ch_ptr);
- if (p==-9) {ind_pheno_row[mapP2c[i+1]]=0; pheno_row[mapP2c[i+1]]=-9;}
- else {ind_pheno_row[mapP2c[i+1]]=1; pheno_row[mapP2c[i+1]]=p;}
+ if (p==-9) {
+ ind_pheno_row[mapP2c[i+1]]=0;
+ pheno_row[mapP2c[i+1]]=-9;
+ }
+ else {
+ ind_pheno_row[mapP2c[i+1]]=1;
+ pheno_row[mapP2c[i+1]]=p;
+ }
}
}
i++;
@@ -551,20 +565,26 @@ bool ReadFile_fam (const string &file_fam, vector<vector<int> > &indicator_pheno
return true;
}
-
-
-
-
-
-//Read bimbam mean genotype file, the first time, to obtain #SNPs for analysis (ns_test) and total #SNP (ns_total)
-bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const gsl_matrix *W, vector<int> &indicator_idv, vector<int> &indicator_snp, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, map<string, string> &mapRS2chr, map<string, long int> &mapRS2bp, map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo, size_t &ns_test)
-{
+// Read bimbam mean genotype file, the first time, to obtain #SNPs for
+// analysis (ns_test) and total #SNP (ns_total).
+bool ReadFile_geno (const string &file_geno, const set<string> &setSnps,
+ const gsl_matrix *W, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, const double &maf_level,
+ const double &miss_level, const double &hwe_level,
+ const double &r2_level,
+ map<string, string> &mapRS2chr,
+ map<string, long int> &mapRS2bp,
+ map<string, double> &mapRS2cM,
+ vector<SNPINFO> &snpInfo,
+ size_t &ns_test) {
indicator_snp.clear();
snpInfo.clear();
igzstream infile (file_geno.c_str(), igzstream::in);
-// ifstream infile (file_geno.c_str(), ifstream::in);
- if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
+ if (!infile) {
+ cout<<"error reading genotype file:"<<file_geno<<endl;
+ return false;
+ }
gsl_vector *genotype=gsl_vector_alloc (W->size1);
gsl_vector *genotype_miss=gsl_vector_alloc (W->size1);
@@ -575,7 +595,6 @@ bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const g
gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
- //eigenlib_dgemm("T", "N", 1.0, W, W, 0.0, WtW);
int sig;
LUDecomp (WtW, pmt, &sig);
LUInvert (WtW, pmt, WtWi);
@@ -616,7 +635,8 @@ bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const g
major=ch_ptr;
if (setSnps.size()!=0 && setSnps.count(rs)==0) {
- SNPINFO sInfo={"-9", rs, -9, -9, minor, major, 0, -9, -9, 0, 0, file_pos};
+ SNPINFO sInfo={"-9", rs, -9, -9, minor, major, 0, -9, -9,
+ 0, 0, file_pos};
snpInfo.push_back(sInfo);
indicator_snp.push_back(0);
@@ -634,7 +654,12 @@ bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const g
ch_ptr=strtok (NULL, " , \t");
if (indicator_idv[i]==0) {continue;}
- if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set (genotype_miss, c_idv, 1); n_miss++; c_idv++; continue;}
+ if (strcmp(ch_ptr, "NA")==0) {
+ gsl_vector_set (genotype_miss, c_idv, 1);
+ n_miss++;
+ c_idv++;
+ continue;
+ }
geno=atof(ch_ptr);
if (geno>=0 && geno<=0.5) {n_0++;}
@@ -643,8 +668,6 @@ bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const g
gsl_vector_set (genotype, c_idv, geno);
-// if (geno<0) {n_miss++; continue;}
-
if (flag_poly==0) {geno_old=geno; flag_poly=2;}
if (flag_poly==2 && geno!=geno_old) {flag_poly=1;}
@@ -654,24 +677,38 @@ bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const g
}
maf/=2.0*(double)(ni_test-n_miss);
- SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, n_miss, (double)n_miss/(double)ni_test, maf, ni_test-n_miss, 0, file_pos};
+ SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, n_miss,
+ (double)n_miss/(double)ni_test, maf,
+ ni_test-n_miss, 0, file_pos};
snpInfo.push_back(sInfo);
file_pos++;
- if ( (double)n_miss/(double)ni_test > miss_level) {indicator_snp.push_back(0); continue;}
+ if ( (double)n_miss/(double)ni_test > miss_level) {
+ indicator_snp.push_back(0);
+ continue;
+ }
- if ( (maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1 ) {indicator_snp.push_back(0); continue;}
+ if ((maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1) {
+ indicator_snp.push_back(0);
+ continue;
+ }
if (flag_poly!=1) {indicator_snp.push_back(0); continue;}
if (hwe_level!=0 && maf_level!=-1) {
- if (CalcHWE(n_0, n_2, n_1)<hwe_level) {indicator_snp.push_back(0); continue;}
+ if (CalcHWE(n_0, n_2, n_1)<hwe_level) {
+ indicator_snp.push_back(0);
+ continue;
+ }
}
- //filter SNP if it is correlated with W
- //unless W has only one column, of 1s
+ // Filter SNP if it is correlated with W unless W has
+ // only one column, of 1s.
for (size_t i=0; i<genotype->size; ++i) {
- if (gsl_vector_get (genotype_miss, i)==1) {geno=maf*2.0; gsl_vector_set (genotype, i, geno);}
+ if (gsl_vector_get (genotype_miss, i)==1) {
+ geno=maf*2.0;
+ gsl_vector_set (genotype, i, geno);
+ }
}
gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
@@ -679,7 +716,10 @@ bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const g
gsl_blas_ddot (genotype, genotype, &v_x);
gsl_blas_ddot (Wtx, WtWiWtx, &v_w);
- if (W->size2!=1 && v_w/v_x >= r2_level) {indicator_snp.push_back(0); continue;}
+ if (W->size2!=1 && v_w/v_x >= r2_level) {
+ indicator_snp.push_back(0);
+ continue;
+ }
indicator_snp.push_back(1);
ns_test++;
@@ -699,19 +739,21 @@ bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const g
return true;
}
-
-
-
-
-
-//Read bed file, the first time
-bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl_matrix *W, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, size_t &ns_test)
-{
+// Read bed file, the first time.
+bool ReadFile_bed (const string &file_bed, const set<string> &setSnps,
+ const gsl_matrix *W, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
+ const double &maf_level, const double &miss_level,
+ const double &hwe_level, const double &r2_level,
+ size_t &ns_test) {
indicator_snp.clear();
size_t ns_total=snpInfo.size();
ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}
+ if (!infile) {
+ cout<<"error reading bed file:"<<file_bed<<endl;
+ return false;
+ }
gsl_vector *genotype=gsl_vector_alloc (W->size1);
gsl_vector *genotype_miss=gsl_vector_alloc (W->size1);
@@ -739,12 +781,12 @@ bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl
}
ns_test=0;
- //calculate n_bit and c, the number of bit for each snp
+ // Calculate n_bit and c, the number of bit for each snp.
size_t n_bit;
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1;}
- //ignore the first three majic numbers
+ // Ignore the first three magic numbers.
for (int i=0; i<3; ++i) {
infile.read(ch,1);
b=ch[0];
@@ -754,11 +796,14 @@ bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl
size_t n_miss;
size_t n_0, n_1, n_2, c;
- //start reading snps and doing association test
+ // Start reading snps and doing association test.
for (size_t t=0; t<ns_total; ++t) {
- infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
- if (setSnps.size()!=0 && setSnps.count(snpInfo[t].rs_number)==0) {
+ // n_bit, and 3 is the number of magic numbers.
+ infile.seekg(t*n_bit+3);
+
+ if (setSnps.size()!=0 &&
+ setSnps.count(snpInfo[t].rs_number) == 0) {
snpInfo[t].n_miss=-9;
snpInfo[t].missingness=-9;
snpInfo[t].maf=-9;
@@ -767,24 +812,41 @@ bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl
continue;
}
- //read genotypes
+ // Read genotypes.
c=0; maf=0.0; n_miss=0; n_0=0; n_1=0; n_2=0;
c_idv=0; gsl_vector_set_zero (genotype_miss);
for (size_t i=0; i<n_bit; ++i) {
infile.read(ch,1);
b=ch[0];
- for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
+
+ // Minor allele homozygous: 2.0; major: 0.0;
+ for (size_t j=0; j<4; ++j) {
if ((i==(n_bit-1)) && c==ni_total) {break;}
if (indicator_idv[c]==0) {c++; continue;}
c++;
if (b[2*j]==0) {
- if (b[2*j+1]==0) {gsl_vector_set(genotype, c_idv, 2.0); maf+=2.0; n_2++;}
- else {gsl_vector_set(genotype, c_idv, 1.0); maf+=1.0; n_1++;}
+ if (b[2*j+1]==0) {
+ gsl_vector_set(genotype, c_idv, 2.0);
+ maf+=2.0;
+ n_2++;
+ }
+ else {
+ gsl_vector_set(genotype, c_idv, 1.0);
+ maf+=1.0;
+ n_1++;
+ }
}
else {
- if (b[2*j+1]==1) {gsl_vector_set(genotype, c_idv, 0.0); maf+=0.0; n_0++;}
- else {gsl_vector_set(genotype_miss, c_idv, 1); n_miss++; }
+ if (b[2*j+1]==1) {
+ gsl_vector_set(genotype, c_idv, 0.0);
+ maf+=0.0;
+ n_0++;
+ }
+ else {
+ gsl_vector_set(genotype_miss, c_idv, 1);
+ n_miss++;
+ }
}
c_idv++;
}
@@ -798,20 +860,35 @@ bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl
snpInfo[t].n_nb=0;
snpInfo[t].file_position=t;
- if ( (double)n_miss/(double)ni_test > miss_level) {indicator_snp.push_back(0); continue;}
+ if ( (double)n_miss/(double)ni_test > miss_level) {
+ indicator_snp.push_back(0);
+ continue;
+ }
- if ( (maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1 ) {indicator_snp.push_back(0); continue;}
+ if ((maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1) {
+ indicator_snp.push_back(0);
+ continue;
+ }
- if ( (n_0+n_1)==0 || (n_1+n_2)==0 || (n_2+n_0)==0) {indicator_snp.push_back(0); continue;}
+ if ( (n_0+n_1)==0 || (n_1+n_2)==0 || (n_2+n_0)==0) {
+ indicator_snp.push_back(0);
+ continue;
+ }
if (hwe_level!=0 && maf_level!=-1) {
- if (CalcHWE(n_0, n_2, n_1)<hwe_level) {indicator_snp.push_back(0); continue;}
+ if (CalcHWE(n_0, n_2, n_1)<hwe_level) {
+ indicator_snp.push_back(0);
+ continue;
+ }
}
- //filter SNP if it is correlated with W
- //unless W has only one column, of 1s
+ // Filter SNP if it is correlated with W unless W has
+ // only one column, of 1s.
for (size_t i=0; i<genotype->size; ++i) {
- if (gsl_vector_get (genotype_miss, i)==1) {geno=maf*2.0; gsl_vector_set (genotype, i, geno);}
+ if (gsl_vector_get (genotype_miss, i)==1) {
+ geno=maf*2.0;
+ gsl_vector_set (genotype, i, geno);
+ }
}
gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
@@ -819,7 +896,10 @@ bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl
gsl_blas_ddot (genotype, genotype, &v_x);
gsl_blas_ddot (Wtx, WtWiWtx, &v_w);
- if (W->size2!=1 && v_w/v_x > r2_level) {indicator_snp.push_back(0); continue;}
+ if (W->size2!=1 && v_w/v_x > r2_level) {
+ indicator_snp.push_back(0);
+ continue;
+ }
indicator_snp.push_back(1);
ns_test++;
@@ -839,20 +919,14 @@ bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl
return true;
}
-
-
-
-
-//read the genotype for one SNP; remember to read empty lines
-//geno stores original genotypes without centering
-//missing values are replaced by mean
-bool Bimbam_ReadOneSNP (const size_t inc, const vector<int> &indicator_idv, igzstream &infile, gsl_vector *geno, double &geno_mean)
-{
+// Read the genotype for one SNP; remember to read empty lines.
+// Geno stores original genotypes without centering.
+// Missing values are replaced by mean.
+bool Bimbam_ReadOneSNP (const size_t inc, const vector<int> &indicator_idv,
+ igzstream &infile, gsl_vector *geno,
+ double &geno_mean) {
size_t ni_total=indicator_idv.size();
- // if (infile.eof()) {infile.clear();}
- // infile.seekg(pos);
-
string line;
char *ch_ptr;
bool flag=false;
@@ -896,16 +970,17 @@ bool Bimbam_ReadOneSNP (const size_t inc, const vector<int> &indicator_idv, igzs
return flag;
}
-
-//for plink, store SNPs as double too
-void Plink_ReadOneSNP (const int pos, const vector<int> &indicator_idv, ifstream &infile, gsl_vector *geno, double &geno_mean)
-{
+// For PLINK, store SNPs as double too.
+void Plink_ReadOneSNP (const int pos, const vector<int> &indicator_idv,
+ ifstream &infile, gsl_vector *geno, double &geno_mean) {
size_t ni_total=indicator_idv.size(), n_bit;
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1;}
- infile.seekg(pos*n_bit+3); //n_bit, and 3 is the number of magic numbers
- //read genotypes
+ // n_bit, and 3 is the number of magic numbers.
+ infile.seekg(pos*n_bit+3);
+
+ // Read genotypes.
char ch[1];
bitset<8> b;
@@ -916,7 +991,9 @@ void Plink_ReadOneSNP (const int pos, const vector<int> &indicator_idv, ifstream
for (size_t i=0; i<n_bit; ++i) {
infile.read(ch,1);
b=ch[0];
- for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
+
+ // Minor allele homozygous: 2.0; major: 0.0.
+ for (size_t j=0; j<4; ++j) {
if ((i==(n_bit-1)) && c==ni_total) {break;}
if (indicator_idv[c]==0) {c++; continue;}
c++;
@@ -951,15 +1028,14 @@ void Plink_ReadOneSNP (const int pos, const vector<int> &indicator_idv, ifstream
return;
}
-
-
-
-
-void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv, map<string, int> &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G)
-{
+void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv,
+ map<string, int> &mapID2num, const size_t k_mode,
+ bool &error, gsl_matrix *G) {
igzstream infile (file_kin.c_str(), igzstream::in);
-// ifstream infile (file_kin.c_str(), ifstream::in);
- if (!infile) {cout<<"error! fail to open kinship file: "<<file_kin<<endl; error=true; return;}
+ if (!infile) {
+ cout<<"error! fail to open kinship file: "<<file_kin<<endl;
+ error=true; return;
+ }
size_t ni_total=indicator_idv.size();
@@ -972,25 +1048,47 @@ void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv, map<strin
if (k_mode==1) {
size_t i_test=0, i_total=0, j_test=0, j_total=0;
while (getline(infile, line)) {
- if (i_total==ni_total) {cout<<"error! number of rows in the kinship file is larger than the number of phentypes."<<endl; error=true;}
+ if (i_total==ni_total) {
+ cout<<"error! number of rows in the kinship "<<
+ "file is larger than the number of phentypes."<<
+ endl;
+ error=true;
+ }
if (indicator_idv[i_total]==0) {i_total++; continue;}
j_total=0; j_test=0;
ch_ptr=strtok ((char *)line.c_str(), " , \t");
while (ch_ptr!=NULL) {
- if (j_total==ni_total) {cout<<"error! number of columns in the kinship file is larger than the number of phentypes for row = "<<i_total<<endl; error=true;}
+ if (j_total==ni_total) {
+ cout<<"error! number of columns in the "<<
+ "kinship file is larger than the number"<<
+ " of phentypes for row = "<<i_total<<endl;
+ error=true;
+ }
d=atof(ch_ptr);
- if (indicator_idv[j_total]==1) {gsl_matrix_set (G, i_test, j_test, d); j_test++;}
+ if (indicator_idv[j_total]==1) {
+ gsl_matrix_set (G, i_test, j_test, d);
+ j_test++;
+ }
j_total++;
ch_ptr=strtok (NULL, " , \t");
}
- if (j_total!=ni_total) {cout<<"error! number of columns in the kinship file do not match the number of phentypes for row = "<<i_total<<endl; error=true;}
+ if (j_total!=ni_total) {
+ cout<<"error! number of columns in the kinship "<<
+ "file do not match the number of phentypes for "<<
+ "row = "<<i_total<<endl;
+ error=true;
+ }
i_total++; i_test++;
}
- if (i_total!=ni_total) {cout<<"error! number of rows in the kinship file do not match the number of phentypes."<<endl; error=true;}
+ if (i_total!=ni_total) {
+ cout<<"error! number of rows in the kinship file do "<<
+ "not match the number of phentypes."<<endl;
+ error=true;
+ }
}
else {
map<size_t, size_t> mapID2ID;
@@ -1010,14 +1108,24 @@ void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv, map<strin
id2=ch_ptr;
ch_ptr=strtok (NULL, " , \t");
d=atof(ch_ptr);
- if (mapID2num.count(id1)==0 || mapID2num.count(id2)==0) {continue;}
- if (indicator_idv[mapID2num[id1]]==0 || indicator_idv[mapID2num[id2]]==0) {continue;}
+ if (mapID2num.count(id1)==0 ||
+ mapID2num.count(id2)==0) {
+ continue;
+ }
+ if (indicator_idv[mapID2num[id1]]==0 ||
+ indicator_idv[mapID2num[id2]]==0) {
+ continue;
+ }
n_id1=mapID2ID[mapID2num[id1]];
n_id2=mapID2ID[mapID2num[id2]];
Cov_d=gsl_matrix_get(G, n_id1, n_id2);
- if (Cov_d!=0 && Cov_d!=d) {cout<<"error! redundant and unequal terms in the kinship file, for id1 = "<<id1<<" and id2 = "<<id2<<endl;}
+ if (Cov_d!=0 && Cov_d!=d) {
+ cout<<"error! redundant and unequal terms in the "<<
+ "kinship file, for id1 = "<<id1<<" and id2 = "<<
+ id2<<endl;
+ }
else {
gsl_matrix_set(G, n_id1, n_id2, d);
gsl_matrix_set(G, n_id2, n_id1, d);
@@ -1031,19 +1139,24 @@ void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv, map<strin
return;
}
-
-void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv, map<string, int> &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G)
-{
+void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv,
+ map<string, int> &mapID2num, const size_t k_mode,
+ bool &error, gsl_matrix *G) {
igzstream infile (file_mk.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open file: "<<file_mk<<endl; error=true; return;}
+ if (!infile) {cout<<"error! fail to open file: "<<file_mk<<endl;
+ error=true;
+ return;
+ }
string file_kin, line;
size_t i=0;
while (getline(infile, line)) {
file_kin=line.c_str();
- gsl_matrix_view G_sub=gsl_matrix_submatrix(G, 0, i*G->size1, G->size1, G->size1);
- ReadFile_kin (file_kin, indicator_idv, mapID2num, k_mode, error, &G_sub.matrix);
+ gsl_matrix_view G_sub=gsl_matrix_submatrix(G, 0, i*G->size1,
+ G->size1, G->size1);
+ ReadFile_kin (file_kin, indicator_idv, mapID2num, k_mode,
+ error, &G_sub.matrix);
i++;
}
@@ -1052,12 +1165,13 @@ void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv, map<string,
return;
}
-
-void ReadFile_eigenU (const string &file_ku, bool &error, gsl_matrix *U)
-{
+void ReadFile_eigenU (const string &file_ku, bool &error, gsl_matrix *U) {
igzstream infile (file_ku.c_str(), igzstream::in);
-// ifstream infile (file_ku.c_str(), ifstream::in);
- if (!infile) {cout<<"error! fail to open the U file: "<<file_ku<<endl; error=true; return;}
+ if (!infile) {
+ cout<<"error! fail to open the U file: "<<file_ku<<endl;
+ error=true;
+ return;
+ }
size_t n_row=U->size1, n_col=U->size2, i_row=0, i_col=0;
@@ -1068,12 +1182,21 @@ void ReadFile_eigenU (const string &file_ku, bool &error, gsl_matrix *U)
double d;
while (getline(infile, line)) {
- if (i_row==n_row) {cout<<"error! number of rows in the U file is larger than expected."<<endl; error=true;}
+ if (i_row==n_row) {
+ cout<<"error! number of rows in the U file is larger "<<
+ "than expected."<<endl;
+ error=true;
+ }
i_col=0;
ch_ptr=strtok ((char *)line.c_str(), " , \t");
while (ch_ptr!=NULL) {
- if (i_col==n_col) {cout<<"error! number of columns in the U file is larger than expected, for row = "<<i_row<<endl; error=true;}
+ if (i_col==n_col) {
+ cout<<"error! number of columns in the U file "<<
+ "is larger than expected, for row = "<<
+ i_row<<endl;
+ error=true;
+ }
d=atof(ch_ptr);
gsl_matrix_set (U, i_row, i_col, d);
@@ -1091,14 +1214,13 @@ void ReadFile_eigenU (const string &file_ku, bool &error, gsl_matrix *U)
return;
}
-
-
-
-void ReadFile_eigenD (const string &file_kd, bool &error, gsl_vector *eval)
-{
+void ReadFile_eigenD (const string &file_kd, bool &error, gsl_vector *eval) {
igzstream infile (file_kd.c_str(), igzstream::in);
-// ifstream infile (file_kd.c_str(), ifstream::in);
- if (!infile) {cout<<"error! fail to open the D file: "<<file_kd<<endl; error=true; return;}
+ if (!infile) {
+ cout<<"error! fail to open the D file: "<<file_kd<<endl;
+ error=true;
+ return;
+ }
size_t n_row=eval->size, i_row=0;
@@ -1109,13 +1231,21 @@ void ReadFile_eigenD (const string &file_kd, bool &error, gsl_vector *eval)
double d;
while (getline(infile, line)) {
- if (i_row==n_row) {cout<<"error! number of rows in the D file is larger than expected."<<endl; error=true;}
+ if (i_row==n_row) {
+ cout<<"error! number of rows in the D file is larger "<<
+ "than expected."<<endl;
+ error=true;
+ }
ch_ptr=strtok ((char *)line.c_str(), " , \t");
d=atof(ch_ptr);
ch_ptr=strtok (NULL, " , \t");
- if (ch_ptr!=NULL) {cout<<"error! number of columns in the D file is larger than expected, for row = "<<i_row<<endl; error=true;}
+ if (ch_ptr!=NULL) {
+ cout<<"error! number of columns in the D file is larger "<<
+ "than expected, for row = "<<i_row<<endl;
+ error=true;
+ }
gsl_vector_set (eval, i_row, d);
@@ -1128,14 +1258,15 @@ void ReadFile_eigenD (const string &file_kd, bool &error, gsl_vector *eval)
return;
}
-
-
-//read bimbam mean genotype file and calculate kinship matrix
-bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin)
-{
+// Read bimbam mean genotype file and calculate kinship matrix.
+bool BimbamKin (const string &file_geno, vector<int> &indicator_snp,
+ const int k_mode, const int display_pace,
+ gsl_matrix *matrix_kin) {
igzstream infile (file_geno.c_str(), igzstream::in);
- //ifstream infile (file_geno.c_str(), ifstream::in);
- if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
+ if (!infile) {
+ cout<<"error reading genotype file:"<<file_geno<<endl;
+ return false;
+ }
string line;
char *ch_ptr;
@@ -1147,7 +1278,7 @@ bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k
gsl_vector *geno=gsl_vector_alloc (ni_total);
gsl_vector *geno_miss=gsl_vector_alloc (ni_total);
- //create a large matrix
+ // Create a large matrix.
size_t msize=10000;
gsl_matrix *Xlarge=gsl_matrix_alloc (ni_total, msize);
gsl_matrix_set_zero(Xlarge);
@@ -1155,7 +1286,9 @@ bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k
size_t ns_test=0;
for (size_t t=0; t<indicator_snp.size(); ++t) {
!safeGetline(infile, line).eof();
- if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
+ if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
+ ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);
+ }
if (indicator_snp[t]==0) {continue;}
ch_ptr=strtok ((char *)line.c_str(), " , \t");
@@ -1166,8 +1299,9 @@ bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k
gsl_vector_set_all(geno_miss, 0);
for (size_t i=0; i<ni_total; ++i) {
ch_ptr=strtok (NULL, " , \t");
- if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(geno_miss, i, 0); n_miss++;}
- else {
+ if (strcmp(ch_ptr, "NA")==0) {
+ gsl_vector_set(geno_miss, i, 0); n_miss++;
+ } else {
d=atof(ch_ptr);
gsl_vector_set (geno, i, d);
gsl_vector_set (geno_miss, i, 1);
@@ -1180,36 +1314,27 @@ bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k
geno_var+=geno_mean*geno_mean*(double)n_miss;
geno_var/=(double)ni_total;
geno_var-=geno_mean*geno_mean;
-// geno_var=geno_mean*(1-geno_mean*0.5);
for (size_t i=0; i<ni_total; ++i) {
- if (gsl_vector_get (geno_miss, i)==0) {gsl_vector_set(geno, i, geno_mean);}
+ if (gsl_vector_get (geno_miss, i)==0) {
+ gsl_vector_set(geno, i, geno_mean);
+ }
}
gsl_vector_add_constant (geno, -1.0*geno_mean);
- /*
- if (geno_var!=0) {
- if (k_mode==1) {
- gsl_blas_dsyr (CblasUpper, 1.0, geno, matrix_kin);
- //eigenlib_dsyr (1.0, geno, matrix_kin);
- } else if (k_mode==2) {
- gsl_blas_dsyr (CblasUpper, 1.0/geno_var, geno, matrix_kin);
- //eigenlib_dsyr (1.0/geno_var, geno, matrix_kin);
- } else {
- cout<<"Unknown kinship mode."<<endl;
- }
+ if (k_mode==2 && geno_var!=0) {
+ gsl_vector_scale (geno, 1.0/sqrt(geno_var));
}
- */
-
- if (k_mode==2 && geno_var!=0) {gsl_vector_scale (geno, 1.0/sqrt(geno_var));}
- gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, ns_test%msize);
+ gsl_vector_view Xlarge_col=
+ gsl_matrix_column (Xlarge, ns_test%msize);
gsl_vector_memcpy (&Xlarge_col.vector, geno);
ns_test++;
if (ns_test%msize==0) {
- eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+ eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0,
+ matrix_kin);
gsl_matrix_set_zero(Xlarge);
}
}
@@ -1238,16 +1363,14 @@ bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k
return true;
}
-
-
-
-
-
-
-bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin)
-{
+bool PlinkKin (const string &file_bed, vector<int> &indicator_snp,
+ const int k_mode, const int display_pace,
+ gsl_matrix *matrix_kin) {
ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}
+ if (!infile) {
+ cout<<"error reading bed file:"<<file_bed<<endl;
+ return false;
+ }
char ch[1];
bitset<8> b;
@@ -1261,12 +1384,12 @@ bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, const int k_m
size_t ns_test=0;
int n_bit;
- //create a large matrix
+ // Create a large matrix.
size_t msize=10000;
gsl_matrix *Xlarge=gsl_matrix_alloc (ni_total, msize);
gsl_matrix_set_zero(Xlarge);
- //calculate n_bit and c, the number of bit for each snp
+ // Calculate n_bit and c, the number of bit for each snp.
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1; }
@@ -1277,26 +1400,46 @@ bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, const int k_m
}
for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
+ if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
+ ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);
+ }
if (indicator_snp[t]==0) {continue;}
- infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
+ // n_bit, and 3 is the number of magic numbers.
+ infile.seekg(t*n_bit+3);
- //read genotypes
+ // Read genotypes.
geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0;
for (int i=0; i<n_bit; ++i) {
infile.read(ch,1);
b=ch[0];
- for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
- if ((i==(n_bit-1)) && ci_total==ni_total) {break;}
+
+ // Minor allele homozygous: 2.0; major: 0.0.
+ for (size_t j=0; j<4; ++j) {
+ if ((i==(n_bit-1)) && ci_total==ni_total) {
+ break;
+ }
if (b[2*j]==0) {
- if (b[2*j+1]==0) {gsl_vector_set(geno, ci_total, 2.0); geno_mean+=2.0; geno_var+=4.0; }
- else {gsl_vector_set(geno, ci_total, 1.0); geno_mean+=1.0; geno_var+=1.0;}
+ if (b[2*j+1]==0) {
+ gsl_vector_set(geno, ci_total, 2.0);
+ geno_mean+=2.0;
+ geno_var+=4.0;
+ }
+ else {
+ gsl_vector_set(geno, ci_total, 1.0);
+ geno_mean+=1.0;
+ geno_var+=1.0;
+ }
}
else {
- if (b[2*j+1]==1) {gsl_vector_set(geno, ci_total, 0.0); }
- else {gsl_vector_set(geno, ci_total, -9.0); n_miss++; }
+ if (b[2*j+1]==1) {
+ gsl_vector_set(geno,ci_total,0.0);
+ }
+ else {
+ gsl_vector_set(geno,ci_total,-9.0);
+ n_miss++;
+ }
}
ci_total++;
@@ -1307,7 +1450,6 @@ bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, const int k_m
geno_var+=geno_mean*geno_mean*(double)n_miss;
geno_var/=(double)ni_total;
geno_var-=geno_mean*geno_mean;
-// geno_var=geno_mean*(1-geno_mean*0.5);
for (size_t i=0; i<ni_total; ++i) {
d=gsl_vector_get(geno,i);
@@ -1316,22 +1458,17 @@ bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, const int k_m
gsl_vector_add_constant (geno, -1.0*geno_mean);
- /*
- if (geno_var!=0) {
- if (k_mode==1) {gsl_blas_dsyr (CblasUpper, 1.0, geno, matrix_kin);}
- else if (k_mode==2) {gsl_blas_dsyr (CblasUpper, 1.0/geno_var, geno, matrix_kin);}
- else {cout<<"Unknown kinship mode."<<endl;}
+ if (k_mode==2 && geno_var!=0) {
+ gsl_vector_scale (geno, 1.0/sqrt(geno_var));
}
- */
-
- if (k_mode==2 && geno_var!=0) {gsl_vector_scale (geno, 1.0/sqrt(geno_var));}
- gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, ns_test%msize);
+ gsl_vector_view Xlarge_col=
+ gsl_matrix_column (Xlarge, ns_test%msize);
gsl_vector_memcpy (&Xlarge_col.vector, geno);
ns_test++;
if (ns_test%msize==0) {
- eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+ eigenlib_dgemm("N","T",1.0,Xlarge,Xlarge,1.0,matrix_kin);
gsl_matrix_set_zero(Xlarge);
}
}
@@ -1360,16 +1497,16 @@ bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, const int k_m
return true;
}
-
-
-
-
-//Read bimbam mean genotype file, the second time, recode "mean" genotype and calculate K
-bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K)
-{
+// Read bimbam mean genotype file, the second time, recode "mean"
+// genotype and calculate K.
+bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, gsl_matrix *UtX,
+ gsl_matrix *K, const bool calc_K) {
igzstream infile (file_geno.c_str(), igzstream::in);
-// ifstream infile (file_geno.c_str(), ifstream::in);
- if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
+ if (!infile) {
+ cout<<"error reading genotype file:"<<file_geno<<endl;
+ return false;
+ }
string line;
char *ch_ptr;
@@ -1402,8 +1539,10 @@ bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<
ch_ptr=strtok (NULL, " , \t");
if (indicator_idv[j]==0) {continue;}
- if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set (genotype_miss, c_idv, 1); n_miss++;}
- else {
+ if (strcmp(ch_ptr, "NA")==0) {
+ gsl_vector_set (genotype_miss, c_idv, 1);
+ n_miss++;
+ } else {
geno=atof(ch_ptr);
gsl_vector_set (genotype, c_idv, geno);
geno_mean+=geno;
@@ -1414,14 +1553,21 @@ bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<
geno_mean/=(double)(ni_test-n_miss);
for (size_t i=0; i<genotype->size; ++i) {
- if (gsl_vector_get (genotype_miss, i)==1) {geno=0;}
- else {geno=gsl_vector_get (genotype, i); geno-=geno_mean;}
+ if (gsl_vector_get (genotype_miss, i)==1) {
+ geno=0;
+ }
+ else {
+ geno=gsl_vector_get (genotype, i);
+ geno-=geno_mean;
+ }
gsl_vector_set (genotype, i, geno);
gsl_matrix_set (UtX, i, c_snp, geno);
}
- if (calc_K==true) {gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);}
+ if (calc_K==true) {
+ gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);
+ }
c_snp++;
}
@@ -1446,14 +1592,18 @@ bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<
return true;
}
-
-
-//compact version of the above function, using uchar instead of gsl_matrix
-bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test)
-{
+// Compact version of the above function, using uchar instead of
+// gsl_matrix.
+bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv,
+ vector<int> &indicator_snp,
+ vector<vector<unsigned char> > &Xt,
+ gsl_matrix *K, const bool calc_K, const size_t ni_test,
+ const size_t ns_test) {
igzstream infile (file_geno.c_str(), igzstream::in);
- // ifstream infile (file_geno.c_str(), ifstream::in);
- if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
+ if (!infile) {
+ cout<<"error reading genotype file:"<<file_geno<<endl;
+ return false;
+ }
Xt.clear();
vector<unsigned char> Xt_row;
@@ -1490,7 +1640,10 @@ bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<
ch_ptr=strtok (NULL, " , \t");
if (indicator_idv[j]==0) {continue;}
- if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set (genotype_miss, c_idv, 1); n_miss++;} else {
+ if (strcmp(ch_ptr, "NA")==0) {
+ gsl_vector_set (genotype_miss, c_idv, 1);
+ n_miss++;
+ } else {
geno=atof(ch_ptr);
gsl_vector_set (genotype, c_idv, geno);
geno_mean+=geno;
@@ -1512,7 +1665,9 @@ bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<
}
Xt.push_back(Xt_row);
- if (calc_K==true) {gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);}
+ if (calc_K==true) {
+ gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);
+ }
c_snp++;
}
@@ -1537,14 +1692,16 @@ bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<
return true;
}
-
-
-
-//Read bimbam mean genotype file, the second time, recode "mean" genotype and calculate K
-bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K)
-{
+// Read PLINK bed genotype file, the second time, recode the
+// genotype and calculate K.
+bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, gsl_matrix *UtX,
+ gsl_matrix *K, const bool calc_K) {
ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}
+ if (!infile) {
+ cout<<"error reading bed file:"<<file_bed<<endl;
+ return false;
+ }
char ch[1];
bitset<8> b;
@@ -1558,7 +1715,7 @@ bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<in
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1;}
- //print the first three majic numbers
+ // Print the first three magic numbers.
for (int i=0; i<3; ++i) {
infile.read(ch,1);
b=ch[0];
@@ -1572,28 +1729,44 @@ bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<in
size_t n_miss;
size_t c_idv=0, c_snp=0, c=0;
- //start reading snps and doing association test
+ // Start reading snps and doing association test.
for (size_t t=0; t<ns_total; ++t) {
if (indicator_snp[t]==0) {continue;}
- infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
- //read genotypes
+ // Each SNP occupies n_bit bytes; 3 is the number of magic numbers.
+ infile.seekg(t*n_bit+3);
+
+ // Read genotypes.
c_idv=0; geno_mean=0.0; n_miss=0; c=0;
for (int i=0; i<n_bit; ++i) {
infile.read(ch,1);
b=ch[0];
- for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
+
+ // Minor allele homozygous: 2.0; major: 0.0.
+ for (size_t j=0; j<4; ++j) {
if ((i==(n_bit-1)) && c==ni_total) {break;}
if (indicator_idv[c]==0) {c++; continue;}
c++;
if (b[2*j]==0) {
- if (b[2*j+1]==0) {gsl_vector_set(genotype, c_idv, 2.0); geno_mean+=2.0;}
- else {gsl_vector_set(genotype, c_idv, 1.0); geno_mean+=1.0;}
+ if (b[2*j+1]==0) {
+ gsl_vector_set(genotype, c_idv, 2.0);
+ geno_mean+=2.0;
+ }
+ else {
+ gsl_vector_set(genotype, c_idv, 1.0);
+ geno_mean+=1.0;
+ }
}
else {
- if (b[2*j+1]==1) {gsl_vector_set(genotype, c_idv, 0.0); geno_mean+=0.0;}
- else {gsl_vector_set(genotype, c_idv, -9.0); n_miss++;}
+ if (b[2*j+1]==1) {
+ gsl_vector_set(genotype, c_idv, 0.0);
+ geno_mean+=0.0;
+ }
+ else {
+ gsl_vector_set(genotype, c_idv, -9.0);
+ n_miss++;
+ }
}
c_idv++;
}
@@ -1610,7 +1783,9 @@ bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<in
gsl_matrix_set (UtX, i, c_snp, geno);
}
- if (calc_K==true) {gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);}
+ if (calc_K==true) {
+ gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);
+ }
c_snp++;
}
@@ -1633,14 +1808,17 @@ bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<in
return true;
}
-
-
-
-//compact version of the above function, using uchar instead of gsl_matrix
-bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test)
-{
+// Compact version of the above function, using uchar instead of gsl_matrix.
+bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv,
+ vector<int> &indicator_snp,
+ vector<vector<unsigned char> > &Xt, gsl_matrix *K,
+ const bool calc_K, const size_t ni_test,
+ const size_t ns_test) {
ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}
+ if (!infile) {
+ cout<<"error reading bed file:"<<file_bed<<endl;
+ return false;
+ }
Xt.clear();
vector<unsigned char> Xt_row;
@@ -1658,7 +1836,7 @@ bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<in
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1;}
- //print the first three majic numbers
+ // Print the first three magic numbers.
for (int i=0; i<3; ++i) {
infile.read(ch,1);
b=ch[0];
@@ -1672,28 +1850,44 @@ bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<in
size_t n_miss;
size_t c_idv=0, c_snp=0, c=0;
- //start reading snps and doing association test
+ // Start reading SNPs and doing association test.
for (size_t t=0; t<ns_total; ++t) {
if (indicator_snp[t]==0) {continue;}
- infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
- //read genotypes
+ // Each SNP occupies n_bit bytes; 3 is the number of magic numbers.
+ infile.seekg(t*n_bit+3);
+
+ // Read genotypes.
c_idv=0; geno_mean=0.0; n_miss=0; c=0;
for (int i=0; i<n_bit; ++i) {
infile.read(ch,1);
b=ch[0];
- for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
+
+ // Minor allele homozygous: 2.0; major: 0.0.
+ for (size_t j=0; j<4; ++j) {
if ((i==(n_bit-1)) && c==ni_total) {break;}
if (indicator_idv[c]==0) {c++; continue;}
c++;
if (b[2*j]==0) {
- if (b[2*j+1]==0) {gsl_vector_set(genotype, c_idv, 2.0); geno_mean+=2.0;}
- else {gsl_vector_set(genotype, c_idv, 1.0); geno_mean+=1.0;}
+ if (b[2*j+1]==0) {
+ gsl_vector_set(genotype, c_idv, 2.0);
+ geno_mean+=2.0;
+ }
+ else {
+ gsl_vector_set(genotype, c_idv, 1.0);
+ geno_mean+=1.0;
+ }
}
else {
- if (b[2*j+1]==1) {gsl_vector_set(genotype, c_idv, 0.0); geno_mean+=0.0;}
- else {gsl_vector_set(genotype, c_idv, -9.0); n_miss++;}
+ if (b[2*j+1]==1) {
+ gsl_vector_set(genotype, c_idv, 0.0);
+ geno_mean+=0.0;
+ }
+ else {
+ gsl_vector_set(genotype, c_idv, -9.0);
+ n_miss++;
+ }
}
c_idv++;
}
@@ -1713,7 +1907,9 @@ bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<in
}
Xt.push_back(Xt_row);
- if (calc_K==true) {gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);}
+ if (calc_K==true) {
+ gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);
+ }
c_snp++;
}
@@ -1736,18 +1932,15 @@ bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<in
return true;
}
-
-
-
-
-
-
-bool ReadFile_est (const string &file_est, const vector<size_t> &est_column, map<string, double> &mapRS2est)
-{
+bool ReadFile_est (const string &file_est, const vector<size_t> &est_column,
+ map<string, double> &mapRS2est) {
mapRS2est.clear();
ifstream infile (file_est.c_str(), ifstream::in);
- if (!infile) {cout<<"error opening estimated parameter file: "<<file_est<<endl; return false;}
+ if (!infile) {
+ cout<<"error opening estimated parameter file: "<<file_est<<endl;
+ return false;
+ }
string line;
char *ch_ptr;
@@ -1755,7 +1948,7 @@ bool ReadFile_est (const string &file_est, const vector<size_t> &est_column, map
string rs;
double alpha, beta, gamma, d;
- //header
+ // Header.
getline(infile, line);
size_t n=*max_element(est_column.begin(), est_column.end());
@@ -1778,7 +1971,9 @@ bool ReadFile_est (const string &file_est, const vector<size_t> &est_column, map
mapRS2est[rs]=d;
}
else {
- cout<<"the same SNP occurs more than once in estimated parameter file: "<<rs<<endl; return false;
+ cout << "the same SNP occurs more than once in estimated "<<
+ "parameter file: "<<rs<<endl;
+ return false;
}
}
@@ -1787,13 +1982,12 @@ bool ReadFile_est (const string &file_est, const vector<size_t> &est_column, map
return true;
}
-
-
-bool CountFileLines (const string &file_input, size_t &n_lines)
-{
+bool CountFileLines (const string &file_input, size_t &n_lines) {
igzstream infile (file_input.c_str(), igzstream::in);
- //ifstream infile (file_input.c_str(), ifstream::in);
- if (!infile) {cout<<"error! fail to open file: "<<file_input<<endl; return false;}
+ if (!infile) {
+ cout<<"error! fail to open file: "<<file_input<<endl;
+ return false;
+ }
n_lines=count(istreambuf_iterator<char>(infile), istreambuf_iterator<char>(), '\n');
infile.seekg (0, ios::beg);
@@ -1801,16 +1995,17 @@ bool CountFileLines (const string &file_input, size_t &n_lines)
return true;
}
-
-
-//Read gene expression file
-bool ReadFile_gene (const string &file_gene, vector<double> &vec_read, vector<SNPINFO> &snpInfo, size_t &ng_total)
-{
+// Read gene expression file.
+bool ReadFile_gene (const string &file_gene, vector<double> &vec_read,
+ vector<SNPINFO> &snpInfo, size_t &ng_total) {
vec_read.clear();
ng_total=0;
igzstream infile (file_gene.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open gene expression file: "<<file_gene<<endl; return false;}
+ if (!infile) {
+ cout<<"error! fail to open gene expression file: "<<file_gene<<endl;
+ return false;
+ }
string line;
char *ch_ptr;
@@ -1818,7 +2013,7 @@ bool ReadFile_gene (const string &file_gene, vector<double> &vec_read, vector<SN
size_t n_idv=0, t=0;
- //header
+ // Header.
getline(infile, line);
while (getline(infile, line)) {
@@ -1841,9 +2036,13 @@ bool ReadFile_gene (const string &file_gene, vector<double> &vec_read, vector<SN
ch_ptr=strtok (NULL, " , \t");
}
- if (t!=n_idv) {cout<<"error! number of columns doesn't match in row: "<<ng_total<<endl; return false;}
+ if (t!=n_idv) {
+ cout<<"error! number of columns doesn't match in row: "<<
+ ng_total<<endl;
+ return false;
+ }
- SNPINFO sInfo={"-9", rs, -9, -9, "-9", "-9", 0, -9, -9, 0, 0, 0};
+ SNPINFO sInfo={"-9",rs,-9,-9,"-9","-9",0,-9,-9,0,0,0};
snpInfo.push_back(sInfo);
ng_total++;
@@ -1855,28 +2054,28 @@ bool ReadFile_gene (const string &file_gene, vector<double> &vec_read, vector<SN
return true;
}
-
-
-
-
-
-
// WJA Added
-//Read Oxford sample file
-bool ReadFile_sample(const string &file_sample, vector<vector<int> > &indicator_pheno, vector<vector<double> > &pheno, const vector<size_t> &p_column, vector<int> &indicator_cvt, vector<vector<double> > &cvt, size_t &n_cvt)
-{
+// Read Oxford sample file.
+bool ReadFile_sample (const string &file_sample,
+ vector<vector<int> > &indicator_pheno,
+ vector<vector<double> > &pheno,
+ const vector<size_t> &p_column,
+ vector<int> &indicator_cvt,
+ vector<vector<double> > &cvt, size_t &n_cvt) {
indicator_pheno.clear();
pheno.clear();
indicator_cvt.clear();
igzstream infile (file_sample.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open sample file: "<<file_sample<<endl; return false;}
+ if (!infile) {
+ cout<<"error! fail to open sample file: "<<file_sample<<endl;
+ return false;
+ }
string line;
char *ch_ptr;
-
string id;
double p,d;
@@ -1888,8 +2087,6 @@ bool ReadFile_sample(const string &file_sample, vector<vector<int> > &indicator_
size_t num_p_in_file=0;
size_t num_cvt_in_file=0;
-// size_t p_max=*max_element(p_column.begin(), p_column.end());
-
map<size_t, size_t> mapP2c;
for (size_t i=0; i<p_column.size(); i++) {
mapP2c[p_column[i]]=i;
@@ -1897,7 +2094,7 @@ bool ReadFile_sample(const string &file_sample, vector<vector<int> > &indicator_
ind_pheno_row.push_back(0);
}
- // read header line1
+ // Read header line1.
if(!safeGetline(infile, line).eof()) {
ch_ptr=strtok((char *)line.c_str(), " \t");
if(strcmp(ch_ptr, "ID_1")!=0) {return false;}
@@ -1916,7 +2113,8 @@ bool ReadFile_sample(const string &file_sample, vector<vector<int> > &indicator_
vector<map<uint32_t, size_t> > cvt_factor_levels;
char col_type[num_cols];
- // read header line2
+
+ // Read header line2.
if(!safeGetline(infile, line).eof()) {
ch_ptr=strtok ((char *)line.c_str(), " \t");
if(strcmp(ch_ptr, "0")!=0) {return false;}
@@ -1927,13 +2125,17 @@ bool ReadFile_sample(const string &file_sample, vector<vector<int> > &indicator_
size_t it=0;
ch_ptr=strtok (NULL, " \t");
if(ch_ptr!=NULL)
- while(ch_ptr!=NULL){
- col_type[it++]=ch_ptr[0];
- if(ch_ptr[0]=='D') {cvt_factor_levels.push_back(map<uint32_t, size_t>());num_cvt_in_file++;}
- if(ch_ptr[0]=='C') {num_cvt_in_file++;}
- if((ch_ptr[0]=='P')||(ch_ptr[0]=='B')) {num_p_in_file++;}
- ch_ptr=strtok(NULL, " \t");
- }
+ while(ch_ptr!=NULL){
+ col_type[it++]=ch_ptr[0];
+ if(ch_ptr[0]=='D') {
+ cvt_factor_levels.push_back(map<uint32_t,size_t>());
+ num_cvt_in_file++;
+ }
+ if(ch_ptr[0]=='C') {num_cvt_in_file++;}
+ if((ch_ptr[0]=='P')||(ch_ptr[0]=='B')) {
+ num_p_in_file++;}
+ ch_ptr=strtok(NULL, " \t");
+ }
}
@@ -1943,7 +2145,6 @@ bool ReadFile_sample(const string &file_sample, vector<vector<int> > &indicator_
for(int it=0;it<3;it++){ch_ptr=strtok(NULL, " \t");}
-
size_t i=0;
size_t p_i=0;
size_t fac_cvt_i=0;
@@ -1952,42 +2153,62 @@ bool ReadFile_sample(const string &file_sample, vector<vector<int> > &indicator_
if((col_type[i]=='P')||(col_type[i]=='B'))
{
- if (mapP2c.count(p_i+1)!=0) {
- if (strcmp(ch_ptr, "NA")==0) {ind_pheno_row[mapP2c[p_i+1]]=0; pheno_row[mapP2c[p_i+1]]=-9;}
- else {p=atof(ch_ptr); ind_pheno_row[mapP2c[p_i+1]]=1; pheno_row[mapP2c[p_i+1]]=p;}
- }
- p_i++;
+ if (mapP2c.count(p_i+1)!=0) {
+ if (strcmp(ch_ptr, "NA")==0) {
+ ind_pheno_row[mapP2c[p_i+1]]=0;
+ pheno_row[mapP2c[p_i+1]]=-9;
+ }
+ else {
+ p=atof(ch_ptr);
+ ind_pheno_row[mapP2c[p_i+1]]=1;
+ pheno_row[mapP2c[p_i+1]]=p;
+ }
+ }
+ p_i++;
}
if(col_type[i]=='D')
{
- // NOTE THIS DOES NOT CHECK TO BE SURE LEVEL IS INTEGRAL i.e for atoi error
- if (strcmp(ch_ptr, "NA")!=0) {uint32_t level=atoi(ch_ptr); if(cvt_factor_levels[fac_cvt_i].count(level) == 0) {cvt_factor_levels[fac_cvt_i][level]=cvt_factor_levels[fac_cvt_i].size();}}
- fac_cvt_i++;
+
+ // NOTE THIS DOES NOT CHECK TO BE SURE LEVEL
+ // IS INTEGRAL i.e for atoi error.
+ if (strcmp(ch_ptr, "NA")!=0) {
+ uint32_t level=atoi(ch_ptr);
+ if (cvt_factor_levels[fac_cvt_i].count(level)==0) {
+ cvt_factor_levels[fac_cvt_i][level]=
+ cvt_factor_levels[fac_cvt_i].size();
+ }
+ }
+ fac_cvt_i++;
}
ch_ptr=strtok (NULL, " \t");
i++;
}
-
indicator_pheno.push_back(ind_pheno_row);
pheno.push_back(pheno_row);
}
- // close and reopen the file
+
+ // Close and reopen the file.
infile.close();
infile.clear();
- if(num_cvt_in_file>0)
- {
+ if(num_cvt_in_file>0) {
igzstream infile2 (file_sample.c_str(), igzstream::in);
- if (!infile2) {cout<<"error! fail to open sample file: "<<file_sample<<endl; return false;}
- // skip header
+ if (!infile2) {
+ cout<<"error! fail to open sample file: "<<
+ file_sample<<endl;
+ return false;
+ }
+
+ // Skip header.
safeGetline(infile2, line);
safeGetline(infile2, line);
- // pull in the covariates now we now the number of factor levels
+ // Pull in the covariates now that we know the number of
+ // factor levels.
while (!safeGetline(infile2, line).eof()) {
vector<double> v_d; flag_na=0;
@@ -1995,38 +2216,51 @@ bool ReadFile_sample(const string &file_sample, vector<vector<int> > &indicator_
for(int it=0;it<3;it++){ch_ptr=strtok(NULL, " \t");}
-
size_t i=0;
size_t fac_cvt_i=0;
size_t num_fac_levels;
while (i<num_cols) {
-
- if(col_type[i]=='C')
- {
- if (strcmp(ch_ptr, "NA")==0) {flag_na=1; d=-9;}
- else {d=atof(ch_ptr);}
-
- v_d.push_back(d);
+
+ if(col_type[i]=='C') {
+ if (strcmp(ch_ptr, "NA")==0) {flag_na=1; d=-9;}
+ else {d=atof(ch_ptr);}
+
+ v_d.push_back(d);
+ }
+
+ if(col_type[i]=='D') {
+
+ // NOTE THIS DOES NOT CHECK TO BE SURE
+ // LEVEL IS INTEGRAL i.e for atoi error.
+ num_fac_levels=cvt_factor_levels[fac_cvt_i].size();
+ if(num_fac_levels>1) {
+ if (strcmp(ch_ptr, "NA")==0) {
+ flag_na=1;
+ for(size_t it=0;it<num_fac_levels-1; it++) {
+ v_d.push_back(-9);
}
-
-
- if(col_type[i]=='D')
- {
- // NOTE THIS DOES NOT CHECK TO BE SURE LEVEL IS INTEGRAL i.e for atoi error
- num_fac_levels=cvt_factor_levels[fac_cvt_i].size();
- if(num_fac_levels>1)
- {
- if (strcmp(ch_ptr, "NA")==0) {flag_na=1; for(size_t it=0;it<num_fac_levels-1; it++) {v_d.push_back(-9);}}
- else {uint32_t level=atoi(ch_ptr); for(size_t it=0;it<num_fac_levels-1;it++) {cvt_factor_levels[fac_cvt_i][level]==it+1 ? v_d.push_back(1.0) : v_d.push_back(0.0); }}
- }
- fac_cvt_i++;
+ }
+ else {
+ uint32_t level=atoi(ch_ptr);
+ for(size_t it=0;it<num_fac_levels-1;it++) {
+ cvt_factor_levels[fac_cvt_i][level]==it+1 ?
+ v_d.push_back(1.0) :
+ v_d.push_back(0.0);
}
-
- ch_ptr=strtok (NULL, " \t");
- i++;
+ }
+ }
+ fac_cvt_i++;
+ }
+
+ ch_ptr=strtok (NULL, " \t");
+ i++;
}
- if (flag_na==0) {indicator_cvt.push_back(1);} else {indicator_cvt.push_back(0);}
+ if (flag_na==0) {
+ indicator_cvt.push_back(1);
+ } else {
+ indicator_cvt.push_back(0);
+ }
cvt.push_back(v_d);
@@ -2035,11 +2269,20 @@ bool ReadFile_sample(const string &file_sample, vector<vector<int> > &indicator_
if (indicator_cvt.empty()) {n_cvt=0;}
else {
flag_na=0;
- for (vector<int>::size_type i=0; i<indicator_cvt.size(); ++i) {
+ for (vector<int>::size_type i=0;
+ i<indicator_cvt.size();
+ ++i) {
if (indicator_cvt[i]==0) {continue;}
- if (flag_na==0) {flag_na=1; n_cvt=cvt[i].size();}
- if (flag_na!=0 && n_cvt!=cvt[i].size()) {cout<<"error! number of covariates in row "<<i<<" do not match other rows."<<endl; return false;}
+ if (flag_na==0) {
+ flag_na=1;
+ n_cvt=cvt[i].size();
+ }
+ if (flag_na!=0 && n_cvt!=cvt[i].size()) {
+ cout<<"error! number of covariates in row "<<
+ i<<" do not match other rows."<<endl;
+ return false;
+ }
}
}
@@ -2049,19 +2292,22 @@ bool ReadFile_sample(const string &file_sample, vector<vector<int> > &indicator_
return true;
}
-
-
-// WJA Added
-//Read bgen file, the first time
-#include <cstdint>
-#include <assert.h>
-bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, const gsl_matrix *W, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, size_t &ns_test)
-{
+// WJA Added.
+// Read bgen file, the first time.
+bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps,
+ const gsl_matrix *W, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
+ const double &maf_level, const double &miss_level,
+ const double &hwe_level, const double &r2_level,
+ size_t &ns_test) {
indicator_snp.clear();
ifstream infile (file_bgen.c_str(), ios::binary);
- if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return false;}
+ if (!infile) {
+ cout<<"error reading bgen file:"<<file_bgen<<endl;
+ return false;
+ }
gsl_vector *genotype=gsl_vector_alloc (W->size1);
gsl_vector *genotype_miss=gsl_vector_alloc (W->size1);
@@ -2075,8 +2321,8 @@ bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, const gs
int sig;
LUDecomp (WtW, pmt, &sig);
LUInvert (WtW, pmt, WtWi);
-
- // read in header
+
+ // Read in header.
uint32_t bgen_snp_block_offset;
uint32_t bgen_header_length;
uint32_t bgen_nsamples;
@@ -2108,7 +2354,6 @@ bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, const gs
string rs;
long int b_pos;
string chr;
-// double cM;
string major;
string minor;
string id;
@@ -2116,17 +2361,19 @@ bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, const gs
double v_x, v_w;
int c_idv=0;
-
double maf, geno, geno_old;
size_t n_miss;
size_t n_0, n_1, n_2;
int flag_poly;
- double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+ double bgen_geno_prob_AA, bgen_geno_prob_AB;
+ double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+ // Total number of samples in phenotype file.
+ size_t ni_total=indicator_idv.size();
- size_t ni_total=indicator_idv.size(); // total number of samples in phenotype file
- size_t ni_test=0; // number of samples to use in test
+ // Number of samples to use in test.
+ size_t ni_test=0;
uint32_t bgen_N;
uint16_t bgen_LS;
@@ -2141,13 +2388,9 @@ bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, const gs
size_t unzipped_data_size;
for (size_t i=0; i<ni_total; ++i) {
-
- ni_test+=indicator_idv[i];
+ ni_test+=indicator_idv[i];
}
-
-
-// ns_total=1;
for (size_t t=0; t<ns_total; ++t) {
id.clear();
@@ -2181,8 +2424,7 @@ bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, const gs
bgen_B_allele.resize(bgen_LB);
infile.read(&bgen_B_allele[0], bgen_LB);
-
- // should we switch according to MAF?
+ // Should we switch according to MAF?
minor=bgen_B_allele;
major=bgen_A_allele;
b_pos=static_cast<long int>(bgen_SNP_pos);
@@ -2196,16 +2438,15 @@ bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, const gs
snpInfo.push_back(sInfo);
indicator_snp.push_back(0);
if(CompressedSNPBlocks)
- infile.read(reinterpret_cast<char*>(&bgen_P),4);
+ infile.read(reinterpret_cast<char*>(&bgen_P),4);
else
- bgen_P=6*bgen_N;
+ bgen_P=6*bgen_N;
infile.ignore(static_cast<size_t>(bgen_P));
continue;
}
-
if(CompressedSNPBlocks)
{
infile.read(reinterpret_cast<char*>(&bgen_P),4);
@@ -2213,36 +2454,48 @@ bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, const gs
unzipped_data_size=6*bgen_N;
- infile.read(reinterpret_cast<char*>(zipped_data),bgen_P);
- int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data), reinterpret_cast<uLongf*>(&unzipped_data_size), reinterpret_cast<Bytef*>(zipped_data), static_cast<uLong> (bgen_P));
+ infile.read(reinterpret_cast<char*>(zipped_data),
+ bgen_P);
+ int result=
+ uncompress(reinterpret_cast<Bytef*>(unzipped_data),
+ reinterpret_cast<uLongf*>(&unzipped_data_size),
+ reinterpret_cast<Bytef*>(zipped_data),
+ static_cast<uLong> (bgen_P));
assert(result == Z_OK);
}
else
{
- bgen_P=6*bgen_N;
- infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
+ bgen_P=6*bgen_N;
+ infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
}
-
maf=0; n_miss=0; flag_poly=0; geno_old=-9;
n_0=0; n_1=0; n_2=0;
c_idv=0;
gsl_vector_set_zero (genotype_miss);
for (size_t i=0; i<bgen_N; ++i) {
+
// CHECK this set correctly!
if (indicator_idv[i]==0) {continue;}
-
- bgen_geno_prob_AA=static_cast<double>(unzipped_data[i*3])/32768.0;
- bgen_geno_prob_AB=static_cast<double>(unzipped_data[i*3+1])/32768.0;
- bgen_geno_prob_BB=static_cast<double>(unzipped_data[i*3+2])/32768.0;
- bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
-
- //CHECK 0.1 OK
- if (bgen_geno_prob_non_miss<0.9) {gsl_vector_set (genotype_miss, c_idv, 1); n_miss++; c_idv++; continue;}
-
+ bgen_geno_prob_AA=
+ static_cast<double>(unzipped_data[i*3])/32768.0;
+ bgen_geno_prob_AB=
+ static_cast<double>(unzipped_data[i*3+1])/32768.0;
+ bgen_geno_prob_BB=
+ static_cast<double>(unzipped_data[i*3+2])/32768.0;
+ bgen_geno_prob_non_miss=
+ bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
+
+ //CHECK 0.1 OK.
+ if (bgen_geno_prob_non_miss<0.9) {
+ gsl_vector_set (genotype_miss, c_idv, 1);
+ n_miss++;
+ c_idv++;
+ continue;
+ }
bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
@@ -2255,7 +2508,7 @@ bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, const gs
gsl_vector_set (genotype, c_idv, geno);
- // CHECK WHAT THIS DOES
+ // CHECK WHAT THIS DOES.
if (flag_poly==0) {geno_old=geno; flag_poly=2;}
if (flag_poly==2 && geno!=geno_old) {flag_poly=1;}
@@ -2266,23 +2519,39 @@ bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, const gs
maf/=2.0*static_cast<double>(ni_test-n_miss);
- SNPINFO sInfo={chr, rs, -9, b_pos, minor, major, n_miss, (double)n_miss/(double)ni_test, maf};
+ SNPINFO sInfo={chr, rs, -9, b_pos, minor, major, n_miss,
+ (double)n_miss/(double)ni_test, maf};
snpInfo.push_back(sInfo);
- if ( (double)n_miss/(double)ni_test > miss_level) {indicator_snp.push_back(0); continue;}
+ if ( (double)n_miss/(double)ni_test > miss_level) {
+ indicator_snp.push_back(0);
+ continue;
+ }
- if ( (maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1 ) {indicator_snp.push_back(0); continue;}
+ if ((maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1) {
+ indicator_snp.push_back(0);
+ continue;
+ }
- if (flag_poly!=1) {indicator_snp.push_back(0); continue;}
+ if (flag_poly!=1) {
+ indicator_snp.push_back(0);
+ continue;
+ }
if (hwe_level!=0 && maf_level!=-1) {
- if (CalcHWE(n_0, n_2, n_1)<hwe_level) {indicator_snp.push_back(0); continue;}
+ if (CalcHWE(n_0, n_2, n_1)<hwe_level) {
+ indicator_snp.push_back(0);
+ continue;
+ }
}
- //filter SNP if it is correlated with W
- //unless W has only one column, of 1s
+ // Filter SNP if it is correlated with W
+ // unless W has only one column, of 1s.
for (size_t i=0; i<genotype->size; ++i) {
- if (gsl_vector_get (genotype_miss, i)==1) {geno=maf*2.0; gsl_vector_set (genotype, i, geno);}
+ if (gsl_vector_get (genotype_miss, i)==1) {
+ geno=maf*2.0;
+ gsl_vector_set (genotype, i, geno);
+ }
}
gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
@@ -2290,30 +2559,29 @@ bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, const gs
gsl_blas_ddot (genotype, genotype, &v_x);
gsl_blas_ddot (Wtx, WtWiWtx, &v_w);
- if (W->size2!=1 && v_w/v_x >= r2_level) {indicator_snp.push_back(0); continue;}
+ if (W->size2!=1 && v_w/v_x >= r2_level) {
+ indicator_snp.push_back(0); continue;}
indicator_snp.push_back(1);
ns_test++;
}
-
-
-
return true;
-
}
-
-//read oxford genotype file and calculate kinship matrix
-bool bgenKin (const string &file_oxford, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin)
-{
+// Read oxford genotype file and calculate kinship matrix.
+bool bgenKin (const string &file_oxford, vector<int> &indicator_snp,
+ const int k_mode, const int display_pace,
+ gsl_matrix *matrix_kin) {
string file_bgen=file_oxford;
ifstream infile (file_bgen.c_str(), ios::binary);
- if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return false;}
-
+ if (!infile) {
+ cout<<"error reading bgen file:"<<file_bgen<<endl;
+ return false;
+ }
- // read in header
+ // Read in header.
uint32_t bgen_snp_block_offset;
uint32_t bgen_header_length;
uint32_t bgen_nsamples;
@@ -2331,11 +2599,11 @@ bool bgenKin (const string &file_oxford, vector<int> &indicator_snp, const int k
infile.read(reinterpret_cast<char*>(&bgen_flags),4);
bgen_snp_block_offset-=4;
bool CompressedSNPBlocks=bgen_flags&0x1;
-// bool LongIds=bgen_flags&0x4;
infile.ignore(bgen_snp_block_offset);
- double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+ double bgen_geno_prob_AA, bgen_geno_prob_AB;
+ double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
uint32_t bgen_N;
uint16_t bgen_LS;
@@ -2353,7 +2621,6 @@ bool bgenKin (const string &file_oxford, vector<int> &indicator_snp, const int k
string chr;
double genotype;
-
size_t n_miss;
double d, geno_mean, geno_var;
@@ -2364,7 +2631,9 @@ bool bgenKin (const string &file_oxford, vector<int> &indicator_snp, const int k
size_t ns_test=0;
for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
+ if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
+ ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);
+ }
id.clear();
rs.clear();
@@ -2396,74 +2665,76 @@ bool bgenKin (const string &file_oxford, vector<int> &indicator_snp, const int k
infile.read(reinterpret_cast<char*>(&bgen_LB),4);
bgen_B_allele.resize(bgen_LB);
infile.read(&bgen_B_allele[0], bgen_LB);
-
-
-
-
+
uint16_t unzipped_data[3*bgen_N];
if (indicator_snp[t]==0) {
if(CompressedSNPBlocks)
- infile.read(reinterpret_cast<char*>(&bgen_P),4);
+ infile.read(reinterpret_cast<char*>(&bgen_P),4);
else
- bgen_P=6*bgen_N;
+ bgen_P=6*bgen_N;
infile.ignore(static_cast<size_t>(bgen_P));
continue;
}
-
-
if(CompressedSNPBlocks)
{
-
-
- infile.read(reinterpret_cast<char*>(&bgen_P),4);
- uint8_t zipped_data[bgen_P];
-
- unzipped_data_size=6*bgen_N;
-
- infile.read(reinterpret_cast<char*>(zipped_data),bgen_P);
-
- int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data), reinterpret_cast<uLongf*>(&unzipped_data_size), reinterpret_cast<Bytef*>(zipped_data), static_cast<uLong> (bgen_P));
+ infile.read(reinterpret_cast<char*>(&bgen_P),4);
+ uint8_t zipped_data[bgen_P];
+
+ unzipped_data_size=6*bgen_N;
+
+ infile.read(reinterpret_cast<char*>(zipped_data),bgen_P);
+
+ int result=
+ uncompress(reinterpret_cast<Bytef*>(unzipped_data),
+ reinterpret_cast<uLongf*>(&unzipped_data_size),
+ reinterpret_cast<Bytef*>(zipped_data),
+ static_cast<uLong> (bgen_P));
assert(result == Z_OK);
}
else
{
-
- bgen_P=6*bgen_N;
- infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
+
+ bgen_P=6*bgen_N;
+ infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
}
-
-
geno_mean=0.0; n_miss=0; geno_var=0.0;
gsl_vector_set_all(geno_miss, 0);
for (size_t i=0; i<bgen_N; ++i) {
-
- bgen_geno_prob_AA=static_cast<double>(unzipped_data[i*3])/32768.0;
- bgen_geno_prob_AB=static_cast<double>(unzipped_data[i*3+1])/32768.0;
- bgen_geno_prob_BB=static_cast<double>(unzipped_data[i*3+2])/32768.0;
- // WJA
- bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
- if (bgen_geno_prob_non_miss<0.9) {gsl_vector_set(geno_miss, i, 0.0); n_miss++;}
- else {
-
- bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
- bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
- bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
-
- genotype=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
-
- gsl_vector_set(geno, i, genotype);
- gsl_vector_set(geno_miss, i, 1.0);
- geno_mean+=genotype;
- geno_var+=genotype*genotype;
- }
+
+ bgen_geno_prob_AA=
+ static_cast<double>(unzipped_data[i*3])/32768.0;
+ bgen_geno_prob_AB=
+ static_cast<double>(unzipped_data[i*3+1])/32768.0;
+ bgen_geno_prob_BB=
+ static_cast<double>(unzipped_data[i*3+2])/32768.0;
+ // WJA
+ bgen_geno_prob_non_miss=bgen_geno_prob_AA +
+ bgen_geno_prob_AB+bgen_geno_prob_BB;
+ if (bgen_geno_prob_non_miss<0.9) {
+ gsl_vector_set(geno_miss, i, 0.0);
+ n_miss++;
+ }
+ else {
+
+ bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
+ bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
+ bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
+
+ genotype=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
+
+ gsl_vector_set(geno, i, genotype);
+ gsl_vector_set(geno_miss, i, 1.0);
+ geno_mean+=genotype;
+ geno_var+=genotype*genotype;
+ }
}
@@ -2472,18 +2743,24 @@ bool bgenKin (const string &file_oxford, vector<int> &indicator_snp, const int k
geno_var+=geno_mean*geno_mean*(double)n_miss;
geno_var/=(double)ni_total;
geno_var-=geno_mean*geno_mean;
-// geno_var=geno_mean*(1-geno_mean*0.5);
for (size_t i=0; i<ni_total; ++i) {
- if (gsl_vector_get (geno_miss, i)==0) {gsl_vector_set(geno, i, geno_mean);}
+ if (gsl_vector_get (geno_miss, i)==0) {
+ gsl_vector_set(geno, i, geno_mean);
+ }
}
gsl_vector_add_constant (geno, -1.0*geno_mean);
if (geno_var!=0) {
- if (k_mode==1) {gsl_blas_dsyr (CblasUpper, 1.0, geno, matrix_kin);}
- else if (k_mode==2) {gsl_blas_dsyr (CblasUpper, 1.0/geno_var, geno, matrix_kin);}
- else {cout<<"Unknown kinship mode."<<endl;}
+ if (k_mode==1) {
+ gsl_blas_dsyr(CblasUpper,1.0,geno,matrix_kin);
+ } else if (k_mode==2) {
+ gsl_blas_dsyr(CblasUpper,1.0/geno_var,geno,matrix_kin);
+ }
+ else {
+ cout<<"Unknown kinship mode."<<endl;
+ }
}
ns_test++;
@@ -2508,42 +2785,23 @@ bool bgenKin (const string &file_oxford, vector<int> &indicator_snp, const int k
return true;
}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-//read header to determine which column contains which item
+// Read header to determine which column contains which item.
bool ReadHeader_io (const string &line, HEADER &header)
{
- string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID","rsid","RSID","MarkerName"};
+ string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID",
+ "rsid","RSID","MarkerName"};
set<string> rs_set(rs_ptr, rs_ptr+11);
string chr_ptr[]={"chr","CHR"};
set<string> chr_set(chr_ptr, chr_ptr+2);
- string pos_ptr[]={"ps","PS","pos","POS","base_position","BASE_POSITION", "bp", "BP"};
+ string pos_ptr[]={"ps","PS","pos","POS","base_position",
+ "BASE_POSITION", "bp", "BP"};
set<string> pos_set(pos_ptr, pos_ptr+8);
string cm_ptr[]={"cm","CM"};
set<string> cm_set(cm_ptr, cm_ptr+2);
string a1_ptr[]={"a1","A1","allele1","ALLELE1","Allele1","INC_ALLELE"};
set<string> a1_set(a1_ptr, a1_ptr+5);
- string a0_ptr[]={"a0","A0","allele0","ALLELE0","Allele0","a2","A2","allele2","ALLELE2","Allele2","DEC_ALLELE"};
+ string a0_ptr[]={"a0","A0","allele0","ALLELE0","Allele0","a2","A2",
+ "allele2","ALLELE2","Allele2","DEC_ALLELE"};
set<string> a0_set(a0_ptr, a0_ptr+10);
string z_ptr[]={"z","Z","z_score","Z_SCORE","zscore","ZSCORE"};
@@ -2568,7 +2826,10 @@ bool ReadHeader_io (const string &line, HEADER &header)
string ncontrol_ptr[]={"ncontrol","NCONTROL","n_control","N_CONTROL"};
set<string> ncontrol_set(ncontrol_ptr, ncontrol_ptr+4);
- string af_ptr[]={"af","AF","maf","MAF","f","F","allele_freq","ALLELE_FREQ","allele_frequency","ALLELE_FREQUENCY","Freq.Allele1.HapMapCEU","FreqAllele1HapMapCEU", "Freq1.Hapmap"};
+ string af_ptr[]={"af","AF","maf","MAF","f","F","allele_freq",
+ "ALLELE_FREQ","allele_frequency","ALLELE_FREQUENCY",
+ "Freq.Allele1.HapMapCEU","FreqAllele1HapMapCEU",
+ "Freq1.Hapmap"};
set<string> af_set(af_ptr, af_ptr+13);
string var_ptr[]={"var","VAR"};
set<string> var_set(var_ptr, var_ptr+2);
@@ -2578,7 +2839,13 @@ bool ReadHeader_io (const string &line, HEADER &header)
string cor_ptr[]={"cor","COR","r","R"};
set<string> cor_set(cor_ptr, cor_ptr+4);
- header.rs_col=0; header.chr_col=0; header.pos_col=0; header.cm_col=0; header.a1_col=0; header.a0_col=0; header.z_col=0; header.beta_col=0; header.sebeta_col=0; header.chisq_col=0; header.p_col=0; header.n_col=0; header.nmis_col=0; header.nobs_col=0; header.ncase_col=0; header.ncontrol_col=0; header.af_col=0; header.var_col=0; header.ws_col=0; header.cor_col=0; header.coln=0;
+ header.rs_col=0; header.chr_col=0; header.pos_col=0;
+ header.cm_col=0; header.a1_col=0; header.a0_col=0; header.z_col=0;
+ header.beta_col=0; header.sebeta_col=0; header.chisq_col=0;
+ header.p_col=0; header.n_col=0; header.nmis_col=0;
+ header.nobs_col=0; header.ncase_col=0; header.ncontrol_col=0;
+ header.af_col=0; header.var_col=0; header.ws_col=0;
+ header.cor_col=0; header.coln=0;
char *ch_ptr;
string type;
@@ -2588,50 +2855,147 @@ bool ReadHeader_io (const string &line, HEADER &header)
while (ch_ptr!=NULL) {
type=ch_ptr;
if (rs_set.count(type)!=0) {
- if (header.rs_col==0) {header.rs_col=header.coln+1;} else {cout<<"error! more than two rs columns in the file."<<endl; n_error++;}
+ if (header.rs_col==0) {
+ header.rs_col=header.coln+1;
+ } else {
+ cout<<"error! more than two rs columns in the file."<<endl;
+ n_error++;
+ }
} else if (chr_set.count(type)!=0) {
- if (header.chr_col==0) {header.chr_col=header.coln+1;} else {cout<<"error! more than two chr columns in the file."<<endl; n_error++;}
+ if (header.chr_col==0) {
+ header.chr_col=header.coln+1;
+ } else {
+ cout<<"error! more than two chr columns in the file."<<endl;
+ n_error++;
+ }
} else if (pos_set.count(type)!=0) {
- if (header.pos_col==0) {header.pos_col=header.coln+1;} else {cout<<"error! more than two pos columns in the file."<<endl; n_error++;}
+ if (header.pos_col==0) {
+ header.pos_col=header.coln+1;
+ } else {
+ cout<<"error! more than two pos columns in the file."<<endl;
+ n_error++;
+ }
} else if (cm_set.count(type)!=0) {
- if (header.cm_col==0) {header.cm_col=header.coln+1;} else {cout<<"error! more than two cm columns in the file."<<endl; n_error++;}
+ if (header.cm_col==0) {
+ header.cm_col=header.coln+1;
+ } else {
+ cout<<"error! more than two cm columns in the file."<<endl;
+ n_error++;
+ }
} else if (a1_set.count(type)!=0) {
- if (header.a1_col==0) {header.a1_col=header.coln+1;} else {cout<<"error! more than two allele1 columns in the file."<<endl; n_error++;}
+ if (header.a1_col==0) {
+ header.a1_col=header.coln+1;
+ } else {
+ cout<<"error! more than two allele1 columns in the file."<<endl;
+ n_error++;
+ }
} else if (a0_set.count(type)!=0) {
- if (header.a0_col==0) {header.a0_col=header.coln+1;} else {cout<<"error! more than two allele0 columns in the file."<<endl; n_error++;}
+ if (header.a0_col==0) {
+ header.a0_col=header.coln+1;
+ } else {
+ cout<<"error! more than two allele0 columns in the file."<<endl;
+ n_error++;
+ }
} else if (z_set.count(type)!=0) {
- if (header.z_col==0) {header.z_col=header.coln+1;} else {cout<<"error! more than two z columns in the file."<<endl; n_error++;}
+ if (header.z_col==0) {
+ header.z_col=header.coln+1;
+ } else {
+ cout<<"error! more than two z columns in the file."<<endl;
+ n_error++;
+ }
} else if (beta_set.count(type)!=0) {
- if (header.beta_col==0) {header.beta_col=header.coln+1;} else {cout<<"error! more than two beta columns in the file."<<endl; n_error++;}
+ if (header.beta_col==0) {
+ header.beta_col=header.coln+1;
+ } else {
+ cout<<"error! more than two beta columns in the file."<<endl;
+ n_error++;
+ }
} else if (sebeta_set.count(type)!=0) {
- if (header.sebeta_col==0) {header.sebeta_col=header.coln+1;} else {cout<<"error! more than two se_beta columns in the file."<<endl; n_error++;}
+ if (header.sebeta_col==0) {
+ header.sebeta_col=header.coln+1;
+ } else {
+ cout<<"error! more than two se_beta columns in the file."<<endl;
+ n_error++;
+ }
} else if (chisq_set.count(type)!=0) {
- if (header.chisq_col==0) {header.chisq_col=header.coln+1;} else {cout<<"error! more than two z columns in the file."<<endl; n_error++;}
+ if (header.chisq_col==0) {
+ header.chisq_col=header.coln+1;
+ } else {
+ cout<<"error! more than two z columns in the file."<<endl;
+ n_error++;
+ }
} else if (p_set.count(type)!=0) {
- if (header.p_col==0) {header.p_col=header.coln+1;} else {cout<<"error! more than two p columns in the file."<<endl; n_error++;}
+ if (header.p_col==0) {
+ header.p_col=header.coln+1;
+ } else {
+ cout<<"error! more than two p columns in the file."<<endl;
+ n_error++;
+ }
} else if (n_set.count(type)!=0) {
- if (header.n_col==0) {header.n_col=header.coln+1;} else {cout<<"error! more than two n_total columns in the file."<<endl; n_error++;}
+ if (header.n_col==0) {
+ header.n_col=header.coln+1;
+ } else {
+ cout<<"error! more than two n_total columns in the file."<<endl;
+ n_error++;
+ }
} else if (nmis_set.count(type)!=0) {
- if (header.nmis_col==0) {header.nmis_col=header.coln+1;} else {cout<<"error! more than two n_mis columns in the file."<<endl; n_error++;}
+ if (header.nmis_col==0) {header.nmis_col=header.coln+1;} else {
+ cout<<"error! more than two n_mis columns in the file."<<endl;
+ n_error++;
+ }
} else if (nobs_set.count(type)!=0) {
- if (header.nobs_col==0) {header.nobs_col=header.coln+1;} else {cout<<"error! more than two n_obs columns in the file."<<endl; n_error++;}
+ if (header.nobs_col==0) {
+ header.nobs_col=header.coln+1;
+ } else {
+ cout<<"error! more than two n_obs columns in the file."<<endl;
+ n_error++;
+ }
} else if (ncase_set.count(type)!=0) {
- if (header.ncase_col==0) {header.ncase_col=header.coln+1;} else {cout<<"error! more than two n_case columns in the file."<<endl; n_error++;}
+ if (header.ncase_col==0) {
+ header.ncase_col=header.coln+1;
+ } else {
+ cout<<"error! more than two n_case columns in the file."<<endl;
+ n_error++;
+ }
} else if (ncontrol_set.count(type)!=0) {
- if (header.ncontrol_col==0) {header.ncontrol_col=header.coln+1;} else {cout<<"error! more than two n_control columns in the file."<<endl; n_error++;}
+ if (header.ncontrol_col==0) {
+ header.ncontrol_col=header.coln+1;
+ } else {
+ cout<<"error! more than two n_control columns in the file."<<endl;
+ n_error++;
+ }
} else if (ws_set.count(type)!=0) {
- if (header.ws_col==0) {header.ws_col=header.coln+1;} else {cout<<"error! more than two window_size columns in the file."<<endl; n_error++;}
+ if (header.ws_col==0) {
+ header.ws_col=header.coln+1;
+ } else {
+ cout<<"error! more than two window_size columns in the file."<<endl;
+ n_error++;
+ }
} else if (af_set.count(type)!=0) {
- if (header.af_col==0) {header.af_col=header.coln+1;} else {cout<<"error! more than two af columns in the file."<<endl; n_error++;}
+ if (header.af_col==0) {
+ header.af_col=header.coln+1;
+ } else {
+ cout<<"error! more than two af columns in the file."<<endl;
+ n_error++;
+ }
} else if (cor_set.count(type)!=0) {
- if (header.cor_col==0) {header.cor_col=header.coln+1;} else {cout<<"error! more than two cor columns in the file."<<endl; n_error++;}
+ if (header.cor_col==0) {
+ header.cor_col=header.coln+1;
+ } else {
+ cout<<"error! more than two cor columns in the file."<<endl;
+ n_error++;
+ }
} else {
string str = ch_ptr;
string cat = str.substr(str.size()-2, 2);
- // continuous
+
if(cat == "_c" || cat =="_C"){
+
+ // continuous
header.catc_col.insert(header.coln+1);
- } else { //discrete
+ } else {
+
+ // discrete
header.catd_col.insert(header.coln+1);
}
}
@@ -2640,7 +3004,10 @@ bool ReadHeader_io (const string &line, HEADER &header)
header.coln++;
}
- if (header.cor_col!=0 && header.cor_col!=header.coln) {cout<<"error! the cor column should be the last column."<<endl; n_error++;}
+ if (header.cor_col!=0 && header.cor_col!=header.coln) {
+ cout<<"error! the cor column should be the last column."<<endl;
+ n_error++;
+ }
if (header.rs_col==0) {
if (header.chr_col!=0 && header.pos_col!=0) {
@@ -2650,34 +3017,38 @@ bool ReadHeader_io (const string &line, HEADER &header)
}
}
- if (n_error==0) {return true;} else {return false;}
+ if (n_error==0) {
+ return true;
+ } else {
+ return false;
+ }
}
-
-
-
-//read category file, record mapRS2in
-//the category file does not contain a null category
-//so if a snp has 0 for all categories, then it is not included in the analysis
-bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat, size_t &n_vc)
-{
+// Read category file, record mapRS2cat. The category file does not
+// contain a null category, so if a SNP has 0 for all categories, then
+// it is not included in the analysis.
+bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat,
+ size_t &n_vc) {
mapRS2cat.clear();
igzstream infile (file_cat.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open category file: "<<file_cat<<endl; return false;}
+ if (!infile) {
+ cout<<"error! fail to open category file: "<<file_cat<<endl;
+ return false;
+ }
string line;
char *ch_ptr;
string rs, chr, a1, a0, pos, cm;
- size_t i_cat;// ns_vc=0;
+ size_t i_cat;
- //read header
+ // Read header.
HEADER header;
!safeGetline(infile, line).eof();
ReadHeader_io (line, header);
- //use the header to count the number of categories
+ // Use the header to count the number of categories.
n_vc=header.coln;
if (header.rs_col!=0) {n_vc--;}
if (header.chr_col!=0) {n_vc--;}
@@ -2686,7 +3057,7 @@ bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat, size_
if (header.a1_col!=0) {n_vc--;}
if (header.a0_col!=0) {n_vc--;}
- //read the following lines to record mapRS2cat
+ // Read the following lines to record mapRS2cat.
while (!safeGetline(infile, line).eof()) {
ch_ptr=strtok ((char *)line.c_str(), " , \t");
@@ -2717,27 +3088,23 @@ bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat, size_
ch_ptr=strtok (NULL, " , \t");
}
-
- //if (mapRS2cat.count(rs)==0) {mapRS2cat[rs]=n_vc+1; ns_vc++;}
}
- //if (ns_vc>0) {n_vc++;}
-
infile.clear();
infile.close();
return true;
}
-
-
-
-bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat, size_t &n_vc)
-{
+bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat,
+ size_t &n_vc) {
mapRS2cat.clear();
igzstream infile (file_mcat.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open mcategory file: "<<file_mcat<<endl; return false;}
+ if (!infile) {
+ cout<<"error! fail to open mcategory file: "<<file_mcat<<endl;
+ return false;
+ }
string file_name;
map<string, size_t> mapRS2cat_tmp;
@@ -2754,125 +3121,22 @@ bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat, siz
return true;
}
-
-
-
-
-/*
-//read the continuous category file, record mapR2catc
-bool ReadFile_catc (const string &file_cat, map<string, vector<double> > &mapRS2catc, size_t &n_cat)
-{
- mapRS2catc.clear();
-
- igzstream infile (file_cat.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open category file: "<<file_cat<<endl; return false;}
-
- string line;
- char *ch_ptr;
-
- string rs, chr, a1, a0, pos, cm;
- size_t i_cat;// ns_vc=0;
-
- //read header
- HEADER header;
- !safeGetline(infile, line).eof();
- ReadHeader_io (line, header);
-
- //use the header to count the number of categories
- n_cat=header.coln;
- if (header.rs_col!=0) {n_cat--;}
- if (header.chr_col!=0) {n_cat--;}
- if (header.pos_col!=0) {n_cat--;}
- if (header.cm_col!=0) {n_cat--;}
- if (header.a1_col!=0) {n_cat--;}
- if (header.a0_col!=0) {n_cat--;}
-
- //set up continous category
- vector<double> catc;
- for (size_t i=0; i<n_cat; i++) {
- catc.push_back(0);
- }
-
- //read the following lines to record mapRS2cat
- while (!safeGetline(infile, line).eof()) {
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
-
- i_cat=0;
- if (header.rs_col==0) {
- rs=chr+":"+pos;
- }
-
- for (size_t i=0; i<header.coln; i++) {
- if (header.rs_col!=0 && header.rs_col==i+1) {
- rs=ch_ptr;
- } else if (header.chr_col!=0 && header.chr_col==i+1) {
- chr=ch_ptr;
- } else if (header.pos_col!=0 && header.pos_col==i+1) {
- pos=ch_ptr;
- } else if (header.cm_col!=0 && header.cm_col==i+1) {
- cm=ch_ptr;
- } else if (header.a1_col!=0 && header.a1_col==i+1) {
- a1=ch_ptr;
- } else if (header.a0_col!=0 && header.a0_col==i+1) {
- a0=ch_ptr;
- } else {
- catc[i_cat]=atof(ch_ptr);
- i_cat++;
- }
-
- ch_ptr=strtok (NULL, " , \t");
- }
-
- if (mapRS2catc.count(rs)==0) {mapRS2catc[rs]=catc;}
-
- //if (mapRS2cat.count(rs)==0) {mapRS2cat[rs]=n_vc+1; ns_vc++;}
- }
-
- //if (ns_vc>0) {n_vc++;}
-
- infile.clear();
- infile.close();
-
- return true;
-}
-
-
-
-
-bool ReadFile_mcatc (const string &file_mcat, map<string, vector<double> > &mapRS2catc, size_t &n_cat)
-{
- mapRS2catc.clear();
-
- igzstream infile (file_mcat.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open mcategory file: "<<file_mcat<<endl; return false;}
-
- string file_name;
- map<string, vector<double> > mapRS2catc_tmp;
- size_t n_cat_tmp, t=0;
-
- while (!safeGetline(infile, file_name).eof()) {
- mapRS2catc_tmp.clear();
- ReadFile_catc (file_name, mapRS2catc_tmp, n_cat_tmp);
- mapRS2catc.insert(mapRS2catc_tmp.begin(), mapRS2catc_tmp.end());
- if (t==0) {n_cat=n_cat_tmp;}
- if (n_cat!=n_cat_tmp) {cout<<"number of category differs in different mcatc files."<<endl;;}
-
- t++;
- }
-
- return true;
-}
-*/
-
-
-
-
-//read bimbam mean genotype file and calculate kinship matrix; this time, the kinship matrix is not centered, and can contain multiple K matrix
-bool BimbamKin (const string &file_geno, const int display_pace, const vector<int> &indicator_idv, const vector<int> &indicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, const vector<SNPINFO> &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns)
-{
+// Read bimbam mean genotype file and calculate kinship matrix; this
+// time, the kinship matrix is not centered, and can contain multiple
+// K matrices.
+bool BimbamKin (const string &file_geno, const int display_pace,
+ const vector<int> &indicator_idv,
+ const vector<int> &indicator_snp,
+ const map<string, double> &mapRS2weight,
+ const map<string, size_t> &mapRS2cat,
+ const vector<SNPINFO> &snpInfo,
+ const gsl_matrix *W, gsl_matrix *matrix_kin,
+ gsl_vector *vector_ns) {
igzstream infile (file_geno.c_str(), igzstream::in);
- //ifstream infile (file_geno.c_str(), ifstream::in);
- if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
+ if (!infile) {
+ cout<<"error reading genotype file:"<<file_geno<<endl;
+ return false;
+ }
string line;
char *ch_ptr;
@@ -2902,7 +3166,7 @@ bool BimbamKin (const string &file_geno, const int display_pace, const vector<in
ns_vec.push_back(0);
}
- //create a large matrix
+ // Create a large matrix.
size_t msize=10000;
gsl_matrix *Xlarge=gsl_matrix_alloc (ni_test, msize*n_vc);
gsl_matrix_set_zero(Xlarge);
@@ -2910,14 +3174,16 @@ bool BimbamKin (const string &file_geno, const int display_pace, const vector<in
size_t ns_test=0;
for (size_t t=0; t<indicator_snp.size(); ++t) {
!safeGetline(infile, line).eof();
- if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
+ if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
+ ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);
+ }
if (indicator_snp[t]==0) {continue;}
ch_ptr=strtok ((char *)line.c_str(), " , \t");
ch_ptr=strtok (NULL, " , \t");
ch_ptr=strtok (NULL, " , \t");
- rs=snpInfo[t].rs_number;//this line is new
+ rs=snpInfo[t].rs_number; // This line is new.
geno_mean=0.0; n_miss=0; geno_var=0.0;
gsl_vector_set_all(geno_miss, 0);
@@ -2926,13 +3192,15 @@ bool BimbamKin (const string &file_geno, const int display_pace, const vector<in
for (size_t i=0; i<indicator_idv.size(); ++i) {
if (indicator_idv[i]==0) {continue;}
ch_ptr=strtok (NULL, " , \t");
- if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(geno_miss, i, 0); n_miss++;}
+ if (strcmp(ch_ptr, "NA")==0) {
+ gsl_vector_set(geno_miss, i, 0); n_miss++;
+ }
else {
- d=atof(ch_ptr);
- gsl_vector_set (geno, j, d);
- gsl_vector_set (geno_miss, j, 1);
- geno_mean+=d;
- geno_var+=d*d;
+ d=atof(ch_ptr);
+ gsl_vector_set (geno, j, d);
+ gsl_vector_set (geno_miss, j, 1);
+ geno_mean+=d;
+ geno_var+=d*d;
}
j++;
}
@@ -2941,10 +3209,11 @@ bool BimbamKin (const string &file_geno, const int display_pace, const vector<in
geno_var+=geno_mean*geno_mean*(double)n_miss;
geno_var/=(double)ni_test;
geno_var-=geno_mean*geno_mean;
-// geno_var=geno_mean*(1-geno_mean*0.5);
for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (geno_miss, i)==0) {gsl_vector_set(geno, i, geno_mean);}
+ if (gsl_vector_get (geno_miss, i)==0) {
+ gsl_vector_set(geno, i, geno_mean);
+ }
}
gsl_vector_add_constant (geno, -1.0*geno_mean);
@@ -2955,48 +3224,43 @@ bool BimbamKin (const string &file_geno, const int display_pace, const vector<in
gsl_blas_ddot (geno, geno, &geno_var);
geno_var/=(double)ni_test;
- if (geno_var!=0 && (mapRS2weight.size()==0 || mapRS2weight.count(rs)!=0) ) {
+ if (geno_var!=0 && (mapRS2weight.size()==0 ||
+ mapRS2weight.count(rs)!=0)) {
if (mapRS2weight.size()==0) {
d=1.0/geno_var;
} else {
d=mapRS2weight.at(rs)/geno_var;
}
- /*
- if (n_vc==1 || mapRS2cat.size()==0 ) {
- gsl_blas_dsyr (CblasUpper, d, geno, matrix_kin);
- ns_vec[0]++;
- } else if (mapRS2cat.count(rs)!=0) {
- i_vc=mapRS2cat.at(rs);
- ns_vec[i_vc]++;
- gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test, ni_test);
- gsl_blas_dsyr (CblasUpper, d, geno, &kin_sub.matrix);
- //eigenlib_dsyr (1.0, geno, matrix_kin);
- }
- */
-
gsl_vector_scale (geno, sqrt(d));
if (n_vc==1 || mapRS2cat.size()==0 ) {
- gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, ns_vec[0]%msize);
+ gsl_vector_view Xlarge_col=
+ gsl_matrix_column(Xlarge,ns_vec[0]%msize);
gsl_vector_memcpy (&Xlarge_col.vector, geno);
ns_vec[0]++;
if (ns_vec[0]%msize==0) {
- eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+ eigenlib_dgemm("N","T",1.0,Xlarge,Xlarge,1.0,matrix_kin);
gsl_matrix_set_zero(Xlarge);
}
} else if (mapRS2cat.count(rs)!=0) {
i_vc=mapRS2cat.at(rs);
- gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, msize*i_vc+ns_vec[i_vc]%msize);
+ gsl_vector_view Xlarge_col=
+ gsl_matrix_column(Xlarge,msize*i_vc+ns_vec[i_vc]%msize);
gsl_vector_memcpy (&Xlarge_col.vector, geno);
ns_vec[i_vc]++;
if (ns_vec[i_vc]%msize==0) {
- gsl_matrix_view X_sub=gsl_matrix_submatrix(Xlarge, 0, msize*i_vc, ni_test, msize);
- gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test, ni_test);
- eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0, &kin_sub.matrix);
+ gsl_matrix_view X_sub=
+ gsl_matrix_submatrix(Xlarge,0,msize*i_vc,
+ ni_test,msize);
+ gsl_matrix_view kin_sub=
+ gsl_matrix_submatrix(matrix_kin,0,ni_test*i_vc,
+ ni_test,ni_test);
+ eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix,
+ &X_sub.matrix, 1.0, &kin_sub.matrix);
gsl_matrix_set_zero(&X_sub.matrix);
}
@@ -3009,9 +3273,13 @@ bool BimbamKin (const string &file_geno, const int display_pace, const vector<in
for (size_t i_vc=0; i_vc<n_vc; i_vc++) {
if (ns_vec[i_vc]%msize!=0) {
- gsl_matrix_view X_sub=gsl_matrix_submatrix(Xlarge, 0, msize*i_vc, ni_test, msize);
- gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test, ni_test);
- eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0, &kin_sub.matrix);
+ gsl_matrix_view X_sub=
+ gsl_matrix_submatrix(Xlarge, 0, msize*i_vc, ni_test, msize);
+ gsl_matrix_view kin_sub=
+ gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test,
+ ni_test);
+ eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix,
+ 1.0, &kin_sub.matrix);
}
}
@@ -3047,16 +3315,19 @@ bool BimbamKin (const string &file_geno, const int display_pace, const vector<in
return true;
}
-
-
-
-
-
-
-bool PlinkKin (const string &file_bed, const int display_pace, const vector<int> &indicator_idv, const vector<int> &indicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, const vector<SNPINFO> &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns)
-{
+bool PlinkKin (const string &file_bed, const int display_pace,
+ const vector<int> &indicator_idv,
+ const vector<int> &indicator_snp,
+ const map<string, double> &mapRS2weight,
+ const map<string, size_t> &mapRS2cat,
+ const vector<SNPINFO> &snpInfo,
+ const gsl_matrix *W, gsl_matrix *matrix_kin,
+ gsl_vector *vector_ns) {
ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}
+ if (!infile) {
+ cout<<"error reading bed file:"<<file_bed<<endl;
+ return false;
+ }
char ch[1];
bitset<8> b;
@@ -3089,58 +3360,68 @@ bool PlinkKin (const string &file_bed, const int display_pace, const vector<int>
ns_vec.push_back(0);
}
- //create a large matrix
+ // Create a large matrix.
size_t msize=10000;
gsl_matrix *Xlarge=gsl_matrix_alloc (ni_test, msize*n_vc);
gsl_matrix_set_zero(Xlarge);
- //calculate n_bit and c, the number of bit for each snp
+ // Calculate n_bit, the number of bytes used by each SNP (4 genotypes per byte).
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1; }
- //print the first three majic numbers
+ // Read past the first three magic numbers.
for (int i=0; i<3; ++i) {
infile.read(ch,1);
b=ch[0];
}
for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
+ if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
+ ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);
+ }
if (indicator_snp[t]==0) {continue;}
- infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
+ // Seek to SNP t: n_bit bytes per SNP, offset by the 3 magic numbers.
+ infile.seekg(t*n_bit+3);
- rs=snpInfo[t].rs_number;//this line is new
+ rs=snpInfo[t].rs_number; // This line is new.
- //read genotypes
+ // Read genotypes.
geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0; ci_test=0;
for (int i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
- for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
- if ((i==(n_bit-1)) && ci_total==ni_total) {break;}
- if (indicator_idv[ci_total]==0) {ci_total++; continue;}
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {gsl_vector_set(geno, ci_test, 2.0); geno_mean+=2.0; geno_var+=4.0; }
- else {gsl_vector_set(geno, ci_test, 1.0); geno_mean+=1.0; geno_var+=1.0;}
- }
- else {
- if (b[2*j+1]==1) {gsl_vector_set(geno, ci_test, 0.0); }
- else {gsl_vector_set(geno, ci_test, -9.0); n_miss++; }
- }
-
- ci_test++;
- ci_total++;
- }
+ infile.read(ch,1);
+ b=ch[0];
+
+ // Minor allele homozygous: 2.0; major: 0.0;
+ for (size_t j=0; j<4; ++j) {
+ if ((i==(n_bit-1)) && ci_total==ni_total) {break;}
+ if (indicator_idv[ci_total]==0) {ci_total++; continue;}
+
+ if (b[2*j]==0) {
+ if (b[2*j+1]==0) {
+ gsl_vector_set(geno, ci_test, 2.0);
+ geno_mean+=2.0; geno_var+=4.0;
+ }
+ else {
+ gsl_vector_set(geno, ci_test, 1.0);
+ geno_mean+=1.0;
+ geno_var+=1.0;
+ }
+ }
+ else {
+ if (b[2*j+1]==1) {gsl_vector_set(geno, ci_test, 0.0); }
+ else {gsl_vector_set(geno, ci_test, -9.0); n_miss++; }
+ }
+
+ ci_test++;
+ ci_total++;
+ }
}
-
geno_mean/=(double)(ni_test-n_miss);
geno_var+=geno_mean*geno_mean*(double)n_miss;
geno_var/=(double)ni_test;
geno_var-=geno_mean*geno_mean;
-// geno_var=geno_mean*(1-geno_mean*0.5);
for (size_t i=0; i<ni_test; ++i) {
d=gsl_vector_get(geno,i);
@@ -3155,47 +3436,43 @@ bool PlinkKin (const string &file_bed, const int display_pace, const vector<int>
gsl_blas_ddot (geno, geno, &geno_var);
geno_var/=(double)ni_test;
- if (geno_var!=0 && (mapRS2weight.size()==0 || mapRS2weight.count(rs)!=0) ) {
+ if (geno_var!=0 && (mapRS2weight.size()==0 ||
+ mapRS2weight.count(rs)!=0)) {
if (mapRS2weight.size()==0) {
d=1.0/geno_var;
} else {
d=mapRS2weight.at(rs)/geno_var;
}
- /*
- if (n_vc==1 || mapRS2cat.size()==0 ) {
- gsl_blas_dsyr (CblasUpper, d, geno, matrix_kin);
- ns_vec[0]++;
- } else if (mapRS2cat.count(rs)!=0) {
- i_vc=mapRS2cat.at(rs);
- ns_vec[i_vc]++;
- gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test, ni_test);
- gsl_blas_dsyr (CblasUpper, d, geno, &kin_sub.matrix);
- }
- */
-
gsl_vector_scale (geno, sqrt(d));
if (n_vc==1 || mapRS2cat.size()==0 ) {
- gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, ns_vec[0]%msize);
+ gsl_vector_view Xlarge_col=
+ gsl_matrix_column (Xlarge, ns_vec[0]%msize);
gsl_vector_memcpy (&Xlarge_col.vector, geno);
ns_vec[0]++;
if (ns_vec[0]%msize==0) {
- eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+ eigenlib_dgemm("N","T",1.0,Xlarge,Xlarge,1.0,matrix_kin);
gsl_matrix_set_zero(Xlarge);
}
} else if (mapRS2cat.count(rs)!=0) {
i_vc=mapRS2cat.at(rs);
- gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, msize*i_vc+ns_vec[i_vc]%msize);
+ gsl_vector_view Xlarge_col=
+ gsl_matrix_column(Xlarge,msize*i_vc+ns_vec[i_vc]%msize);
gsl_vector_memcpy (&Xlarge_col.vector, geno);
ns_vec[i_vc]++;
if (ns_vec[i_vc]%msize==0) {
- gsl_matrix_view X_sub=gsl_matrix_submatrix(Xlarge, 0, msize*i_vc, ni_test, msize);
- gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test, ni_test);
- eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0, &kin_sub.matrix);
+ gsl_matrix_view X_sub=
+ gsl_matrix_submatrix(Xlarge,0,msize*i_vc,ni_test,
+ msize);
+ gsl_matrix_view kin_sub=
+ gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc,
+ ni_test, ni_test);
+ eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix,
+ &X_sub.matrix, 1.0, &kin_sub.matrix);
gsl_matrix_set_zero(&X_sub.matrix);
}
@@ -3208,9 +3485,13 @@ bool PlinkKin (const string &file_bed, const int display_pace, const vector<int>
for (size_t i_vc=0; i_vc<n_vc; i_vc++) {
if (ns_vec[i_vc]%msize!=0) {
- gsl_matrix_view X_sub=gsl_matrix_submatrix(Xlarge, 0, msize*i_vc, ni_test, msize);
- gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test, ni_test);
- eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0, &kin_sub.matrix);
+ gsl_matrix_view X_sub=
+ gsl_matrix_submatrix(Xlarge, 0, msize*i_vc, ni_test, msize);
+ gsl_matrix_view kin_sub=
+ gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc,
+ ni_test, ni_test);
+ eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix,
+ 1.0, &kin_sub.matrix);
}
}
@@ -3245,16 +3526,23 @@ bool PlinkKin (const string &file_bed, const int display_pace, const vector<int>
return true;
}
-
-
-bool MFILEKin (const size_t mfile_mode, const string &file_mfile, const int display_pace, const vector<int> &indicator_idv, const vector<vector<int> > &mindicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, const vector<vector<SNPINFO> > &msnpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns)
-{
+bool MFILEKin (const size_t mfile_mode, const string &file_mfile,
+ const int display_pace, const vector<int> &indicator_idv,
+ const vector<vector<int> > &mindicator_snp,
+ const map<string, double> &mapRS2weight,
+ const map<string, size_t> &mapRS2cat,
+ const vector<vector<SNPINFO> > &msnpInfo,
+ const gsl_matrix *W, gsl_matrix *matrix_kin,
+ gsl_vector *vector_ns) {
size_t n_vc=vector_ns->size, ni_test=matrix_kin->size1;
gsl_matrix_set_zero(matrix_kin);
gsl_vector_set_zero(vector_ns);
igzstream infile (file_mfile.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open mfile file: "<<file_mfile<<endl; return false;}
+ if (!infile) {
+ cout<<"error! fail to open mfile file: "<<file_mfile<<endl;
+ return false;
+ }
string file_name;
@@ -3273,11 +3561,11 @@ bool MFILEKin (const size_t mfile_mode, const string &file_mfile, const int disp
} else {
BimbamKin (file_name, display_pace, indicator_idv, mindicator_snp[l], mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp);
}
-
- //add ns
+
+ // Add ns.
gsl_vector_add(vector_ns, ns_tmp);
- //add kin
+ // Add kin.
for (size_t t=0; t<n_vc; t++) {
for (size_t i=0; i<ni_test; ++i) {
for (size_t j=0; j<=i; ++j) {
@@ -3291,11 +3579,12 @@ bool MFILEKin (const size_t mfile_mode, const string &file_mfile, const int disp
l++;
}
- //renormalize kin
+ // Renormalize kin.
for (size_t t=0; t<n_vc; t++) {
for (size_t i=0; i<ni_test; ++i) {
for (size_t j=0; j<=i; ++j) {
- d=gsl_matrix_get (matrix_kin, j, i+ni_test*t)/gsl_vector_get(vector_ns, t);
+ d=gsl_matrix_get (matrix_kin, j, i+ni_test*t)/
+ gsl_vector_get(vector_ns, t);
gsl_matrix_set (matrix_kin, i, j+ni_test*t, d);
gsl_matrix_set (matrix_kin, j, i+ni_test*t, d);
@@ -3315,15 +3604,16 @@ bool MFILEKin (const size_t mfile_mode, const string &file_mfile, const int disp
}
-
-
-//read var file, store mapRS2wsnp
-bool ReadFile_wsnp (const string &file_wsnp, map<string, double> &mapRS2weight)
-{
+// Read SNP weight file, store mapRS2weight.
+bool ReadFile_wsnp (const string &file_wsnp,
+ map<string, double> &mapRS2weight) {
mapRS2weight.clear();
igzstream infile (file_wsnp.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open snp weight file: "<<file_wsnp<<endl; return false;}
+ if (!infile) {
+ cout<<"error! fail to open snp weight file: "<<file_wsnp<<endl;
+ return false;
+ }
char *ch_ptr;
string line, rs;
@@ -3340,12 +3630,15 @@ bool ReadFile_wsnp (const string &file_wsnp, map<string, double> &mapRS2weight)
return true;
}
-bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc, map<string, vector<double> > &mapRS2wvector)
-{
+bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc,
+ map<string, vector<double> > &mapRS2wvector) {
mapRS2wvector.clear();
igzstream infile (file_wcat.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open snp weight file: "<<file_wcat<<endl; return false;}
+ if (!infile) {
+ cout<<"error! fail to open snp weight file: "<<file_wcat<<endl;
+ return false;
+ }
char *ch_ptr;
vector<double> weight;
@@ -3354,10 +3647,8 @@ bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc, map<string, vect
}
string line, rs, chr, a1, a0, pos, cm;
- //double af=0, var_x=0;
- //size_t n_total=0, n_mis=0, n_obs=0, n_case=0, n_control=0;
-
- //read header
+
+ // Read header.
HEADER header;
!safeGetline(infile, line).eof();
ReadHeader_io (line, header);
@@ -3366,7 +3657,6 @@ bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc, map<string, vect
if (isBlankLine(line)) {continue;}
ch_ptr=strtok ((char *)line.c_str(), " , \t");
- //n_total=0; n_mis=0; n_obs=0; n_case=0; n_control=0; n_case=0; af=0; var_x=0;
size_t t=0;
for (size_t i=0; i<header.coln; i++) {
if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
@@ -3375,22 +3665,23 @@ bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc, map<string, vect
else if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr; }
else if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr; }
else if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr; }
- //else if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr); }
- //else if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr); }
- //else if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr); }
- //else if (header.ncase_col!=0 && header.ncase_col==i+1) {n_case=atoi(ch_ptr); }
- //else if (header.ncontrol_col!=0 && header.ncontrol_col==i+1) {n_control=atoi(ch_ptr); }
- //else if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr); }
- //else if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr); }
else {
weight[t]=atof(ch_ptr); t++;
- if (t>n_vc) {cout<<"error! Number of columns in the wcat file does not match that of cat file."; return false;}
+ if (t>n_vc) {
+ cout<<"error! Number of columns in the wcat file does not "<<
+ "match that of cat file.";
+ return false;
+ }
}
ch_ptr=strtok (NULL, " , \t");
}
- if (t!=n_vc) {cout<<"error! Number of columns in the wcat file does not match that of cat file."; return false;}
+ if (t!=n_vc) {
+ cout<<"error! Number of columns in the wcat file does not "<<
+ "match that of cat file.";
+ return false;
+ }
if (header.rs_col==0) {
rs=chr+":"+pos;
@@ -3402,25 +3693,28 @@ bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc, map<string, vect
return true;
}
-
-
-
-
-
-
-
-//read the beta file, save snp z scores in to z2_score, and save category into indicator_snp based on mapRS2var and set, and indicator_snp record the category number (from 1 to n_vc), and provide var if maf/var is not provided in the beta file
-//notice that indicator_snp contains ns_test snps, instead of ns_total snps
-//read the beta file for the second time, compute q, and Vq based on block jacknife
-//use the mapRS2var to select snps (and to ), calculate q
-//do a block-wise jacknife, and compute Vq
-void ReadFile_beta (const string &file_beta, const map<string, size_t> &mapRS2cat, const map<string, double> &mapRS2wA, vector<size_t> &vec_cat, vector<size_t> &vec_ni, vector<double> &vec_weight, vector<double> &vec_z2, size_t &ni_total, size_t &ns_total, size_t &ns_test)
-{
+// Read the beta file, save SNP z scores into z2_score, and save
+// category into indicator_snp based on mapRS2var and set;
+// indicator_snp records the category number (from 1 to n_vc). Provide
+// var if maf/var is not provided in the beta file. Notice that
+// indicator_snp contains ns_test SNPs, instead of ns_total SNPs. Read
+// the beta file for the second time, compute q, and Vq based on block
+// jackknife. Use the mapRS2var to select SNPs, calculate q, do a
+// block-wise jackknife, and compute Vq.
+void ReadFile_beta (const string &file_beta,
+ const map<string, size_t> &mapRS2cat,
+ const map<string, double> &mapRS2wA,
+ vector<size_t> &vec_cat, vector<size_t> &vec_ni,
+ vector<double> &vec_weight, vector<double> &vec_z2,
+ size_t &ni_total, size_t &ns_total, size_t &ns_test) {
vec_cat.clear(); vec_ni.clear(); vec_weight.clear(); vec_z2.clear();
ni_total=0; ns_total=0; ns_test=0;
igzstream infile (file_beta.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open beta file: "<<file_beta<<endl; return;}
+ if (!infile) {
+ cout<<"error! fail to open beta file: "<<file_beta<<endl;
+ return;
+ }
string line;
char *ch_ptr;
@@ -3430,27 +3724,25 @@ void ReadFile_beta (const string &file_beta, const map<string, size_t> &mapRS2ca
double z=0, beta=0, se_beta=0, chisq=0, pvalue=0, zsquare=0, af=0, var_x=0;
size_t n_total=0, n_mis=0, n_obs=0, n_case=0, n_control=0;
- //read header
+ // Read header.
HEADER header;
!safeGetline(infile, line).eof();
ReadHeader_io (line, header);
if (header.n_col==0 ) {
- if ( (header.nobs_col==0 && header.nmis_col==0) && (header.ncase_col==0 && header.ncontrol_col==0) ) {
+ if ( (header.nobs_col==0 && header.nmis_col==0) &&
+ (header.ncase_col==0 && header.ncontrol_col==0) ) {
cout<<"error! missing sample size in the beta file."<<endl;
} else {
cout<<"total sample size will be replaced by obs/mis sample size."<<endl;
}
}
- if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0) && header.chisq_col==0 && header.p_col==0) {
+ if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0) &&
+ header.chisq_col==0 && header.p_col==0) {
cout<<"error! missing z scores in the beta file."<<endl;
}
- /*
- if (header.af_col==0 && header.var_col==0) {
- cout<<"error! missing allele frequency in the beta file."<<endl;
- }
- */
+
while (!safeGetline(infile, line).eof()) {
if (isBlankLine(line)) {continue;}
ch_ptr=strtok ((char *)line.c_str(), " , \t");
@@ -3467,7 +3759,9 @@ void ReadFile_beta (const string &file_beta, const map<string, size_t> &mapRS2ca
if (header.z_col!=0 && header.z_col==i+1) {z=atof(ch_ptr);}
if (header.beta_col!=0 && header.beta_col==i+1) {beta=atof(ch_ptr);}
- if (header.sebeta_col!=0 && header.sebeta_col==i+1) {se_beta=atof(ch_ptr);}
+ if (header.sebeta_col!=0 && header.sebeta_col==i+1) {
+ se_beta=atof(ch_ptr);
+ }
if (header.chisq_col!=0 && header.chisq_col==i+1) {chisq=atof(ch_ptr);}
if (header.p_col!=0 && header.p_col==i+1) {pvalue=atof(ch_ptr);}
@@ -3475,8 +3769,9 @@ void ReadFile_beta (const string &file_beta, const map<string, size_t> &mapRS2ca
if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
if (header.ncase_col!=0 && header.ncase_col==i+1) {n_case=atoi(ch_ptr);}
- if (header.ncontrol_col!=0 && header.ncontrol_col==i+1) {n_control=atoi(ch_ptr);}
-
+ if (header.ncontrol_col!=0 && header.ncontrol_col==i+1) {
+ n_control=atoi(ch_ptr);
+ }
if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);}
if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);}
@@ -3495,7 +3790,8 @@ void ReadFile_beta (const string &file_beta, const map<string, size_t> &mapRS2ca
}
}
- //both z values and beta/se_beta have directions, while chisq/pvalue do not
+ // Both z values and beta/se_beta have directions, while
+ // chisq/pvalue do not.
if (header.z_col!=0) {
zsquare=z*z;
} else if (header.beta_col!=0 && header.sebeta_col!=0) {
@@ -3507,13 +3803,14 @@ void ReadFile_beta (const string &file_beta, const map<string, size_t> &mapRS2ca
zsquare=gsl_cdf_chisq_Qinv (pvalue, 1);
} else {zsquare=0;}
- //obtain var_x
+ // Obtain var_x.
if (header.var_col==0 && header.af_col!=0) {
var_x=2.0*af*(1.0-af);
}
- //if the snp is also present in cor file, then do calculations
- if ( (mapRS2wA.size()==0 || mapRS2wA.count(rs)!=0) && (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) && zsquare!=0) {
+ // If the SNP is also present in cor file, then do calculations.
+ if ( (mapRS2wA.size()==0 || mapRS2wA.count(rs)!=0) &&
+ (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) && zsquare!=0) {
if (mapRS2cat.size()!=0) {
vec_cat.push_back(mapRS2cat.at(rs));
} else {
@@ -3540,17 +3837,17 @@ void ReadFile_beta (const string &file_beta, const map<string, size_t> &mapRS2ca
return;
}
-
-
-
-
-
-void ReadFile_beta (const string &file_beta, const map<string, double> &mapRS2wA, map<string, string> &mapRS2A1, map<string, double> &mapRS2z)
-{
+void ReadFile_beta (const string &file_beta,
+ const map<string, double> &mapRS2wA,
+ map<string, string> &mapRS2A1,
+ map<string, double> &mapRS2z) {
mapRS2A1.clear(); mapRS2z.clear();
igzstream infile (file_beta.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open beta file: "<<file_beta<<endl; return;}
+ if (!infile) {
+ cout<<"error! fail to open beta file: "<<file_beta<<endl;
+ return;
+ }
string line;
char *ch_ptr;
@@ -3561,13 +3858,14 @@ void ReadFile_beta (const string &file_beta, const map<string, double> &mapRS2wA
size_t n_total=0, n_mis=0, n_obs=0, n_case=0, n_control=0;
size_t ni_total=0, ns_total=0, ns_test=0;
- //read header
+ // Read header.
HEADER header;
!safeGetline(infile, line).eof();
ReadHeader_io (line, header);
if (header.n_col==0 ) {
- if ( (header.nobs_col==0 && header.nmis_col==0) && (header.ncase_col==0 && header.ncontrol_col==0) ) {
+ if ((header.nobs_col==0 && header.nmis_col==0) &&
+ (header.ncase_col==0 && header.ncontrol_col==0)) {
cout<<"error! missing sample size in the beta file."<<endl;
} else {
cout<<"total sample size will be replaced by obs/mis sample size."<<endl;
@@ -3577,11 +3875,7 @@ void ReadFile_beta (const string &file_beta, const map<string, double> &mapRS2wA
if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0)) {
cout<<"error! missing z scores in the beta file."<<endl;
}
- /*
- if (header.af_col==0 && header.var_col==0) {
- cout<<"error! missing allele frequency in the beta file."<<endl;
- }
- */
+
while (!safeGetline(infile, line).eof()) {
if (isBlankLine(line)) {continue;}
ch_ptr=strtok ((char *)line.c_str(), " , \t");
@@ -3598,7 +3892,9 @@ void ReadFile_beta (const string &file_beta, const map<string, double> &mapRS2wA
if (header.z_col!=0 && header.z_col==i+1) {z=atof(ch_ptr);}
if (header.beta_col!=0 && header.beta_col==i+1) {beta=atof(ch_ptr);}
- if (header.sebeta_col!=0 && header.sebeta_col==i+1) {se_beta=atof(ch_ptr);}
+ if (header.sebeta_col!=0 && header.sebeta_col==i+1) {
+ se_beta=atof(ch_ptr);
+ }
if (header.chisq_col!=0 && header.chisq_col==i+1) {chisq=atof(ch_ptr);}
if (header.p_col!=0 && header.p_col==i+1) {pvalue=atof(ch_ptr);}
@@ -3606,7 +3902,9 @@ void ReadFile_beta (const string &file_beta, const map<string, double> &mapRS2wA
if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
if (header.ncase_col!=0 && header.ncase_col==i+1) {n_case=atoi(ch_ptr);}
- if (header.ncontrol_col!=0 && header.ncontrol_col==i+1) {n_control=atoi(ch_ptr);}
+ if (header.ncontrol_col!=0 && header.ncontrol_col==i+1) {
+ n_control=atoi(ch_ptr);
+ }
if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);}
if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);}
@@ -3626,7 +3924,8 @@ void ReadFile_beta (const string &file_beta, const map<string, double> &mapRS2wA
}
}
- //both z values and beta/se_beta have directions, while chisq/pvalue do not
+ // Both z values and beta/se_beta have directions, while
+ // chisq/pvalue do not.
if (header.z_col!=0) {
z=z;
} else if (header.beta_col!=0 && header.sebeta_col!=0) {
@@ -3635,7 +3934,7 @@ void ReadFile_beta (const string &file_beta, const map<string, double> &mapRS2wA
z=0;
}
- //if the snp is also present in cor file, then do calculations
+ // If the snp is also present in cor file, then do calculations.
if ( (mapRS2wA.size()==0 || mapRS2wA.count(rs)!=0) ) {
mapRS2z[rs]=z;
mapRS2A1[rs]=a1;
@@ -3653,10 +3952,10 @@ void ReadFile_beta (const string &file_beta, const map<string, double> &mapRS2wA
return;
}
-
-
-void Calcq (const size_t n_block, const vector<size_t> &vec_cat, const vector<size_t> &vec_ni, const vector<double> &vec_weight, const vector<double> &vec_z2, gsl_matrix *Vq, gsl_vector *q, gsl_vector *s)
-{
+void Calcq (const size_t n_block, const vector<size_t> &vec_cat,
+ const vector<size_t> &vec_ni, const vector<double> &vec_weight,
+ const vector<double> &vec_z2, gsl_matrix *Vq, gsl_vector *q,
+ gsl_vector *s) {
gsl_matrix_set_zero (Vq);
gsl_vector_set_zero (q);
gsl_vector_set_zero (s);
@@ -3677,21 +3976,22 @@ void Calcq (const size_t n_block, const vector<size_t> &vec_cat, const vector<si
mat_s.push_back(vec_s);
}
- //compute q and s
+ // Compute q and s.
for (size_t i=0; i<vec_cat.size(); i++) {
- //extract quantities
+
+ // Extract quantities.
cat=vec_cat[i];
n_total=vec_ni[i];
w=vec_weight[i];
zsquare=vec_z2[i];
- //compute q and s
+ // Compute q and s.
vec_q[cat]+=(zsquare-1.0)*w/(double)n_total;
vec_s[cat]+=w;
n_snps[cat]++;
}
- //update q; vec_q is used again for computing Vq below
+ // Update q; vec_q is used again for computing Vq below.
for (size_t i=0; i<q->size; i++) {
if (vec_s[i]!=0) {
gsl_vector_set(q, i, vec_q[i]/vec_s[i]);
@@ -3699,14 +3999,15 @@ void Calcq (const size_t n_block, const vector<size_t> &vec_cat, const vector<si
gsl_vector_set(s, i, vec_s[i]);
}
- //compute Vq; divide SNPs in each category into evenly distributed blocks
+ // Compute Vq; divide SNPs in each category into evenly distributed
+ // blocks.
size_t t=0, b=0, n_snp=0;
double d, m, n;
for (size_t l=0; l<q->size; l++) {
n_snp=floor(n_snps[l]/n_block); t=0; b=0;
if (n_snp==0) {continue;}
- //initiate everything to zero
+ // Initialize everything to zero.
for (size_t i=0; i<n_block; i++) {
for (size_t j=0; j<q->size; j++) {
mat_q[i][j]=0;
@@ -3714,15 +4015,17 @@ void Calcq (const size_t n_block, const vector<size_t> &vec_cat, const vector<si
}
}
- //record values
+ // Record values.
for (size_t i=0; i<vec_cat.size(); i++) {
- //extract quantities
+
+ // Extract quantities.
cat=vec_cat[i];
n_total=vec_ni[i];
w=vec_weight[i];
zsquare=vec_z2[i];
- //save quantities for computing Vq (which is not divided by n_total)
+ // Save quantities for computing Vq (which is not divided by
+ // n_total).
mat_q[b][cat]+=(zsquare-1.0)*w;
mat_s[b][cat]+=w;
@@ -3735,7 +4038,7 @@ void Calcq (const size_t n_block, const vector<size_t> &vec_cat, const vector<si
}
}
- //center mat_q
+ // Center mat_q.
for (size_t i=0; i<q->size; i++) {
m=0; n=0;
for (size_t k=0; k<n_block; k++) {
@@ -3755,7 +4058,7 @@ void Calcq (const size_t n_block, const vector<size_t> &vec_cat, const vector<si
}
}
- //compute Vq for l'th row and l'th column only
+ // Compute Vq for l'th row and l'th column only.
for (size_t i=0; i<q->size; i++) {
d=0; n=0;
for (size_t k=0; k<n_block; k++) {
@@ -3788,14 +4091,14 @@ void Calcq (const size_t n_block, const vector<size_t> &vec_cat, const vector<si
return;
}
-
-
-
-//read vector file
+// Read vector file.
void ReadFile_vector (const string &file_vec, gsl_vector *vec)
{
igzstream infile (file_vec.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open vector file: "<<file_vec<<endl; return;}
+ if (!infile) {
+ cout<<"error! fail to open vector file: "<<file_vec<<endl;
+ return;
+ }
string line;
char *ch_ptr;
@@ -3812,11 +4115,12 @@ void ReadFile_vector (const string &file_vec, gsl_vector *vec)
return;
}
-
-void ReadFile_matrix (const string &file_mat, gsl_matrix *mat)
-{
+void ReadFile_matrix (const string &file_mat, gsl_matrix *mat) {
igzstream infile (file_mat.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open matrix file: "<<file_mat<<endl; return;}
+ if (!infile) {
+ cout<<"error! fail to open matrix file: "<<file_mat<<endl;
+ return;
+ }
string line;
char *ch_ptr;
@@ -3836,10 +4140,13 @@ void ReadFile_matrix (const string &file_mat, gsl_matrix *mat)
return;
}
-void ReadFile_matrix (const string &file_mat, gsl_matrix *mat1, gsl_matrix *mat2)
-{
+void ReadFile_matrix (const string &file_mat, gsl_matrix *mat1,
+ gsl_matrix *mat2) {
igzstream infile (file_mat.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open matrix file: "<<file_mat<<endl; return;}
+ if (!infile) {
+ cout<<"error! fail to open matrix file: "<<file_mat<<endl;
+ return;
+ }
string line;
char *ch_ptr;
@@ -3868,11 +4175,9 @@ void ReadFile_matrix (const string &file_mat, gsl_matrix *mat1, gsl_matrix *mat2
return;
}
-
-
-//read study file
-void ReadFile_study (const string &file_study, gsl_matrix *Vq_mat, gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni)
-{
+// Read study file.
+void ReadFile_study (const string &file_study, gsl_matrix *Vq_mat,
+ gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) {
string Vqfile=file_study+".Vq.txt";
string sfile=file_study+".size.txt";
string qfile=file_study+".q.txt";
@@ -3895,19 +4200,16 @@ void ReadFile_study (const string &file_study, gsl_matrix *Vq_mat, gsl_vector *q
return;
}
-
-//read reference file
-void ReadFile_ref (const string &file_ref, gsl_matrix *S_mat, gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni)
-{
+// Read reference file.
+void ReadFile_ref (const string &file_ref, gsl_matrix *S_mat,
+ gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) {
string sfile=file_ref+".size.txt";
string Sfile=file_ref+".S.txt";
- //string Vfile=file_ref+".V.txt";
gsl_vector *s=gsl_vector_alloc (s_vec->size+1);
ReadFile_vector(sfile, s);
ReadFile_matrix(Sfile, S_mat, Svar_mat);
- //ReadFile_matrix(Vfile, V_mat);
double d;
for (size_t i=0; i<s_vec->size; i++) {
@@ -3921,10 +4223,9 @@ void ReadFile_ref (const string &file_ref, gsl_matrix *S_mat, gsl_matrix *Svar_m
return;
}
-
-//read mstudy file
-void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat, gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni)
-{
+// Read mstudy file.
+void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat,
+ gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) {
gsl_matrix_set_zero(Vq_mat);
gsl_vector_set_zero(q_vec);
gsl_vector_set_zero(s_vec);
@@ -3935,7 +4236,10 @@ void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat, gsl_vector
gsl_vector *s=gsl_vector_alloc (s_vec->size+1);
igzstream infile (file_mstudy.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open mstudy file: "<<file_mstudy<<endl; return;}
+ if (!infile) {
+ cout<<"error! fail to open mstudy file: "<<file_mstudy<<endl;
+ return;
+ }
string file_name;
double d1, d2, d;
@@ -3996,45 +4300,42 @@ void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat, gsl_vector
return;
}
-//read reference file
-void ReadFile_mref (const string &file_mref, gsl_matrix *S_mat, gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni)
-{
+// Read mref file (a list of reference file name prefixes).
+void ReadFile_mref (const string &file_mref, gsl_matrix *S_mat,
+ gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) {
gsl_matrix_set_zero(S_mat);
gsl_matrix_set_zero(Svar_mat);
- // gsl_matrix_set_zero(V_mat);
gsl_vector_set_zero(s_vec);
ni=0;
- //size_t n_vc=S_mat->size1;
gsl_matrix *S_sub=gsl_matrix_alloc (S_mat->size1, S_mat->size2);
gsl_matrix *Svar_sub=gsl_matrix_alloc (Svar_mat->size1, Svar_mat->size2);
- //gsl_matrix *V_sub=gsl_matrix_alloc (V_mat->size1, V_mat->size2);
gsl_vector *s=gsl_vector_alloc (s_vec->size+1);
igzstream infile (file_mref.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open mref file: "<<file_mref<<endl; return;}
+ if (!infile) {
+ cout<<"error! fail to open mref file: "<<file_mref<<endl;
+ return;
+ }
string file_name;
double d1, d2, d;
- //size_t t_ij;
while (!safeGetline(infile, file_name).eof()) {
string sfile=file_name+".size.txt";
string Sfile=file_name+".S.txt";
- //string Vfile=file_name+".V.txt";
ReadFile_vector(sfile, s);
ReadFile_matrix(Sfile, S_sub, Svar_sub);
- //ReadFile_matrix(Vfile, V_sub);
- //update s_vec and ni
+ // Update s_vec and ni.
for (size_t i=0; i<s_vec->size; i++) {
d=gsl_vector_get (s, i)+gsl_vector_get (s_vec, i);
gsl_vector_set (s_vec, i, d);
}
ni=max(ni, (size_t)gsl_vector_get (s, s_vec->size));
- //update S and Svar from each file
+ // Update S and Svar from each file.
for (size_t i=0; i<S_mat->size1; i++) {
d1=gsl_vector_get(s, i);
for (size_t j=0; j<S_mat->size2; j++) {
@@ -4049,30 +4350,9 @@ void ReadFile_mref (const string &file_mref, gsl_matrix *S_mat, gsl_matrix *Svar
gsl_matrix_add (S_mat, S_sub);
gsl_matrix_add (Svar_mat, Svar_sub);
- /*
- //update V from each file
- for (size_t i=0; i<n_vc; i++) {
- d1=gsl_vector_get(s, i);
- for (size_t j=i; j<n_vc; j++) {
- d2=gsl_vector_get(s, j);
- t_ij=GetabIndex (i+1, j+1, n_vc-2);
- for (size_t l=0; l<n_vc+1; l++) {
- if (l==n_vc) {d3=1;} else {d3=gsl_vector_get(s, l);}
- for (size_t m=0; m<n_vc+1; m++) {
- if (m==n_vc) {d4=1;} else {d4=gsl_vector_get(s, m);}
-
- d=gsl_matrix_get (V_sub, l, t_ij*(n_vc+1)+m)*d1*d2*d3*d4;
- gsl_matrix_set (V_sub, l, t_ij*(n_vc+1)+m, d);
- }
- }
- }
- }
-
- gsl_matrix_add (V_mat, V_sub);
- */
}
- //final: update S and Svar
+ // Final: update S and Svar.
for (size_t i=0; i<S_mat->size1; i++) {
d1=gsl_vector_get(s_vec, i);
if (d1==0) {continue;}