aboutsummaryrefslogtreecommitdiff
path: root/src/io.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/io.cpp')
-rw-r--r--src/io.cpp156
1 files changed, 146 insertions, 10 deletions
diff --git a/src/io.cpp b/src/io.cpp
index fcbbec7..97b29b0 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -47,9 +47,10 @@
#include "io.h"
#endif
+
using namespace std;
-bool ReadHeader_io (const string &line, HEADER &header);
+
//Print process bar
void ProgressBar (string str, double p, double total)
@@ -180,7 +181,7 @@ bool ReadFile_snps_header (const string &file_snps, set<string> &setSnps)
//read header
HEADER header;
!safeGetline(infile, line).eof();
- ReadHeader_io (line, header);
+ ReadHeader (line, header);
if (header.rs_col==0 && (header.chr_col==0 || header.pos_col==0) ) {
cout<<"missing rs id in the hearder"<<endl;
@@ -2500,7 +2501,7 @@ bool bgenKin (const string &file_oxford, vector<int> &indicator_snp, const int k
//read header to determine which column contains which item
-bool ReadHeader_io (const string &line, HEADER &header)
+bool ReadHeader (const string &line, HEADER &header)
{
string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID","rsid","RSID","MarkerName"};
set<string> rs_set(rs_ptr, rs_ptr+11);
@@ -2594,8 +2595,17 @@ bool ReadHeader_io (const string &line, HEADER &header)
if (header.af_col==0) {header.af_col=header.coln+1;} else {cout<<"error! more than two af columns in the file."<<endl; n_error++;}
} else if (cor_set.count(type)!=0) {
if (header.cor_col==0) {header.cor_col=header.coln+1;} else {cout<<"error! more than two cor columns in the file."<<endl; n_error++;}
- } else {}
-
+ } else {
+ string str = ch_ptr;
+ string cat = str.substr(str.size()-2, 2);
+ // continuous
+ if(cat == "_c" || cat =="_C"){
+ header.catc_col.insert(header.coln+1);
+ } else { //discrete
+ header.catd_col.insert(header.coln+1);
+ }
+ }
+
ch_ptr=strtok (NULL, " , \t");
header.coln++;
}
@@ -2635,7 +2645,7 @@ bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat, size_
//read header
HEADER header;
!safeGetline(infile, line).eof();
- ReadHeader_io (line, header);
+ ReadHeader (line, header);
//use the header to count the number of categories
n_vc=header.coln;
@@ -2715,6 +2725,118 @@ bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat, siz
}
+
+
+
+/*
+//read the continuous category file, record mapR2catc
+bool ReadFile_catc (const string &file_cat, map<string, vector<double> > &mapRS2catc, size_t &n_cat)
+{
+ mapRS2catc.clear();
+
+ igzstream infile (file_cat.c_str(), igzstream::in);
+ if (!infile) {cout<<"error! fail to open category file: "<<file_cat<<endl; return false;}
+
+ string line;
+ char *ch_ptr;
+
+ string rs, chr, a1, a0, pos, cm;
+ size_t i_cat;// ns_vc=0;
+
+ //read header
+ HEADER header;
+ !safeGetline(infile, line).eof();
+ ReadHeader (line, header);
+
+ //use the header to count the number of categories
+ n_cat=header.coln;
+ if (header.rs_col!=0) {n_cat--;}
+ if (header.chr_col!=0) {n_cat--;}
+ if (header.pos_col!=0) {n_cat--;}
+ if (header.cm_col!=0) {n_cat--;}
+ if (header.a1_col!=0) {n_cat--;}
+ if (header.a0_col!=0) {n_cat--;}
+
+ //set up continous category
+ vector<double> catc;
+ for (size_t i=0; i<n_cat; i++) {
+ catc.push_back(0);
+ }
+
+ //read the following lines to record mapRS2cat
+ while (!safeGetline(infile, line).eof()) {
+ ch_ptr=strtok ((char *)line.c_str(), " , \t");
+
+ i_cat=0;
+ if (header.rs_col==0) {
+ rs=chr+":"+pos;
+ }
+
+ for (size_t i=0; i<header.coln; i++) {
+ if (header.rs_col!=0 && header.rs_col==i+1) {
+ rs=ch_ptr;
+ } else if (header.chr_col!=0 && header.chr_col==i+1) {
+ chr=ch_ptr;
+ } else if (header.pos_col!=0 && header.pos_col==i+1) {
+ pos=ch_ptr;
+ } else if (header.cm_col!=0 && header.cm_col==i+1) {
+ cm=ch_ptr;
+ } else if (header.a1_col!=0 && header.a1_col==i+1) {
+ a1=ch_ptr;
+ } else if (header.a0_col!=0 && header.a0_col==i+1) {
+ a0=ch_ptr;
+ } else {
+ catc[i_cat]=atof(ch_ptr);
+ i_cat++;
+ }
+
+ ch_ptr=strtok (NULL, " , \t");
+ }
+
+ if (mapRS2catc.count(rs)==0) {mapRS2catc[rs]=catc;}
+
+ //if (mapRS2cat.count(rs)==0) {mapRS2cat[rs]=n_vc+1; ns_vc++;}
+ }
+
+ //if (ns_vc>0) {n_vc++;}
+
+ infile.clear();
+ infile.close();
+
+ return true;
+}
+
+
+
+
+bool ReadFile_mcatc (const string &file_mcat, map<string, vector<double> > &mapRS2catc, size_t &n_cat)
+{
+ mapRS2catc.clear();
+
+ igzstream infile (file_mcat.c_str(), igzstream::in);
+ if (!infile) {cout<<"error! fail to open mcategory file: "<<file_mcat<<endl; return false;}
+
+ string file_name;
+ map<string, vector<double> > mapRS2catc_tmp;
+ size_t n_cat_tmp, t=0;
+
+ while (!safeGetline(infile, file_name).eof()) {
+ mapRS2catc_tmp.clear();
+ ReadFile_catc (file_name, mapRS2catc_tmp, n_cat_tmp);
+ mapRS2catc.insert(mapRS2catc_tmp.begin(), mapRS2catc_tmp.end());
+ if (t==0) {n_cat=n_cat_tmp;}
+ if (n_cat!=n_cat_tmp) {cout<<"number of category differs in different mcatc files."<<endl;;}
+
+ t++;
+ }
+
+ return true;
+}
+*/
+
+
+
+
//read bimbam mean genotype file and calculate kinship matrix; this time, the kinship matrix is not centered, and can contain multiple K matrix
bool BimbamKin (const string &file_geno, const int display_pace, const vector<int> &indicator_idv, const vector<int> &indicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, const vector<SNPINFO> &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns)
{
@@ -3208,7 +3330,7 @@ bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc, map<string, vect
//read header
HEADER header;
!safeGetline(infile, line).eof();
- ReadHeader_io (line, header);
+ ReadHeader (line, header);
while (!safeGetline(infile, line).eof()) {
if (isBlankLine(line)) {continue;}
@@ -3281,7 +3403,7 @@ void ReadFile_beta (const string &file_beta, const map<string, size_t> &mapRS2ca
//read header
HEADER header;
!safeGetline(infile, line).eof();
- ReadHeader_io (line, header);
+ ReadHeader (line, header);
if (header.n_col==0 ) {
if ( (header.nobs_col==0 && header.nmis_col==0) && (header.ncase_col==0 && header.ncontrol_col==0) ) {
@@ -3412,7 +3534,7 @@ void ReadFile_beta (const string &file_beta, const map<string, double> &mapRS2wA
//read header
HEADER header;
!safeGetline(infile, line).eof();
- ReadHeader_io (line, header);
+ ReadHeader (line, header);
if (header.n_col==0 ) {
if ( (header.nobs_col==0 && header.nmis_col==0) && (header.ncase_col==0 && header.ncontrol_col==0) ) {
@@ -3503,7 +3625,6 @@ void ReadFile_beta (const string &file_beta, const map<string, double> &mapRS2wA
-
void Calcq (const size_t n_block, const vector<size_t> &vec_cat, const vector<size_t> &vec_ni, const vector<double> &vec_weight, const vector<double> &vec_z2, gsl_matrix *Vq, gsl_vector *q, gsl_vector *s)
{
gsl_matrix_set_zero (Vq);
@@ -3845,6 +3966,21 @@ void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat, gsl_vector
return;
}
+
+//copied from lmm.cpp; is used in the following function compKtoV
+//map a number 1-(n_cvt+2) to an index between 0 and [(n_c+2)^2+(n_c+2)]/2-1
+size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) {
+ if (a>n_cvt+2 || b>n_cvt+2 || a<=0 || b<=0) {cout<<"error in GetabIndex."<<endl; return 0;}
+ size_t index;
+ size_t l, h;
+ if (b>a) {l=a; h=b;} else {l=b; h=a;}
+
+ size_t n=n_cvt+2;
+ index=(2*n-l+2)*(l-1)/2+h-l;
+
+ return index;
+}
+
//read reference file
void ReadFile_mref (const string &file_mref, gsl_matrix *S_mat, gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni)
{