about summary refs log tree commit diff
path: root/src/io.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/io.cpp')
-rw-r--r-- src/io.cpp | 160
1 file changed, 79 insertions, 81 deletions
diff --git a/src/io.cpp b/src/io.cpp
index 8abdeec..35a59ee 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -152,7 +152,7 @@ bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) {
 
   // Read header.
   HEADER header;
-  !safeGetline(infile, line).eof();
+  safeGetline(infile, line).eof();
   ReadHeader_io(line, header);
 
   if (header.rs_col == 0 && (header.chr_col == 0 || header.pos_col == 0)) {
@@ -208,7 +208,7 @@ bool ReadFile_log(const string &file_log, double &pheno_mean) {
   size_t flag = 0;
 
   while (getline(infile, line)) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     ch_ptr = strtok(NULL, " , \t");
 
     if (ch_ptr != NULL && strcmp(ch_ptr, "estimated") == 0) {
@@ -216,7 +216,7 @@ bool ReadFile_log(const string &file_log, double &pheno_mean) {
       if (ch_ptr != NULL && strcmp(ch_ptr, "mean") == 0) {
         ch_ptr = strtok(NULL, " , \t");
         if (ch_ptr != NULL && strcmp(ch_ptr, "=") == 0) {
-          ch_ptr = strtok(NULL, " , \t");
+          ch_ptr = strtok_safe(NULL, " , \t");
           pheno_mean = atof(ch_ptr);
           flag = 1;
         }
@@ -314,7 +314,7 @@ bool ReadFile_column(const string &file_pheno, vector<int> &indicator_idv,
   string id;
   double p;
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     for (int i = 0; i < (p_column - 1); ++i) {
       ch_ptr = strtok(NULL, " , \t");
     }
@@ -486,17 +486,17 @@ bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo) {
   string minor;
 
   while (getline(infile, line)) {
-    ch_ptr = strtok((char *)line.c_str(), " \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " \t");
     chr = ch_ptr;
-    ch_ptr = strtok(NULL, " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
     rs = ch_ptr;
-    ch_ptr = strtok(NULL, " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
     cM = atof(ch_ptr);
-    ch_ptr = strtok(NULL, " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
     b_pos = atol(ch_ptr);
-    ch_ptr = strtok(NULL, " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
     minor = ch_ptr;
-    ch_ptr = strtok(NULL, " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
     major = ch_ptr;
 
     SNPINFO sInfo = {chr, rs, cM, b_pos, minor, major, 0, -9, -9, 0, 0, 0};
@@ -542,12 +542,12 @@ bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno,
   }
 
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " \t");
-    ch_ptr = strtok(NULL, " \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
     id = ch_ptr;
-    ch_ptr = strtok(NULL, " \t");
-    ch_ptr = strtok(NULL, " \t");
-    ch_ptr = strtok(NULL, " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
     ch_ptr = strtok(NULL, " \t");
 
     size_t i = 0;
@@ -649,11 +649,11 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
   file_pos = 0;
   auto count_warnings = 0;
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     rs = ch_ptr;
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
     minor = ch_ptr;
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
     major = ch_ptr;
 
     if (setSnps.size() != 0 && setSnps.count(rs) == 0) {
@@ -694,7 +694,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
     c_idv = 0;
     gsl_vector_set_zero(genotype_miss);
     for (int i = 0; i < ni_total; ++i) {
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (indicator_idv[i] == 0)
         continue;
 
@@ -1004,13 +1004,13 @@ bool Bimbam_ReadOneSNP(const size_t inc, const vector<int> &indicator_idv,
   bool flag = false;
 
   for (size_t i = 0; i < inc; i++) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
   }
 
   if (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
 
     geno_mean = 0.0;
     double d;
@@ -1018,7 +1018,7 @@ bool Bimbam_ReadOneSNP(const size_t inc, const vector<int> &indicator_idv,
     vector<size_t> geno_miss;
 
     for (size_t i = 0; i < ni_total; ++i) {
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (indicator_idv[i] == 0) {
         continue;
       }
@@ -1134,9 +1134,7 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
     size_t i_test = 0, i_total = 0, j_test = 0, j_total = 0;
     while (getline(infile, line)) {
       if (i_total == ni_total) {
-        cout << "error! number of rows in the kinship "
-             << "file is larger than the number of phentypes." << endl;
-        error = true;
+        fail_msg("number of rows in the kinship file is larger than the number of phentypes");
       }
 
       if (indicator_idv[i_total] == 0) {
@@ -1149,10 +1147,7 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
       ch_ptr = strtok((char *)line.c_str(), " , \t");
       while (ch_ptr != NULL) {
         if (j_total == ni_total) {
-          cout << "error! number of columns in the "
-               << "kinship file is larger than the number"
-               << " of phenotypes for row = " << i_total << endl;
-          error = true;
+          fail_msg(string("number of columns in the kinship file is larger than the number of individuals for row = ")+to_string(i_total));
         }
 
         d = atof(ch_ptr);
@@ -1165,18 +1160,14 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
         ch_ptr = strtok(NULL, " , \t");
       }
       if (j_total != ni_total) {
-        cout << "error! number of columns in the kinship "
-             << "file do not match the number of phentypes for "
-             << "row = " << i_total << endl;
-        error = true;
+        string msg = "number of columns in the kinship file does not match the number of individuals for row = " + to_string( i_total );
+        fail_msg(msg);
       }
       i_total++;
       i_test++;
     }
     if (i_total != ni_total) {
-      cout << "error! number of rows in the kinship file do "
-           << "not match the number of phenotypes." << endl;
-      error = true;
+      fail_msg("number of rows in the kinship file does not match the number of individuals.");
     }
   } else {
     map<size_t, size_t> mapID2ID;
@@ -1193,11 +1184,11 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
     size_t n_id1, n_id2;
 
     while (getline(infile, line)) {
-      ch_ptr = strtok((char *)line.c_str(), " , \t");
+      ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
       id1 = ch_ptr;
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       id2 = ch_ptr;
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       d = atof(ch_ptr);
       if (mapID2num.count(id1) == 0 || mapID2num.count(id2) == 0) {
         continue;
@@ -1212,9 +1203,10 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
 
       Cov_d = gsl_matrix_get(G, n_id1, n_id2);
       if (Cov_d != 0 && Cov_d != d) {
-        cout << "error! redundant and unequal terms in the "
+        cerr << "error! redundant and unequal terms in the "
              << "kinship file, for id1 = " << id1 << " and id2 = " << id2
              << endl;
+        fail_msg("");
       } else {
         gsl_matrix_set(G, n_id1, n_id2, d);
         gsl_matrix_set(G, n_id2, n_id1, d);
@@ -1253,7 +1245,6 @@ void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv,
 
   infile.close();
   infile.clear();
-  return;
 }
 
 void ReadFile_eigenU(const string &file_ku, bool &error, gsl_matrix *U) {
@@ -1329,7 +1320,7 @@ void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) {
       error = true;
     }
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     d = atof(ch_ptr);
 
     ch_ptr = strtok(NULL, " , \t");
@@ -1380,7 +1371,7 @@ bool BimbamKin(const string file_geno, const set<string> ksnps,
   size_t ns_test = 0;
   for (size_t t = 0; t < indicator_snp.size(); ++t) {
     string line;
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
       ProgressBar("Reading SNPs", t, indicator_snp.size() - 1);
     }
@@ -1662,21 +1653,21 @@ bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
   int c_idv = 0, c_snp = 0;
 
   for (int i = 0; i < ns_total; ++i) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (indicator_snp[i] == 0) {
       continue;
     }
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
 
     c_idv = 0;
     geno_mean = 0;
     n_miss = 0;
     gsl_vector_set_zero(genotype_miss);
     for (int j = 0; j < ni_total; ++j) {
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (indicator_idv[j] == 0) {
         continue;
       }
@@ -1771,21 +1762,21 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
   size_t c_idv = 0, c_snp = 0;
 
   for (size_t i = 0; i < ns_total; ++i) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (indicator_snp[i] == 0) {
       continue;
     }
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
 
     c_idv = 0;
     geno_mean = 0;
     n_miss = 0;
     gsl_vector_set_zero(genotype_miss);
     for (uint j = 0; j < ni_total; ++j) {
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (indicator_idv[j] == 0) {
         continue;
       }
@@ -2135,7 +2126,7 @@ bool ReadFile_est(const string &file_est, const vector<size_t> &est_column,
   size_t n = *max_element(est_column.begin(), est_column.end());
 
   while (getline(infile, line)) {
-    ch_ptr = strtok((char *)line.c_str(), " \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " \t");
 
     alpha = 0.0;
     beta = 0.0;
@@ -2154,7 +2145,7 @@ bool ReadFile_est(const string &file_est, const vector<size_t> &est_column,
         gamma = atof(ch_ptr);
       }
       if (i < n) {
-        ch_ptr = strtok(NULL, " \t");
+        ch_ptr = strtok_safe(NULL, " \t");
       }
     }
 
@@ -2212,7 +2203,7 @@ bool ReadFile_gene(const string &file_gene, vector<double> &vec_read,
   getline(infile, line);
 
   while (getline(infile, line)) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     rs = ch_ptr;
 
     ch_ptr = strtok(NULL, " , \t");
@@ -2536,7 +2527,7 @@ bool ReadFile_cat(const string &file_cat, map<string, size_t> &mapRS2cat,
 
   // Read header.
   HEADER header;
-  !safeGetline(infile, line).eof();
+  safeGetline(infile, line).eof();
   ReadHeader_io(line, header);
 
   // Use the header to count the number of categories.
@@ -2562,10 +2553,11 @@ bool ReadFile_cat(const string &file_cat, map<string, size_t> &mapRS2cat,
 
   // Read the following lines to record mapRS2cat.
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
 
     i_cat = 0;
     for (size_t i = 0; i < header.coln; i++) {
+      enforce(ch_ptr);
       if (header.rs_col != 0 && header.rs_col == i + 1) {
         rs = ch_ptr;
       } else if (header.chr_col != 0 && header.chr_col == i + 1) {
@@ -2686,16 +2678,16 @@ bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
 
   size_t ns_test = 0;
   for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
       ProgressBar("Reading SNPs", t, indicator_snp.size() - 1);
     }
     if (indicator_snp[t] == 0)
       continue;
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
 
     rs = snpInfo[t].rs_number; // This line is new.
 
@@ -2709,7 +2701,7 @@ bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
       if (indicator_idv[i] == 0) {
         continue;
       }
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (strcmp(ch_ptr, "NA") == 0) {
         gsl_vector_set(geno_miss, i, 0);
         n_miss++;
@@ -3151,9 +3143,9 @@ bool ReadFile_wsnp(const string &file_wsnp, map<string, double> &mapRS2weight) {
   double weight;
 
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     rs = ch_ptr;
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
     weight = atof(ch_ptr);
     mapRS2weight[rs] = weight;
   }
@@ -3182,17 +3174,18 @@ bool ReadFile_wsnp(const string &file_wcat, const size_t n_vc,
 
   // Read header.
   HEADER header;
-  !safeGetline(infile, line).eof();
+  safeGetline(infile, line).eof();
   ReadHeader_io(line, header);
 
   while (!safeGetline(infile, line).eof()) {
     if (isBlankLine(line)) {
       continue;
     }
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
 
     size_t t = 0;
     for (size_t i = 0; i < header.coln; i++) {
+      enforce(ch_ptr);
       if (header.rs_col != 0 && header.rs_col == i + 1) {
         rs = ch_ptr;
       } else if (header.chr_col != 0 && header.chr_col == i + 1) {
@@ -3274,7 +3267,7 @@ void ReadFile_beta(const string &file_beta,
 
   // Read header.
   HEADER header;
-  !safeGetline(infile, line).eof();
+  safeGetline(infile, line).eof();
   ReadHeader_io(line, header);
 
   if (header.n_col == 0) {
@@ -3296,7 +3289,7 @@ void ReadFile_beta(const string &file_beta,
     if (isBlankLine(line)) {
       continue;
     }
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
 
     z = 0;
     beta = 0;
@@ -3311,6 +3304,7 @@ void ReadFile_beta(const string &file_beta,
     af = 0;
     var_x = 0;
     for (size_t i = 0; i < header.coln; i++) {
+      enforce(ch_ptr);
       if (header.rs_col != 0 && header.rs_col == i + 1) {
         rs = ch_ptr;
       }
@@ -3456,7 +3450,7 @@ void ReadFile_beta(const string &file_beta, const map<string, double> &mapRS2wA,
 
   // Read header.
   HEADER header;
-  !safeGetline(infile, line).eof();
+  safeGetline(infile, line).eof();
   ReadHeader_io(line, header);
 
   if (header.n_col == 0) {
@@ -3477,7 +3471,7 @@ void ReadFile_beta(const string &file_beta, const map<string, double> &mapRS2wA,
     if (isBlankLine(line)) {
       continue;
     }
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
 
     z = 0;
     beta = 0;
@@ -3492,6 +3486,7 @@ void ReadFile_beta(const string &file_beta, const map<string, double> &mapRS2wA,
     af = 0;
     var_x = 0;
     for (size_t i = 0; i < header.coln; i++) {
+      enforce(ch_ptr);
       if (header.rs_col != 0 && header.rs_col == i + 1) {
         rs = ch_ptr;
       }
@@ -3762,8 +3757,8 @@ void ReadFile_vector(const string &file_vec, gsl_vector *vec) {
   char *ch_ptr;
 
   for (size_t i = 0; i < vec->size; i++) {
-    !safeGetline(infile, line).eof();
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    safeGetline(infile, line).eof();
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     gsl_vector_set(vec, i, atof(ch_ptr));
   }
 
@@ -3785,9 +3780,10 @@ void ReadFile_matrix(const string &file_mat, gsl_matrix *mat) {
   char *ch_ptr;
 
   for (size_t i = 0; i < mat->size1; i++) {
-    !safeGetline(infile, line).eof();
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    safeGetline(infile, line).eof();
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     for (size_t j = 0; j < mat->size2; j++) {
+      enforce(ch_ptr);
       gsl_matrix_set(mat, i, j, atof(ch_ptr));
       ch_ptr = strtok(NULL, " , \t");
     }
@@ -3812,18 +3808,20 @@ void ReadFile_matrix(const string &file_mat, gsl_matrix *mat1,
   char *ch_ptr;
 
   for (size_t i = 0; i < mat1->size1; i++) {
-    !safeGetline(infile, line).eof();
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    safeGetline(infile, line).eof();
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     for (size_t j = 0; j < mat1->size2; j++) {
+      enforce(ch_ptr);
       gsl_matrix_set(mat1, i, j, atof(ch_ptr));
       ch_ptr = strtok(NULL, " , \t");
     }
   }
 
   for (size_t i = 0; i < mat2->size1; i++) {
-    !safeGetline(infile, line).eof();
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    safeGetline(infile, line).eof();
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     for (size_t j = 0; j < mat2->size2; j++) {
+      enforce(ch_ptr);
       gsl_matrix_set(mat2, i, j, atof(ch_ptr));
       ch_ptr = strtok(NULL, " , \t");
     }