about summary refs log tree commit diff
path: root/src/param.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/param.cpp')
-rw-r--r--src/param.cpp212
1 files changed, 55 insertions, 157 deletions
diff --git a/src/param.cpp b/src/param.cpp
index 3b319e9..bf6c195 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -1,6 +1,8 @@
 /*
     Genome-wide Efficient Mixed Model Association (GEMMA)
-    Copyright (C) 2011-2017, Xiang Zhou
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -16,12 +18,13 @@
     along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
+#include <iostream>
+#include <iomanip>
+#include <string>
 #include <algorithm>
 #include <cmath>
 #include <cstring>
 #include <fstream>
-#include <iostream>
-#include <string>
 #include <sys/stat.h>
 
 #include "gsl/gsl_blas.h"
@@ -66,7 +69,7 @@ void LOCO_set_Snps(set<string> &ksnps, set<string> &gwasnps,
 // (indicator_idv[x] == 1). This should match indicator_cvt etc. If
 // this gives problems with certain sets we can simply trim to size.
 
-void trim_individuals(vector<int> &idvs, size_t ni_max, bool debug) {
+void trim_individuals(vector<int> &idvs, size_t ni_max) {
   if (ni_max) {
     size_t count = 0;
     for (auto ind = idvs.begin(); ind != idvs.end(); ++ind) {
@@ -76,7 +79,7 @@ void trim_individuals(vector<int> &idvs, size_t ni_max, bool debug) {
         break;
     }
     if (count != idvs.size()) {
-      if (debug)
+      if (is_debug_mode())
         cout << "**** TEST MODE: trim individuals from " << idvs.size()
              << " to " << count << endl;
       idvs.resize(count);
@@ -87,7 +90,7 @@ void trim_individuals(vector<int> &idvs, size_t ni_max, bool debug) {
 // ---- PARAM class implementation
 
 PARAM::PARAM(void)
-    : mode_silence(false), a_mode(0), k_mode(1), d_pace(100000),
+    : a_mode(0), k_mode(1), d_pace(DEFAULT_PACE),
       file_out("result"), path_out("./output/"), miss_level(0.05),
       maf_level(0.01), hwe_level(0), r2_level(0.9999), l_min(1e-5), l_max(1e5),
       n_region(10), p_nr(0.001), em_prec(0.0001), nr_prec(0.0001),
@@ -97,7 +100,7 @@ PARAM::PARAM(void)
       rho_ngrid(10), s_min(0), s_max(300), w_step(100000), s_step(1000000),
       r_pace(10), w_pace(1000), n_accept(0), n_mh(10), geo_mean(2000.0),
       randseed(-1), window_cm(0), window_bp(0), window_ns(0), n_block(200),
-      error(false), ni_subsample(0), n_cvt(1), n_vc(1), n_cat(0),
+      error(false), ni_subsample(0), n_cvt(1), n_cat(0), n_vc(1),
       time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0),
       time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) {}
 
@@ -221,7 +224,7 @@ void PARAM::ReadFiles(void) {
   } else {
     n_cvt = 1;
   }
-  trim_individuals(indicator_cvt, ni_max, mode_debug);
+  trim_individuals(indicator_cvt, ni_max);
 
   if (!file_gxe.empty()) {
     if (ReadFile_column(file_gxe, indicator_gxe, gxe, 1) == false) {
@@ -234,38 +237,7 @@ void PARAM::ReadFiles(void) {
     }
   }
 
-  trim_individuals(indicator_idv, ni_max, mode_debug);
-
-  // WJA added.
-  // Read genotype and phenotype file for bgen format.
-  if (!file_oxford.empty()) {
-    file_str = file_oxford + ".sample";
-    if (ReadFile_sample(file_str, indicator_pheno, pheno, p_column,
-                        indicator_cvt, cvt, n_cvt) == false) {
-      error = true;
-    }
-    if ((indicator_cvt).size() == 0) {
-      n_cvt = 1;
-    }
-
-    // Post-process covariates and phenotypes, obtain
-    // ni_test, save all useful covariates.
-    ProcessCvtPhen();
-
-    // Obtain covariate matrix.
-    gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
-    CopyCvt(W);
-
-    file_str = file_oxford + ".bgen";
-    if (ReadFile_bgen(file_str, setSnps, W, indicator_idv, indicator_snp,
-                      snpInfo, maf_level, miss_level, hwe_level, r2_level,
-                      ns_test) == false) {
-      error = true;
-    }
-    gsl_matrix_free(W);
-
-    ns_total = indicator_snp.size();
-  }
+  trim_individuals(indicator_idv, ni_max);
 
   // Read genotype and phenotype file for PLINK format.
   if (!file_bfile.empty()) {
@@ -297,16 +269,16 @@ void PARAM::ReadFiles(void) {
     ProcessCvtPhen();
 
     // Obtain covariate matrix.
-    gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
-    CopyCvt(W);
+    auto W1 = gsl_matrix_safe_alloc(ni_test, n_cvt);
+    CopyCvt(W1);
 
     file_str = file_bfile + ".bed";
-    if (ReadFile_bed(file_str, setSnps, W, indicator_idv, indicator_snp,
+    if (ReadFile_bed(file_str, setSnps, W1, indicator_idv, indicator_snp,
                      snpInfo, maf_level, miss_level, hwe_level, r2_level,
                      ns_test) == false) {
       error = true;
     }
-    gsl_matrix_free(W);
+    gsl_matrix_free(W1);
     ns_total = indicator_snp.size();
   }
 
@@ -330,17 +302,17 @@ void PARAM::ReadFiles(void) {
     ProcessCvtPhen();
 
     // Obtain covariate matrix.
-    gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
-    CopyCvt(W);
+    auto W2 = gsl_matrix_safe_alloc(ni_test, n_cvt);
+    CopyCvt(W2);
 
-    trim_individuals(indicator_idv, ni_max, mode_debug);
-    trim_individuals(indicator_cvt, ni_max, mode_debug);
-    if (ReadFile_geno(file_geno, setSnps, W, indicator_idv, indicator_snp,
+    trim_individuals(indicator_idv, ni_max);
+    trim_individuals(indicator_cvt, ni_max);
+    if (ReadFile_geno(file_geno, setSnps, W2, indicator_idv, indicator_snp,
                       maf_level, miss_level, hwe_level, r2_level, mapRS2chr,
-                      mapRS2bp, mapRS2cM, snpInfo, ns_test, mode_debug) == false) {
+                      mapRS2bp, mapRS2cM, snpInfo, ns_test) == false) {
       error = true;
     }
-    gsl_matrix_free(W);
+    gsl_matrix_free(W2);
 
     ns_total = indicator_snp.size();
   }
@@ -356,7 +328,7 @@ void PARAM::ReadFiles(void) {
 
     string file_name;
     size_t t = 0, ns_test_tmp = 0;
-    gsl_matrix *W;
+    gsl_matrix *W3 = NULL;
     while (!safeGetline(infile, file_name).eof()) {
       file_str = file_name + ".bim";
 
@@ -388,12 +360,12 @@ void PARAM::ReadFiles(void) {
         ProcessCvtPhen();
 
         // Obtain covariate matrix.
-        W = gsl_matrix_alloc(ni_test, n_cvt);
-        CopyCvt(W);
+        W3 = gsl_matrix_safe_alloc(ni_test, n_cvt);
+        CopyCvt(W3);
       }
 
       file_str = file_name + ".bed";
-      if (ReadFile_bed(file_str, setSnps, W, indicator_idv, indicator_snp,
+      if (ReadFile_bed(file_str, setSnps, W3, indicator_idv, indicator_snp,
                        snpInfo, maf_level, miss_level, hwe_level, r2_level,
                        ns_test_tmp) == false) {
         error = true;
@@ -406,7 +378,7 @@ void PARAM::ReadFiles(void) {
       t++;
     }
 
-    gsl_matrix_free(W);
+    if (W3) gsl_matrix_free(W3);
 
     infile.close();
     infile.clear();
@@ -432,8 +404,8 @@ void PARAM::ReadFiles(void) {
     ProcessCvtPhen();
 
     // Obtain covariate matrix.
-    gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
-    CopyCvt(W);
+    gsl_matrix *W4 = gsl_matrix_safe_alloc(ni_test, n_cvt);
+    CopyCvt(W4);
 
     igzstream infile(file_mgeno.c_str(), igzstream::in);
     if (!infile) {
@@ -445,9 +417,9 @@ void PARAM::ReadFiles(void) {
     string file_name;
     size_t ns_test_tmp;
     while (!safeGetline(infile, file_name).eof()) {
-      if (ReadFile_geno(file_name, setSnps, W, indicator_idv, indicator_snp,
+      if (ReadFile_geno(file_name, setSnps, W4, indicator_idv, indicator_snp,
                         maf_level, miss_level, hwe_level, r2_level, mapRS2chr,
-                        mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp, mode_debug) == false) {
+                        mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp) == false) {
         error = true;
       }
 
@@ -457,7 +429,7 @@ void PARAM::ReadFiles(void) {
       ns_total += indicator_snp.size();
     }
 
-    gsl_matrix_free(W);
+    gsl_matrix_free(W4);
 
     infile.close();
     infile.clear();
@@ -485,8 +457,8 @@ void PARAM::ReadFiles(void) {
     ProcessCvtPhen();
 
     // Obtain covariate matrix.
-    gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
-    CopyCvt(W);
+    // gsl_matrix *W5 = gsl_matrix_alloc(ni_test, n_cvt);
+    // CopyCvt(W5);
 
     if (ReadFile_gene(file_gene, vec_read, snpInfo, ng_total) == false) {
       error = true;
@@ -741,19 +713,6 @@ void PARAM::CheckParam(void) {
     }
   }
 
-  if (!file_oxford.empty()) {
-    str = file_oxford + ".bgen";
-    if (stat(str.c_str(), &fileInfo) == -1) {
-      cout << "error! fail to open .bgen file: " << str << endl;
-      error = true;
-    }
-    str = file_oxford + ".sample";
-    if (stat(str.c_str(), &fileInfo) == -1) {
-      cout << "error! fail to open .sample file: " << str << endl;
-      error = true;
-    }
-  }
-
   if ((!file_geno.empty() || !file_gene.empty())) {
     str = file_pheno;
     if (stat(str.c_str(), &fileInfo) == -1) {
@@ -864,11 +823,6 @@ void PARAM::CheckParam(void) {
     flag++;
   }
 
-  // WJA added.
-  if (!file_oxford.empty()) {
-    flag++;
-  }
-
   if (flag != 1 && a_mode != 15 && a_mode != 27 && a_mode != 28 &&
       a_mode != 43 && a_mode != 5 && a_mode != 61 && a_mode != 62 &&
       a_mode != 63 && a_mode != 66 && a_mode != 67) {
@@ -942,14 +896,12 @@ void PARAM::CheckParam(void) {
   enforce_fexists(file_snps, "open file");
   enforce_fexists(file_ksnps, "open file");
   enforce_fexists(file_gwasnps, "open file");
-  enforce_fexists(file_log, "open file");
   enforce_fexists(file_anno, "open file");
 
   if (!loco.empty()) {
     enforce_msg((a_mode >= 1 && a_mode <= 4) || a_mode == 21 || a_mode == 22,
                 "LOCO only works with LMM and K");
-    enforce_msg(file_bfile.empty(), "LOCO does not work with PLink (yet)");
-    enforce_msg(file_oxford.empty(), "LOCO does not work with Oxford (yet)");
+    // enforce_msg(file_bfile.empty(), "LOCO does not work with PLink (yet)");
     enforce_msg(file_gxe.empty(), "LOCO does not support GXE (yet)");
     enforce_msg(!file_anno.empty(),
                 "LOCO requires annotation file (-a switch)");
@@ -957,54 +909,15 @@ void PARAM::CheckParam(void) {
     enforce_msg(file_gwasnps.empty(), "LOCO does not allow -gwasnps switch");
   }
 
-  str = file_kin;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open relatedness matrix file: " << str << endl;
-    error = true;
-  }
-
-  str = file_mk;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open relatedness matrix file: " << str << endl;
-    error = true;
-  }
-
-  str = file_cvt;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open covariates file: " << str << endl;
-    error = true;
-  }
-
-  str = file_gxe;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open environmental covariate file: " << str << endl;
-    error = true;
-  }
-
-  str = file_weight;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open the residual weight file: " << str << endl;
-    error = true;
-  }
-
-  str = file_epm;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open estimated parameter file: " << str << endl;
-    error = true;
-  }
-
-  str = file_ebv;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open estimated breeding value file: " << str
-         << endl;
-    error = true;
-  }
-
-  str = file_read;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open total read file: " << str << endl;
-    error = true;
-  }
+  enforce_fexists(file_kin, "open file");
+  enforce_fexists(file_mk, "open file");
+  enforce_fexists(file_cvt, "open file");
+  enforce_fexists(file_gxe, "open file");
+  enforce_fexists(file_log, "open file");
+  enforce_fexists(file_weight, "open file");
+  enforce_fexists(file_epm, "open file");
+  enforce_fexists(file_ebv, "open file");
+  enforce_fexists(file_read, "open file");
 
   // Check if files are compatible with analysis mode.
   if (k_mode == 2 && !file_geno.empty()) {
@@ -1056,14 +969,6 @@ void PARAM::CheckParam(void) {
 
 void PARAM::CheckData(void) {
 
-  // WJA NOTE: I added this condition so that covariates can be added
-  // through sample, probably not exactly what is wanted.
-  if (file_oxford.empty()) {
-    if ((file_cvt).empty() || (indicator_cvt).size() == 0) {
-      n_cvt = 1;
-    }
-  }
-
   if ((a_mode == 66 || a_mode == 67) && (v_pve.size() != n_vc)) {
     cout << "error! the number of pve estimates does not equal to "
          << "the number of categories in the cat file:" << v_pve.size() << " "
@@ -1194,21 +1099,21 @@ void PARAM::CheckData(void) {
       cout << "## number of total genes = " << ng_total << endl;
     } else if (file_epm.empty() && a_mode != 43 && a_mode != 5) {
       if (!loco.empty())
-        cout << "## leave one chromosome out (LOCO) = " << loco << endl;
-      cout << "## number of total SNPs    = " << ns_total << endl;
+        cout << "## leave one chromosome out (LOCO) = " << setw(8) << loco << endl;
+      cout << "## number of total SNPs/var        = " << setw(8) << ns_total << endl;
       if (setSnps.size())
-        cout << "## number of considered SNPS = " << setSnps.size() << endl;
+        cout << "## number of considered SNPS       = " << setw(8) << setSnps.size() << endl;
       if (setKSnps.size())
-        cout << "## number of SNPS for K    = " << setKSnps.size() << endl;
+        cout << "## number of SNPS for K            = " << setw(8) << setKSnps.size() << endl;
       if (setGWASnps.size())
-        cout << "## number of SNPS for GWAS = " << setGWASnps.size() << endl;
-      cout << "## number of analyzed SNPs = " << ns_test << endl;
+        cout << "## number of SNPS for GWAS         = " << setw(8) << setGWASnps.size() << endl;
+      cout << "## number of analyzed SNPs         = " << setw(8) << ns_test << endl;
     } else {
     }
   }
 
   // Set d_pace to 1000 for gene expression.
-  if (!file_gene.empty() && d_pace == 100000) {
+  if (!file_gene.empty() && d_pace == DEFAULT_PACE) {
     d_pace = 1000;
   }
 
@@ -1340,7 +1245,7 @@ void PARAM::ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) {
     }
   } else {
     if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, UtX, K,
-                      calc_K, mode_debug) == false) {
+                      calc_K) == false) {
       error = true;
     }
   }
@@ -1360,7 +1265,7 @@ void PARAM::ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K,
     }
   } else {
     if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, Xt, K, calc_K,
-                      ni_test, ns_test, mode_debug) == false) {
+                      ni_test, ns_test) == false) {
       error = true;
     }
   }
@@ -1375,18 +1280,11 @@ void PARAM::CalcKin(gsl_matrix *matrix_kin) {
 
   if (!file_bfile.empty()) {
     file_str = file_bfile + ".bed";
-    enforce_msg(loco.empty(), "FIXME: LOCO nyi");
+    // enforce_msg(loco.empty(), "FIXME: LOCO nyi");
     if (PlinkKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) ==
         false) {
       error = true;
     }
-  } else if (!file_oxford.empty()) {
-    file_str = file_oxford + ".bgen";
-    enforce_msg(loco.empty(), "FIXME: LOCO nyi");
-    if (bgenKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) ==
-        false) {
-      error = true;
-    }
   } else {
     file_str = file_geno;
     if (BimbamKin(file_str, setKSnps, indicator_snp, a_mode - 20, d_pace,