about summary refs log tree commit diff
path: root/src/io.h
diff options
context:
space:
mode:
authorPjotr Prins2017-08-03 10:26:52 +0000
committerPjotr Prins2017-08-03 10:26:52 +0000
commit449d882a3b33ef81ef4f0127c3932b01fa796dbb (patch)
tree63a4031267b10f587b695adb487aca5213889b20 /src/io.h
parentd8db988550d4cd0303f0b82a75499c2c94d97d45 (diff)
downloadpangemma-449d882a3b33ef81ef4f0127c3932b01fa796dbb.tar.gz
LOCO is implemented in GEMMA for the BIMBAM format. Pass in the -loco
1 switch for LOCO of chromosome 1.

What are the use cases?

1. User runs vanilla GEMMA: all SNPs are considered input for GWA and K
2. User passes in -snps: all these SNPs are considered for GWA and K
3. User passes in -snps and -ksnps: All these SNPs are used for GWA,
   Ksnps are used for K
4. User passes in -loco: SNPs are split by chromosome (GWA incl., K
   excl.)
5. User passes in -snps, -gwasnps and -ksnps could mean that also GWA
   is subset explicitely (nyi)

In all cases indicator_snp is honored and we get the most flexible way for
studying SNP combinations that can be passed in in different ways.

Overall added:

  - various comments in source code
  - tests in test framework inlc. fast-check
  - NDEBUG compilation support in the Makefile
  - -debug switch for GEMMA debug output
  - debug.h which includes enforce functions which work like
    assert. Unlike assert, enforce also works in release compilation
  - -nind switch limit the number of individuals used
    (trim_individuals for testing)
  - enforcing tests of input files - e.g. are number of individuals correct
  - checks for memory allocation - we should add more of those
  - more checks for gsl results - we should add more of those
  - replaced strtoken with regex as a first case. They should all be
    replaced. strtoken is not thread safe, for one.
  - introduced C++ iterators
  - introduced C++ closure in BimBam LMM for cached processing
  - more localized initialization of variables - makes for demonstratably
    more correct code
  - -ksnps adds snps into setKSnps
  - -gwasnps adds snps into setGWASnps
  - both sets are computed by -loco
  - attempted to make the code easier to read
Diffstat (limited to 'src/io.h')
-rw-r--r--src/io.h29
1 files changed, 16 insertions, 13 deletions
diff --git a/src/io.h b/src/io.h
index 3e1145a..27f145f 100644
--- a/src/io.h
+++ b/src/io.h
@@ -34,7 +34,7 @@ void ProgressBar(string str, double p, double total);
 void ProgressBar(string str, double p, double total, double ratio);
 std::istream &safeGetline(std::istream &is, std::string &t);
 
-bool ReadFile_snps(const string &file_snps, set<string> &setSnps);
+bool ReadFile_snps(const string file_snps, set<string> &setSnps);
 bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps);
 bool ReadFile_log(const string &file_log, double &pheno_mean);
 
@@ -83,13 +83,14 @@ void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv,
 void ReadFile_eigenU(const string &file_u, bool &error, gsl_matrix *U);
 void ReadFile_eigenD(const string &file_d, bool &error, gsl_vector *eval);
 
-bool BimbamKin(const string &file_geno, vector<int> &indicator_snp,
-               const int k_mode, const int display_pace,
-               gsl_matrix *matrix_kin);
+bool BimbamKin(const string file_geno, const set<string> ksnps,
+               vector<int> &indicator_snp, const int k_mode,
+               const int display_pace, gsl_matrix *matrix_kin,
+               const bool test_nind);
 bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
               const int k_mode, const int display_pace, gsl_matrix *matrix_kin);
 
-bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
+bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
                    vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
                    const bool calc_K);
 bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
@@ -124,13 +125,14 @@ bool ReadFile_catc(const string &file_cat,
 bool ReadFile_mcatc(const string &file_mcat,
                     map<string, vector<double>> &mapRS2catc, size_t &n_cat);
 
-bool BimbamKin(const string &file_geno, const int display_pace,
-               const vector<int> &indicator_idv,
-               const vector<int> &indicator_snp,
-               const map<string, double> &mapRS2weight,
-               const map<string, size_t> &mapRS2cat,
-               const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
-               gsl_matrix *matrix_kin, gsl_vector *vector_ns);
+bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
+                         const int display_pace,
+                         const vector<int> &indicator_idv,
+                         const vector<int> &indicator_snp,
+                         const map<string, double> &mapRS2weight,
+                         const map<string, size_t> &mapRS2cat,
+                         const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
+                         gsl_matrix *matrix_kin, gsl_vector *vector_ns);
 bool PlinkKin(const string &file_bed, const int display_pace,
               const vector<int> &indicator_idv,
               const vector<int> &indicator_snp,
@@ -139,7 +141,8 @@ bool PlinkKin(const string &file_bed, const int display_pace,
               const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
               gsl_matrix *matrix_kin, gsl_vector *vector_ns);
 bool MFILEKin(const size_t mfile_mode, const string &file_mfile,
-              const int display_pace, const vector<int> &indicator_idv,
+              const set<string> setKSnps, const int display_pace,
+              const vector<int> &indicator_idv,
               const vector<vector<int>> &mindicator_snp,
               const map<string, double> &mapRS2weight,
               const map<string, size_t> &mapRS2cat,