aboutsummaryrefslogtreecommitdiff
path: root/src/io.h
diff options
context:
space:
mode:
authorPjotr Prins2017-08-03 10:26:52 +0000
committerPjotr Prins2017-08-03 10:26:52 +0000
commit449d882a3b33ef81ef4f0127c3932b01fa796dbb (patch)
tree63a4031267b10f587b695adb487aca5213889b20 /src/io.h
parentd8db988550d4cd0303f0b82a75499c2c94d97d45 (diff)
downloadpangemma-449d882a3b33ef81ef4f0127c3932b01fa796dbb.tar.gz
LOCO is implemented in GEMMA for the BIMBAM format. Pass in the -loco
1 switch for LOCO of chromosome 1. What are the use cases? 1. User runs vanilla GEMMA: all SNPs are considered input for GWA and K 2. User passes in -snps: all these SNPs are considered for GWA and K 3. User passes in -snps and -ksnps: All these SNPs are used for GWA, Ksnps are used for K 4. User passes in -loco: SNPs are split by chromosome (GWA incl., K excl.) 5. User passes in -snps, -gwasnps and -ksnps could mean that also GWA is subset explicitly (nyi) In all cases indicator_snp is honored and we get the most flexible way for studying SNP combinations that can be passed in in different ways. Overall added: - various comments in source code - tests in test framework incl. fast-check - NDEBUG compilation support in the Makefile - -debug switch for GEMMA debug output - debug.h which includes enforce functions which work like assert. Unlike assert, enforce also works in release compilation - -nind switch limits the number of individuals used (trim_individuals for testing) - enforcing tests of input files - e.g. are number of individuals correct - checks for memory allocation - we should add more of those - more checks for gsl results - we should add more of those - replaced strtoken with regex as a first case. They should all be replaced. strtoken is not thread safe, for one. - introduced C++ iterators - introduced C++ closure in BimBam LMM for cached processing - more localized initialization of variables - makes for demonstrably more correct code - -ksnps adds snps into setKSnps - -gwasnps adds snps into setGWASnps - both sets are computed by -loco - attempted to make the code easier to read
Diffstat (limited to 'src/io.h')
-rw-r--r--src/io.h29
1 files changed, 16 insertions, 13 deletions
diff --git a/src/io.h b/src/io.h
index 3e1145a..27f145f 100644
--- a/src/io.h
+++ b/src/io.h
@@ -34,7 +34,7 @@ void ProgressBar(string str, double p, double total);
void ProgressBar(string str, double p, double total, double ratio);
std::istream &safeGetline(std::istream &is, std::string &t);
-bool ReadFile_snps(const string &file_snps, set<string> &setSnps);
+bool ReadFile_snps(const string file_snps, set<string> &setSnps);
bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps);
bool ReadFile_log(const string &file_log, double &pheno_mean);
@@ -83,13 +83,14 @@ void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv,
void ReadFile_eigenU(const string &file_u, bool &error, gsl_matrix *U);
void ReadFile_eigenD(const string &file_d, bool &error, gsl_vector *eval);
-bool BimbamKin(const string &file_geno, vector<int> &indicator_snp,
- const int k_mode, const int display_pace,
- gsl_matrix *matrix_kin);
+bool BimbamKin(const string file_geno, const set<string> ksnps,
+ vector<int> &indicator_snp, const int k_mode,
+ const int display_pace, gsl_matrix *matrix_kin,
+ const bool test_nind);
bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
const int k_mode, const int display_pace, gsl_matrix *matrix_kin);
-bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
+bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
const bool calc_K);
bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
@@ -124,13 +125,14 @@ bool ReadFile_catc(const string &file_cat,
bool ReadFile_mcatc(const string &file_mcat,
map<string, vector<double>> &mapRS2catc, size_t &n_cat);
-bool BimbamKin(const string &file_geno, const int display_pace,
- const vector<int> &indicator_idv,
- const vector<int> &indicator_snp,
- const map<string, double> &mapRS2weight,
- const map<string, size_t> &mapRS2cat,
- const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
- gsl_matrix *matrix_kin, gsl_vector *vector_ns);
+bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
+ const int display_pace,
+ const vector<int> &indicator_idv,
+ const vector<int> &indicator_snp,
+ const map<string, double> &mapRS2weight,
+ const map<string, size_t> &mapRS2cat,
+ const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
+ gsl_matrix *matrix_kin, gsl_vector *vector_ns);
bool PlinkKin(const string &file_bed, const int display_pace,
const vector<int> &indicator_idv,
const vector<int> &indicator_snp,
@@ -139,7 +141,8 @@ bool PlinkKin(const string &file_bed, const int display_pace,
const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
gsl_matrix *matrix_kin, gsl_vector *vector_ns);
bool MFILEKin(const size_t mfile_mode, const string &file_mfile,
- const int display_pace, const vector<int> &indicator_idv,
+ const set<string> setKSnps, const int display_pace,
+ const vector<int> &indicator_idv,
const vector<vector<int>> &mindicator_snp,
const map<string, double> &mapRS2weight,
const map<string, size_t> &mapRS2cat,