diff options
author | Pjotr Prins | 2017-08-03 10:26:52 +0000 |
---|---|---|
committer | Pjotr Prins | 2017-08-03 10:26:52 +0000 |
commit | 449d882a3b33ef81ef4f0127c3932b01fa796dbb (patch) | |
tree | 63a4031267b10f587b695adb487aca5213889b20 /src/io.h | |
parent | d8db988550d4cd0303f0b82a75499c2c94d97d45 (diff) | |
download | pangemma-449d882a3b33ef81ef4f0127c3932b01fa796dbb.tar.gz |
LOCO is implemented in GEMMA for the BIMBAM format. Pass in the -loco
1 switch for LOCO of chromosome 1.
What are the use cases?
1. User runs vanilla GEMMA: all SNPs are considered input for GWA and K
2. User passes in -snps: all these SNPs are considered for GWA and K
3. User passes in -snps and -ksnps: All these SNPs are used for GWA,
Ksnps are used for K
4. User passes in -loco: SNPs are split by chromosome (GWA incl., K
excl.)
5. User passes in -snps, -gwasnps and -ksnps: this could mean that GWA
is also subset explicitly (nyi)
In all cases indicator_snp is honored and we get the most flexible way for
studying SNP combinations that can be passed in in different ways.
Overall added:
- various comments in source code
- tests in test framework incl. fast-check
- NDEBUG compilation support in the Makefile
- -debug switch for GEMMA debug output
- debug.h which includes enforce functions which work like
assert. Unlike assert, enforce also works in release compilation
- -nind switch limits the number of individuals used
(trim_individuals for testing)
- enforcing tests of input files - e.g. are number of individuals correct
- checks for memory allocation - we should add more of those
- more checks for gsl results - we should add more of those
- replaced strtoken with regex as a first case. They should all be
replaced. strtoken is not thread safe, for one.
- introduced C++ iterators
- introduced C++ closure in BimBam LMM for cached processing
- more localized initialization of variables - makes for demonstrably
more correct code
- -ksnps adds snps into setKSnps
- -gwasnps adds snps into setGWASnps
- both sets are computed by -loco
- attempted to make the code easier to read
Diffstat (limited to 'src/io.h')
-rw-r--r-- | src/io.h | 29 |
1 files changed, 16 insertions, 13 deletions
@@ -34,7 +34,7 @@ void ProgressBar(string str, double p, double total); void ProgressBar(string str, double p, double total, double ratio); std::istream &safeGetline(std::istream &is, std::string &t); -bool ReadFile_snps(const string &file_snps, set<string> &setSnps); +bool ReadFile_snps(const string file_snps, set<string> &setSnps); bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps); bool ReadFile_log(const string &file_log, double &pheno_mean); @@ -83,13 +83,14 @@ void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv, void ReadFile_eigenU(const string &file_u, bool &error, gsl_matrix *U); void ReadFile_eigenD(const string &file_d, bool &error, gsl_vector *eval); -bool BimbamKin(const string &file_geno, vector<int> &indicator_snp, - const int k_mode, const int display_pace, - gsl_matrix *matrix_kin); +bool BimbamKin(const string file_geno, const set<string> ksnps, + vector<int> &indicator_snp, const int k_mode, + const int display_pace, gsl_matrix *matrix_kin, + const bool test_nind); bool PlinkKin(const string &file_bed, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin); -bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv, +bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K); bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, @@ -124,13 +125,14 @@ bool ReadFile_catc(const string &file_cat, bool ReadFile_mcatc(const string &file_mcat, map<string, vector<double>> &mapRS2catc, size_t &n_cat); -bool BimbamKin(const string &file_geno, const int display_pace, - const vector<int> &indicator_idv, - const vector<int> &indicator_snp, - const map<string, double> &mapRS2weight, - const map<string, size_t> &mapRS2cat, - const vector<SNPINFO> &snpInfo, const gsl_matrix *W, - gsl_matrix *matrix_kin, gsl_vector *vector_ns); +bool BimbamKinUncentered(const string 
&file_geno, const set<string> ksnps, + const int display_pace, + const vector<int> &indicator_idv, + const vector<int> &indicator_snp, + const map<string, double> &mapRS2weight, + const map<string, size_t> &mapRS2cat, + const vector<SNPINFO> &snpInfo, const gsl_matrix *W, + gsl_matrix *matrix_kin, gsl_vector *vector_ns); bool PlinkKin(const string &file_bed, const int display_pace, const vector<int> &indicator_idv, const vector<int> &indicator_snp, @@ -139,7 +141,8 @@ bool PlinkKin(const string &file_bed, const int display_pace, const vector<SNPINFO> &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns); bool MFILEKin(const size_t mfile_mode, const string &file_mfile, - const int display_pace, const vector<int> &indicator_idv, + const set<string> setKSnps, const int display_pace, + const vector<int> &indicator_idv, const vector<vector<int>> &mindicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, |