From 449d882a3b33ef81ef4f0127c3932b01fa796dbb Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 3 Aug 2017 10:26:52 +0000 Subject: LOCO is implemented in GEMMA for the BIMBAM format. Pass in the -loco 1 switch for LOCO of chromosome 1. What are the use cases? 1. User runs vanilla GEMMA: all SNPs are considered input for GWA and K 2. User passes in -snps: all these SNPs are considered for GWA and K 3. User passes in -snps and -ksnps: All these SNPs are used for GWA, Ksnps are used for K 4. User passes in -loco: SNPs are split by chromosome (GWA incl., K excl.) 5. User passes in -snps, -gwasnps and -ksnps could mean that also GWA is subset explicitly (nyi) In all cases indicator_snp is honored and we get the most flexible way for studying SNP combinations that can be passed in in different ways. Overall added: - various comments in source code - tests in test framework incl. fast-check - NDEBUG compilation support in the Makefile - -debug switch for GEMMA debug output - debug.h which includes enforce functions which work like assert. Unlike assert, enforce also works in release compilation - -nind switch limits the number of individuals used (trim_individuals for testing) - enforcing tests of input files - e.g. is the number of individuals correct - checks for memory allocation - we should add more of those - more checks for gsl results - we should add more of those - replaced strtoken with regex as a first case. They should all be replaced. strtoken is not thread safe, for one. 
- introduced C++ iterators - introduced C++ closure in BimBam LMM for cached processing - more localized initialization of variables - makes for demonstrably more correct code - -ksnps adds snps into setKSnps - -gwasnps adds snps into setGWASnps - both sets are computed by -loco - attempted to make the code easier to read --- src/io.h | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'src/io.h') diff --git a/src/io.h b/src/io.h index 3e1145a..27f145f 100644 --- a/src/io.h +++ b/src/io.h @@ -34,7 +34,7 @@ void ProgressBar(string str, double p, double total); void ProgressBar(string str, double p, double total, double ratio); std::istream &safeGetline(std::istream &is, std::string &t); -bool ReadFile_snps(const string &file_snps, set &setSnps); +bool ReadFile_snps(const string file_snps, set &setSnps); bool ReadFile_snps_header(const string &file_snps, set &setSnps); bool ReadFile_log(const string &file_log, double &pheno_mean); @@ -83,13 +83,14 @@ void ReadFile_mk(const string &file_mk, vector &indicator_idv, void ReadFile_eigenU(const string &file_u, bool &error, gsl_matrix *U); void ReadFile_eigenD(const string &file_d, bool &error, gsl_vector *eval); -bool BimbamKin(const string &file_geno, vector &indicator_snp, - const int k_mode, const int display_pace, - gsl_matrix *matrix_kin); +bool BimbamKin(const string file_geno, const set ksnps, + vector &indicator_snp, const int k_mode, + const int display_pace, gsl_matrix *matrix_kin, + const bool test_nind); bool PlinkKin(const string &file_bed, vector &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin); -bool ReadFile_geno(const string &file_geno, vector &indicator_idv, +bool ReadFile_geno(const string file_geno, vector &indicator_idv, vector &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K); bool ReadFile_bed(const string &file_bed, vector &indicator_idv, @@ -124,13 +125,14 @@ bool ReadFile_catc(const string &file_cat, 
bool ReadFile_mcatc(const string &file_mcat, map> &mapRS2catc, size_t &n_cat); -bool BimbamKin(const string &file_geno, const int display_pace, - const vector &indicator_idv, - const vector &indicator_snp, - const map &mapRS2weight, - const map &mapRS2cat, - const vector &snpInfo, const gsl_matrix *W, - gsl_matrix *matrix_kin, gsl_vector *vector_ns); +bool BimbamKinUncentered(const string &file_geno, const set ksnps, + const int display_pace, + const vector &indicator_idv, + const vector &indicator_snp, + const map &mapRS2weight, + const map &mapRS2cat, + const vector &snpInfo, const gsl_matrix *W, + gsl_matrix *matrix_kin, gsl_vector *vector_ns); bool PlinkKin(const string &file_bed, const int display_pace, const vector &indicator_idv, const vector &indicator_snp, @@ -139,7 +141,8 @@ bool PlinkKin(const string &file_bed, const int display_pace, const vector &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns); bool MFILEKin(const size_t mfile_mode, const string &file_mfile, - const int display_pace, const vector &indicator_idv, + const set setKSnps, const int display_pace, + const vector &indicator_idv, const vector> &mindicator_snp, const map &mapRS2weight, const map &mapRS2cat, -- cgit v1.2.3