diff options
author | Pjotr Prins | 2017-08-03 10:26:52 +0000 |
---|---|---|
committer | Pjotr Prins | 2017-08-03 10:26:52 +0000 |
commit | 449d882a3b33ef81ef4f0127c3932b01fa796dbb (patch) | |
tree | 63a4031267b10f587b695adb487aca5213889b20 /src/param.h | |
parent | d8db988550d4cd0303f0b82a75499c2c94d97d45 (diff) | |
download | pangemma-449d882a3b33ef81ef4f0127c3932b01fa796dbb.tar.gz |
LOCO is implemented in GEMMA for the BIMBAM format. Pass in the -loco
1 switch for LOCO of chromosome 1.
What are the use cases?
1. User runs vanilla GEMMA: all SNPs are considered input for GWA and K
2. User passes in -snps: all these SNPs are considered for GWA and K
3. User passes in -snps and -ksnps: All these SNPs are used for GWA,
Ksnps are used for K
4. User passes in -loco: SNPs are split by chromosome (GWA incl., K
excl.)
5. User passes in -snps, -gwasnps and -ksnps could mean that also GWA
is subset explicitly (nyi)
In all cases indicator_snp is honored and we get the most flexible way for
studying SNP combinations that can be passed in in different ways.
Overall added:
- various comments in source code
- tests in test framework incl. fast-check
- NDEBUG compilation support in the Makefile
- -debug switch for GEMMA debug output
- debug.h which includes enforce functions which work like
assert. Unlike assert, enforce also works in release compilation
- -nind switch to limit the number of individuals used
(trim_individuals for testing)
- enforcing tests of input files - e.g. are number of individuals correct
- checks for memory allocation - we should add more of those
- more checks for gsl results - we should add more of those
- replaced strtoken with regex as a first case. They should all be
replaced. strtoken is not thread safe, for one.
- introduced C++ iterators
- introduced C++ closure in BimBam LMM for cached processing
- more localized initialization of variables - makes for demonstrably
more correct code
- -ksnps adds snps into setKSnps
- -gwasnps adds snps into setGWASnps
- both sets are computed by -loco
- attempted to make the code easier to read
Diffstat (limited to 'src/param.h')
-rw-r--r-- | src/param.h | 30 |
1 files changed, 23 insertions, 7 deletions
diff --git a/src/param.h b/src/param.h index 33e2431..45d8c0f 100644 --- a/src/param.h +++ b/src/param.h @@ -19,12 +19,15 @@ #ifndef __PARAM_H__ #define __PARAM_H__ +#include "debug.h" #include "gsl/gsl_matrix.h" #include "gsl/gsl_vector.h" #include <map> #include <set> #include <vector> +#define K_BATCH_SIZE 10000 // #snps used for batched K + using namespace std; class SNPINFO { @@ -110,6 +113,7 @@ class PARAM { public: // IO-related parameters. bool mode_silence; + bool mode_debug = false; int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value; vector<size_t> p_column; // Which phenotype column needs analysis. @@ -135,12 +139,14 @@ public: string file_bf, file_hyp; string path_out; - string file_epm; // Estimated parameter file. - string file_ebv; // Estimated breeding value file. - string file_log; // Log file containing mean estimate. - string file_read; // File containing total number of reads. - string file_gene; // Gene expression file. - string file_snps; // File containing analyzed SNPs or genes. + string file_epm; // Estimated parameter file. + string file_ebv; // Estimated breeding value file. + string file_log; // Log file containing mean estimate. + string file_read; // File containing total number of reads. + string file_gene; // Gene expression file. + string file_snps; // File containing analyzed SNPs or genes. + string file_ksnps; // File SNPs for computing K + string file_gwasnps; // File SNPs for computing GWAS // WJA added. string file_oxford; @@ -152,6 +158,7 @@ public: double r2_level; // LMM-related parameters. + string loco; double l_min; double l_max; size_t n_region; @@ -215,6 +222,7 @@ public: // Number of individuals. size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref; + size_t ni_max = 0; // -nind switch for testing purposes // Number of observed and missing phenotypes. 
size_t np_obs, np_miss; @@ -305,7 +313,9 @@ public: vector<SNPINFO> snpInfo; // Record SNP information. vector<vector<SNPINFO>> msnpInfo; // Record SNP information. - set<string> setSnps; // Set of snps for analysis. + set<string> setSnps; // Set of snps for analysis (-snps). + set<string> setKSnps; // Set of snps for K (-ksnps and LOCO) + set<string> setGWASnps; // Set of snps for GWA (-gwasnps and LOCO) // Constructor. PARAM(); @@ -351,4 +361,10 @@ public: size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt); +// Helpers for checking parameters +#define enforce_fexists(fn, msg) \ + if (!fn.empty()) \ + enforce_msg(stat(fn.c_str(), &fileInfo) == 0, \ + ((std::string(__STRING(fn)) + ": " + msg).c_str())); + #endif |