From 449d882a3b33ef81ef4f0127c3932b01fa796dbb Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 3 Aug 2017 10:26:52 +0000 Subject: LOCO is implemented in GEMMA for the BIMBAM format. Pass in the -loco 1 switch for LOCO of chromosome 1. What are the use cases? 1. User runs vanilla GEMMA: all SNPs are considered input for GWA and K 2. User passes in -snps: all these SNPs are considered for GWA and K 3. User passes in -snps and -ksnps: All these SNPs are used for GWA, Ksnps are used for K 4. User passes in -loco: SNPs are split by chromosome (GWA incl., K excl.) 5. User passes in -snps, -gwasnps and -ksnps could mean that also GWA is subset explicitly (nyi) In all cases indicator_snp is honored and we get the most flexible way for studying SNP combinations that can be passed in in different ways. Overall added: - various comments in source code - tests in test framework incl. fast-check - NDEBUG compilation support in the Makefile - -debug switch for GEMMA debug output - debug.h which includes enforce functions which work like assert. Unlike assert, enforce also works in release compilation - -nind switch limits the number of individuals used (trim_individuals for testing) - enforcing tests of input files - e.g. is the number of individuals correct - checks for memory allocation - we should add more of those - more checks for gsl results - we should add more of those - replaced strtoken with regex as a first case. They should all be replaced. strtoken is not thread safe, for one. 
- introduced C++ iterators - introduced C++ closure in BimBam LMM for cached processing - more localized initialization of variables - makes for demonstrably more correct code - -ksnps adds snps into setKSnps - -gwasnps adds snps into setGWASnps - both sets are computed by -loco - attempted to make the code easier to read --- src/param.h | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'src/param.h') diff --git a/src/param.h b/src/param.h index 33e2431..45d8c0f 100644 --- a/src/param.h +++ b/src/param.h @@ -19,12 +19,15 @@ #ifndef __PARAM_H__ #define __PARAM_H__ +#include "debug.h" #include "gsl/gsl_matrix.h" #include "gsl/gsl_vector.h" #include #include #include +#define K_BATCH_SIZE 10000 // #snps used for batched K + using namespace std; class SNPINFO { @@ -110,6 +113,7 @@ class PARAM { public: // IO-related parameters. bool mode_silence; + bool mode_debug = false; int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value; vector p_column; // Which phenotype column needs analysis. @@ -135,12 +139,14 @@ public: string file_bf, file_hyp; string path_out; - string file_epm; // Estimated parameter file. - string file_ebv; // Estimated breeding value file. - string file_log; // Log file containing mean estimate. - string file_read; // File containing total number of reads. - string file_gene; // Gene expression file. - string file_snps; // File containing analyzed SNPs or genes. + string file_epm; // Estimated parameter file. + string file_ebv; // Estimated breeding value file. + string file_log; // Log file containing mean estimate. + string file_read; // File containing total number of reads. + string file_gene; // Gene expression file. + string file_snps; // File containing analyzed SNPs or genes. + string file_ksnps; // File SNPs for computing K + string file_gwasnps; // File SNPs for computing GWAS // WJA added. 
string file_oxford; @@ -152,6 +158,7 @@ public: double r2_level; // LMM-related parameters. + string loco; double l_min; double l_max; size_t n_region; @@ -215,6 +222,7 @@ public: // Number of individuals. size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref; + size_t ni_max = 0; // -nind switch for testing purposes // Number of observed and missing phenotypes. size_t np_obs, np_miss; @@ -305,7 +313,9 @@ public: vector snpInfo; // Record SNP information. vector> msnpInfo; // Record SNP information. - set setSnps; // Set of snps for analysis. + set setSnps; // Set of snps for analysis (-snps). + set setKSnps; // Set of snps for K (-ksnps and LOCO) + set setGWASnps; // Set of snps for GWA (-gwasnps and LOCO) // Constructor. PARAM(); @@ -351,4 +361,10 @@ public: size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt); +// Helpers for checking parameters +#define enforce_fexists(fn, msg) \ + if (!fn.empty()) \ + enforce_msg(stat(fn.c_str(), &fileInfo) == 0, \ + ((std::string(__STRING(fn)) + ": " + msg).c_str())); + #endif -- cgit v1.2.3