aboutsummaryrefslogtreecommitdiff
path: root/src/param.h
diff options
context:
space:
mode:
authorPjotr Prins2017-08-03 10:26:52 +0000
committerPjotr Prins2017-08-03 10:26:52 +0000
commit449d882a3b33ef81ef4f0127c3932b01fa796dbb (patch)
tree63a4031267b10f587b695adb487aca5213889b20 /src/param.h
parentd8db988550d4cd0303f0b82a75499c2c94d97d45 (diff)
downloadpangemma-449d882a3b33ef81ef4f0127c3932b01fa796dbb.tar.gz
LOCO is implemented in GEMMA for the BIMBAM format. Pass in the -loco
1 switch for LOCO of chromosome 1. What are the use cases? 1. User runs vanilla GEMMA: all SNPs are considered input for GWA and K 2. User passes in -snps: all these SNPs are considered for GWA and K 3. User passes in -snps and -ksnps: All these SNPs are used for GWA, Ksnps are used for K 4. User passes in -loco: SNPs are split by chromosome (GWA incl., K excl.) 5. User passes in -snps, -gwasnps and -ksnps could mean that also GWA is subset explicitly (nyi). In all cases indicator_snp is honored and we get the most flexible way for studying SNP combinations that can be passed in in different ways. Overall added: - various comments in source code - tests in test framework incl. fast-check - NDEBUG compilation support in the Makefile - -debug switch for GEMMA debug output - debug.h which includes enforce functions which work like assert. Unlike assert, enforce also works in release compilation - -nind switch limits the number of individuals used (trim_individuals for testing) - enforcing tests of input files - e.g. are number of individuals correct - checks for memory allocation - we should add more of those - more checks for gsl results - we should add more of those - replaced strtoken with regex as a first case. They should all be replaced. strtoken is not thread safe, for one. - introduced C++ iterators - introduced C++ closure in BimBam LMM for cached processing - more localized initialization of variables - makes for demonstrably more correct code - -ksnps adds snps into setKSnps - -gwasnps adds snps into setGWASnps - both sets are computed by -loco - attempted to make the code easier to read
Diffstat (limited to 'src/param.h')
-rw-r--r--src/param.h30
1 files changed, 23 insertions, 7 deletions
diff --git a/src/param.h b/src/param.h
index 33e2431..45d8c0f 100644
--- a/src/param.h
+++ b/src/param.h
@@ -19,12 +19,15 @@
#ifndef __PARAM_H__
#define __PARAM_H__
+#include "debug.h"
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_vector.h"
#include <map>
#include <set>
#include <vector>
+#define K_BATCH_SIZE 10000 // #snps used for batched K
+
using namespace std;
class SNPINFO {
@@ -110,6 +113,7 @@ class PARAM {
public:
// IO-related parameters.
bool mode_silence;
+ bool mode_debug = false;
int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests
int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value;
vector<size_t> p_column; // Which phenotype column needs analysis.
@@ -135,12 +139,14 @@ public:
string file_bf, file_hyp;
string path_out;
- string file_epm; // Estimated parameter file.
- string file_ebv; // Estimated breeding value file.
- string file_log; // Log file containing mean estimate.
- string file_read; // File containing total number of reads.
- string file_gene; // Gene expression file.
- string file_snps; // File containing analyzed SNPs or genes.
+ string file_epm; // Estimated parameter file.
+ string file_ebv; // Estimated breeding value file.
+ string file_log; // Log file containing mean estimate.
+ string file_read; // File containing total number of reads.
+ string file_gene; // Gene expression file.
+ string file_snps; // File containing analyzed SNPs or genes.
+ string file_ksnps; // File SNPs for computing K
+ string file_gwasnps; // File SNPs for computing GWAS
// WJA added.
string file_oxford;
@@ -152,6 +158,7 @@ public:
double r2_level;
// LMM-related parameters.
+ string loco;
double l_min;
double l_max;
size_t n_region;
@@ -215,6 +222,7 @@ public:
// Number of individuals.
size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref;
+ size_t ni_max = 0; // -nind switch for testing purposes
// Number of observed and missing phenotypes.
size_t np_obs, np_miss;
@@ -305,7 +313,9 @@ public:
vector<SNPINFO> snpInfo; // Record SNP information.
vector<vector<SNPINFO>> msnpInfo; // Record SNP information.
- set<string> setSnps; // Set of snps for analysis.
+ set<string> setSnps; // Set of snps for analysis (-snps).
+ set<string> setKSnps; // Set of snps for K (-ksnps and LOCO)
+ set<string> setGWASnps; // Set of snps for GWA (-gwasnps and LOCO)
// Constructor.
PARAM();
@@ -351,4 +361,10 @@ public:
size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt);
+// Helpers for checking parameters. enforce_fexists(fn, msg): when the string fn is non-empty, calls enforce_msg with the condition stat(fn.c_str(), &fileInfo) == 0 and a message built as __STRING(fn) + ": " + msg; an empty fn is skipped without any check. Requires a variable named fileInfo to be in scope where the macro is expanded. NOTE(review): expands to an unbraced `if` that already ends in `;` - take care when using it directly before an `else`; __STRING presumably stringifies its argument (not defined in this header - confirm).
+#define enforce_fexists(fn, msg) \
+ if (!fn.empty()) \
+ enforce_msg(stat(fn.c_str(), &fileInfo) == 0, \
+ ((std::string(__STRING(fn)) + ": " + msg).c_str()));
+
#endif