about summary refs log tree commit diff
path: root/src/param.h
diff options
context:
space:
mode:
authorPjotr Prins2017-08-03 10:26:52 +0000
committerPjotr Prins2017-08-03 10:26:52 +0000
commit449d882a3b33ef81ef4f0127c3932b01fa796dbb (patch)
tree63a4031267b10f587b695adb487aca5213889b20 /src/param.h
parentd8db988550d4cd0303f0b82a75499c2c94d97d45 (diff)
downloadpangemma-449d882a3b33ef81ef4f0127c3932b01fa796dbb.tar.gz
LOCO is implemented in GEMMA for the BIMBAM format. Pass in the -loco
1 switch for LOCO of chromosome 1.

What are the use cases?

1. User runs vanilla GEMMA: all SNPs are considered input for GWA and K
2. User passes in -snps: all these SNPs are considered for GWA and K
3. User passes in -snps and -ksnps: All these SNPs are used for GWA,
   Ksnps are used for K
4. User passes in -loco: SNPs are split by chromosome (GWA incl., K
   excl.)
5. User passes in -snps, -gwasnps and -ksnps: this could mean that GWA
   is also subset explicitly (not yet implemented)

In all cases indicator_snp is honored and we get the most flexible way for
studying SNP combinations that can be passed in in different ways.

Overall added:

  - various comments in source code
  - tests in test framework incl. fast-check
  - NDEBUG compilation support in the Makefile
  - -debug switch for GEMMA debug output
  - debug.h which includes enforce functions which work like
    assert. Unlike assert, enforce also works in release compilation
  - -nind switch limits the number of individuals used
    (trim_individuals for testing)
  - enforcing tests of input files - e.g. are number of individuals correct
  - checks for memory allocation - we should add more of those
  - more checks for gsl results - we should add more of those
  - replaced strtoken with regex as a first case. They should all be
    replaced. strtoken is not thread safe, for one.
  - introduced C++ iterators
  - introduced C++ closure in BimBam LMM for cached processing
  - more localized initialization of variables - makes for demonstrably
    more correct code
  - -ksnps adds snps into setKSnps
  - -gwasnps adds snps into setGWASnps
  - both sets are computed by -loco
  - attempted to make the code easier to read
Diffstat (limited to 'src/param.h')
-rw-r--r--src/param.h30
1 files changed, 23 insertions, 7 deletions
diff --git a/src/param.h b/src/param.h
index 33e2431..45d8c0f 100644
--- a/src/param.h
+++ b/src/param.h
@@ -19,12 +19,15 @@
 #ifndef __PARAM_H__
 #define __PARAM_H__
 
+#include "debug.h"
 #include "gsl/gsl_matrix.h"
 #include "gsl/gsl_vector.h"
 #include <map>
 #include <set>
 #include <vector>
 
+#define K_BATCH_SIZE 10000 // #snps used for batched K
+
 using namespace std;
 
 class SNPINFO {
@@ -110,6 +113,7 @@ class PARAM {
 public:
   // IO-related parameters.
   bool mode_silence;
+  bool mode_debug = false;
   int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests
   int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value;
   vector<size_t> p_column; // Which phenotype column needs analysis.
@@ -135,12 +139,14 @@ public:
   string file_bf, file_hyp;
   string path_out;
 
-  string file_epm;  // Estimated parameter file.
-  string file_ebv;  // Estimated breeding value file.
-  string file_log;  // Log file containing mean estimate.
-  string file_read; // File containing total number of reads.
-  string file_gene; // Gene expression file.
-  string file_snps; // File containing analyzed SNPs or genes.
+  string file_epm;     // Estimated parameter file.
+  string file_ebv;     // Estimated breeding value file.
+  string file_log;     // Log file containing mean estimate.
+  string file_read;    // File containing total number of reads.
+  string file_gene;    // Gene expression file.
+  string file_snps;    // File containing analyzed SNPs or genes.
+  string file_ksnps;   // File SNPs for computing K
+  string file_gwasnps; // File SNPs for computing GWAS
 
   // WJA added.
   string file_oxford;
@@ -152,6 +158,7 @@ public:
   double r2_level;
 
   // LMM-related parameters.
+  string loco;
   double l_min;
   double l_max;
   size_t n_region;
@@ -215,6 +222,7 @@ public:
 
   // Number of individuals.
   size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref;
+  size_t ni_max = 0; // -nind switch for testing purposes
 
   // Number of observed and missing phenotypes.
   size_t np_obs, np_miss;
@@ -305,7 +313,9 @@ public:
 
   vector<SNPINFO> snpInfo;          // Record SNP information.
   vector<vector<SNPINFO>> msnpInfo; // Record SNP information.
-  set<string> setSnps;              // Set of snps for analysis.
+  set<string> setSnps;              // Set of snps for analysis (-snps).
+  set<string> setKSnps;             // Set of snps for K (-ksnps and LOCO)
+  set<string> setGWASnps;           // Set of snps for GWA (-gwasnps and LOCO)
 
   // Constructor.
   PARAM();
@@ -351,4 +361,10 @@ public:
 
 size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt);
 
+// Helper for checking parameters: if fn is non-empty, enforce_msg that stat() on it returns 0 (uses a `fileInfo` from the caller's scope).
+#define enforce_fexists(fn, msg)                                               \
+  if (!fn.empty())                                                             \
+    enforce_msg(stat(fn.c_str(), &fileInfo) == 0,                              \
+                ((std::string(__STRING(fn)) + ": " + msg).c_str()));
+
 #endif