about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorPjotr Prins2018-08-25 11:32:46 +0000
committerPjotr Prins2018-08-25 11:32:46 +0000
commitbe551a1b3c324dc96750cf992b6a5784e2b92ec7 (patch)
tree01830a02558f85b33fcb6ce7d54d1daab28ca669 /src
parenta24297bb1cb686d3630db3a0da8fa8a2079ad178 (diff)
downloadpangemma-be551a1b3c324dc96750cf992b6a5784e2b92ec7.tar.gz
Number format checking in strict mode. Also fixes #149
Diffstat (limited to 'src')
-rw-r--r--src/debug.h6
-rw-r--r--src/gemma_io.cpp11
-rw-r--r--src/lmm.cpp8
-rw-r--r--src/mathfunc.cpp9
-rw-r--r--src/mathfunc.h2
5 files changed, 28 insertions, 8 deletions
diff --git a/src/debug.h b/src/debug.h
index ced8e72..b08d75d 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -147,6 +147,12 @@ inline void __enforce_fail(const char *__assertion, const char *__file,
        ? __ASSERT_VOID_CAST(0)                                                 \
        : __enforce_fail((msg).c_str(), __FILE__, __LINE__, __SHOW_FUNC))
 
+#define enforce_is_int(s) \
+  enforce_str(std::regex_match(s, std::regex("^[-+]?[0-9]+$")),s + " not an integer")
+
+#define enforce_is_float(s)                                             \
+  enforce_str(std::regex_match(s, std::regex("^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?$")),s + " not a float")
+
 // Helpers to create a unique varname per MACRO
 #define COMBINE1(X, Y) X##Y
 #define COMBINE(X, Y) COMBINE1(X, Y)
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp
index fad3e47..4d8f6bc 100644
--- a/src/gemma_io.cpp
+++ b/src/gemma_io.cpp
@@ -1368,7 +1368,7 @@ bool BimbamKin(const string file_geno, const set<string> ksnps,
   enforce_msg(infile, "error reading genotype file");
 
   size_t n_miss;
-  double d, geno_mean, geno_var;
+  double geno_mean, geno_var;
 
   // setKSnp and/or LOCO support
   bool process_ksnps = ksnps.size();
@@ -1433,10 +1433,9 @@ bool BimbamKin(const string file_geno, const set<string> ksnps,
         gsl_vector_set(geno_miss, i, 0);
         n_miss++;
       } else {
-        d = stod(field);
-        // make sure genotype field contains a number
-        if (field != "0" && field != "0.0")
-          enforce_str(d != 0.0f, field);
+        double d = stod(field);
+        if (is_strict_mode() && d == 0.0)
+          enforce_is_float(field);  // rule out non NA and non-float fields
         gsl_vector_set(geno, i, d);
         gsl_vector_set(geno_miss, i, 1);
         geno_mean += d;
@@ -1486,7 +1485,7 @@ bool BimbamKin(const string file_geno, const set<string> ksnps,
   debug_msg("begin transpose");
   for (size_t i = 0; i < ni_total; ++i) {
     for (size_t j = 0; j < i; ++j) {
-      d = gsl_matrix_get(matrix_kin, j, i);
+      double d = gsl_matrix_get(matrix_kin, j, i);
       gsl_matrix_set(matrix_kin, i, j, d);
     }
   }
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 7e4c3e1..ed1366d 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -28,6 +28,7 @@
 #include <cstring>
 #include <iomanip>
 #include <iostream>
+#include <regex>
 #include <stdio.h>
 #include <stdlib.h>
 
@@ -1498,8 +1499,11 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
 
     for (size_t i = 0; i < ni_total; ++i) {
       ch_ptr = strtok_safe2(NULL, " , \t",infilen);
-      if (strcmp(ch_ptr, "NA") != 0)
+      if (strcmp(ch_ptr, "NA") != 0) {
         gs[i] = atof(ch_ptr);
+        if (is_strict_mode() && gs[i] == 0.0)
+          enforce_is_float(std::string(__STRING(ch_ptr))); // only allow for NA and valid numbers
+      }
     }
     return std::make_tuple(snp,gs);
   };
@@ -1873,7 +1877,7 @@ void CalcLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
     gsl_vector_view HiW_col = gsl_matrix_column(HiW, i);
     gsl_vector_mul(&HiW_col.vector, Hi_eval);
   }
-  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, HiW, UtW, 0.0, WHiW);
+  fast_dgemm("T", "N", 1.0, HiW, UtW, 0.0, WHiW);
   gsl_blas_dgemv(CblasTrans, 1.0, HiW, Uty, 0.0, WHiy);
 
   int sig;
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index 9076c47..7117700 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -26,6 +26,7 @@
 #include <iostream>
 #include <limits.h>
 #include <map>
+#include <regex>
 #include <set>
 #include <sstream>
 #include <stdio.h>
@@ -94,6 +95,14 @@ bool has_inf(const gsl_matrix *m) {
   return false;
 }
 
+bool is_integer(const std::string & s){
+    return std::regex_match(s, std::regex("^[0-9]+$"));
+}
+
+bool is_float(const std::string & s){
+    return std::regex_match(s, std::regex("^[+-]?([0-9]*[.])?[0-9]+$"));
+}
+
 // calculate variance of a vector
 double VectorVar(const gsl_vector *v) {
   double d, m = 0.0, m2 = 0.0;
diff --git a/src/mathfunc.h b/src/mathfunc.h
index 8258c22..89a34e6 100644
--- a/src/mathfunc.h
+++ b/src/mathfunc.h
@@ -40,6 +40,8 @@ bool has_inf(const gsl_vector *v);
 bool has_nan(const gsl_matrix *m);
 bool has_inf(const gsl_matrix *m);
 
+bool is_integer(const std::string & s);
+
 double VectorVar(const gsl_vector *v);
 void CenterMatrix(gsl_matrix *G);
 void CenterMatrix(gsl_matrix *G, const gsl_vector *w);