aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorPjotr Prins2018-08-25 11:32:46 +0000
committerPjotr Prins2018-08-25 11:32:46 +0000
commitbe551a1b3c324dc96750cf992b6a5784e2b92ec7 (patch)
tree01830a02558f85b33fcb6ce7d54d1daab28ca669 /src
parenta24297bb1cb686d3630db3a0da8fa8a2079ad178 (diff)
downloadpangemma-be551a1b3c324dc96750cf992b6a5784e2b92ec7.tar.gz
Number format checking in strict mode. Also fixes #149
Diffstat (limited to 'src')
-rw-r--r--src/debug.h6
-rw-r--r--src/gemma_io.cpp11
-rw-r--r--src/lmm.cpp8
-rw-r--r--src/mathfunc.cpp9
-rw-r--r--src/mathfunc.h2
5 files changed, 28 insertions, 8 deletions
diff --git a/src/debug.h b/src/debug.h
index ced8e72..b08d75d 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -147,6 +147,12 @@ inline void __enforce_fail(const char *__assertion, const char *__file,
? __ASSERT_VOID_CAST(0) \
: __enforce_fail((msg).c_str(), __FILE__, __LINE__, __SHOW_FUNC))
+#define enforce_is_int(s) \
+ enforce_str(std::regex_match(s, std::regex("^[-+]?[0-9]+$")),s + " not an integer")
+
+#define enforce_is_float(s) \
+ enforce_str(std::regex_match(s, std::regex("^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?$")),s + " not a float")
+
// Helpers to create a unique varname per MACRO
#define COMBINE1(X, Y) X##Y
#define COMBINE(X, Y) COMBINE1(X, Y)
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp
index fad3e47..4d8f6bc 100644
--- a/src/gemma_io.cpp
+++ b/src/gemma_io.cpp
@@ -1368,7 +1368,7 @@ bool BimbamKin(const string file_geno, const set<string> ksnps,
enforce_msg(infile, "error reading genotype file");
size_t n_miss;
- double d, geno_mean, geno_var;
+ double geno_mean, geno_var;
// setKSnp and/or LOCO support
bool process_ksnps = ksnps.size();
@@ -1433,10 +1433,9 @@ bool BimbamKin(const string file_geno, const set<string> ksnps,
gsl_vector_set(geno_miss, i, 0);
n_miss++;
} else {
- d = stod(field);
- // make sure genotype field contains a number
- if (field != "0" && field != "0.0")
- enforce_str(d != 0.0f, field);
+ double d = stod(field);
+ if (is_strict_mode() && d == 0.0)
+ enforce_is_float(field); // rule out non NA and non-float fields
gsl_vector_set(geno, i, d);
gsl_vector_set(geno_miss, i, 1);
geno_mean += d;
@@ -1486,7 +1485,7 @@ bool BimbamKin(const string file_geno, const set<string> ksnps,
debug_msg("begin transpose");
for (size_t i = 0; i < ni_total; ++i) {
for (size_t j = 0; j < i; ++j) {
- d = gsl_matrix_get(matrix_kin, j, i);
+ double d = gsl_matrix_get(matrix_kin, j, i);
gsl_matrix_set(matrix_kin, i, j, d);
}
}
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 7e4c3e1..ed1366d 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -28,6 +28,7 @@
#include <cstring>
#include <iomanip>
#include <iostream>
+#include <regex>
#include <stdio.h>
#include <stdlib.h>
@@ -1498,8 +1499,11 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
for (size_t i = 0; i < ni_total; ++i) {
ch_ptr = strtok_safe2(NULL, " , \t",infilen);
- if (strcmp(ch_ptr, "NA") != 0)
+ if (strcmp(ch_ptr, "NA") != 0) {
gs[i] = atof(ch_ptr);
+ if (is_strict_mode() && gs[i] == 0.0)
+ enforce_is_float(std::string(__STRING(ch_ptr))); // only allow for NA and valid numbers
+ }
}
return std::make_tuple(snp,gs);
};
@@ -1873,7 +1877,7 @@ void CalcLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
gsl_vector_view HiW_col = gsl_matrix_column(HiW, i);
gsl_vector_mul(&HiW_col.vector, Hi_eval);
}
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, HiW, UtW, 0.0, WHiW);
+ fast_dgemm("T", "N", 1.0, HiW, UtW, 0.0, WHiW);
gsl_blas_dgemv(CblasTrans, 1.0, HiW, Uty, 0.0, WHiy);
int sig;
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index 9076c47..7117700 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -26,6 +26,7 @@
#include <iostream>
#include <limits.h>
#include <map>
+#include <regex>
#include <set>
#include <sstream>
#include <stdio.h>
@@ -94,6 +95,14 @@ bool has_inf(const gsl_matrix *m) {
return false;
}
+bool is_integer(const std::string & s){
+ return std::regex_match(s, std::regex("^[0-9]+$"));
+}
+
+bool is_float(const std::string & s){
+ return std::regex_match(s, std::regex("^[+-]?([0-9]*[.])?[0-9]+$"));
+}
+
// calculate variance of a vector
double VectorVar(const gsl_vector *v) {
double d, m = 0.0, m2 = 0.0;
diff --git a/src/mathfunc.h b/src/mathfunc.h
index 8258c22..89a34e6 100644
--- a/src/mathfunc.h
+++ b/src/mathfunc.h
@@ -40,6 +40,8 @@ bool has_inf(const gsl_vector *v);
bool has_nan(const gsl_matrix *m);
bool has_inf(const gsl_matrix *m);
+bool is_integer(const std::string & s);
+
double VectorVar(const gsl_vector *v);
void CenterMatrix(gsl_matrix *G);
void CenterMatrix(gsl_matrix *G, const gsl_vector *w);