From be551a1b3c324dc96750cf992b6a5784e2b92ec7 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 25 Aug 2018 11:32:46 +0000 Subject: Number format checking in strict mode. Also fixes #149 --- src/debug.h | 6 ++++++ src/gemma_io.cpp | 11 +++++------ src/lmm.cpp | 8 ++++++-- src/mathfunc.cpp | 9 +++++++++ src/mathfunc.h | 2 ++ 5 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/debug.h b/src/debug.h index ced8e72..b08d75d 100644 --- a/src/debug.h +++ b/src/debug.h @@ -147,6 +147,12 @@ inline void __enforce_fail(const char *__assertion, const char *__file, ? __ASSERT_VOID_CAST(0) \ : __enforce_fail((msg).c_str(), __FILE__, __LINE__, __SHOW_FUNC)) +#define enforce_is_int(s) \ + enforce_str(std::regex_match(s, std::regex("^[-+]?[0-9]+$")),s + " not an integer") + +#define enforce_is_float(s) \ + enforce_str(std::regex_match(s, std::regex("^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?$")),s + " not a float") + // Helpers to create a unique varname per MACRO #define COMBINE1(X, Y) X##Y #define COMBINE(X, Y) COMBINE1(X, Y) diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index fad3e47..4d8f6bc 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -1368,7 +1368,7 @@ bool BimbamKin(const string file_geno, const set ksnps, enforce_msg(infile, "error reading genotype file"); size_t n_miss; - double d, geno_mean, geno_var; + double geno_mean, geno_var; // setKSnp and/or LOCO support bool process_ksnps = ksnps.size(); @@ -1433,10 +1433,9 @@ bool BimbamKin(const string file_geno, const set ksnps, gsl_vector_set(geno_miss, i, 0); n_miss++; } else { - d = stod(field); - // make sure genotype field contains a number - if (field != "0" && field != "0.0") - enforce_str(d != 0.0f, field); + double d = stod(field); + if (is_strict_mode() && d == 0.0) + enforce_is_float(field); // rule out non NA and non-float fields gsl_vector_set(geno, i, d); gsl_vector_set(geno_miss, i, 1); geno_mean += d; @@ -1486,7 +1485,7 @@ bool BimbamKin(const string file_geno, const set ksnps, debug_msg("begin transpose"); for (size_t i = 0; i < ni_total; ++i) { for (size_t j = 0; j < i; ++j) { - d = gsl_matrix_get(matrix_kin, j, i); + double d = gsl_matrix_get(matrix_kin, j, i); gsl_matrix_set(matrix_kin, i, j, d); } } diff --git a/src/lmm.cpp b/src/lmm.cpp index 7e4c3e1..ed1366d 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -1498,8 +1499,11 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, for (size_t i = 0; i < ni_total; ++i) { ch_ptr = strtok_safe2(NULL, " , \t",infilen); - if (strcmp(ch_ptr, "NA") != 0) + if (strcmp(ch_ptr, "NA") != 0) { gs[i] = atof(ch_ptr); + if (is_strict_mode() && gs[i] == 0.0) + enforce_is_float(std::string(__STRING(ch_ptr))); // only allow for NA and valid numbers + } } return std::make_tuple(snp,gs); }; @@ -1873,7 +1877,7 @@ void CalcLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW, gsl_vector_view HiW_col = gsl_matrix_column(HiW, i); gsl_vector_mul(&HiW_col.vector, Hi_eval); } - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, HiW, UtW, 0.0, WHiW); + fast_dgemm("T", "N", 1.0, HiW, UtW, 0.0, WHiW); gsl_blas_dgemv(CblasTrans, 1.0, HiW, Uty, 0.0, WHiy); int sig; diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp index 9076c47..7117700 100644 --- a/src/mathfunc.cpp +++ b/src/mathfunc.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -94,6 +95,14 @@ bool has_inf(const gsl_matrix *m) { return false; } +bool is_integer(const std::string & s){ + return std::regex_match(s, std::regex("^[0-9]+$")); +} + +bool is_float(const std::string & s){ + return std::regex_match(s, std::regex("^[+-]?([0-9]*[.])?[0-9]+$")); +} + // calculate variance of a vector double VectorVar(const gsl_vector *v) { double d, m = 0.0, m2 = 0.0; diff --git a/src/mathfunc.h b/src/mathfunc.h index 8258c22..89a34e6 100644 --- a/src/mathfunc.h +++ b/src/mathfunc.h @@ -40,6 +40,8 @@ bool has_inf(const gsl_vector *v); bool has_nan(const gsl_matrix *m); bool has_inf(const gsl_matrix *m); +bool is_integer(const std::string & s); + double VectorVar(const gsl_vector *v); void CenterMatrix(gsl_matrix *G); void CenterMatrix(gsl_matrix *G, const gsl_vector *w); -- cgit v1.2.3