about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/gemma_io.cpp11
-rw-r--r--src/lmm.cpp58
2 files changed, 35 insertions, 34 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp
index cfa92d1..959e1f5 100644
--- a/src/gemma_io.cpp
+++ b/src/gemma_io.cpp
@@ -406,13 +406,14 @@ bool ReadFile_pheno(const string &file_pheno,
     return false;
   }
 
-  string line;
-  char *ch_ptr;
-
-  string id;
+  // string id;
   double p;
 
   vector<double> pheno_row;
+
+  string line;
+  char *ch_ptr;
+
   vector<int> indicator_pheno_row;
 
   size_t p_max = *max_element(p_column.begin(), p_column.end());
@@ -1516,7 +1517,7 @@ gsl_matrix *mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode)
 
   env.set_mapsize(1UL * 1024UL * 1024UL * 1024UL * 1024UL); /* 10 GiB */
   env.set_max_dbs(10);
-  env.open("example/mouse_hs1940.geno.mdb", MDB_RDONLY | MDB_NOSUBDIR, 0664);
+  env.open(file_geno.c_str(), MDB_RDONLY | MDB_NOSUBDIR, 0664);
 
   size_t ni_total = 0, num_markers = 0;
   {
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 6d89f77..2ac9835 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -40,6 +40,7 @@
 #include "gsl/gsl_min.h"
 #include "gsl/gsl_roots.h"
 #include "gsl/gsl_vector.h"
+#include <lmdb++.h>
 
 #include "gzstream.h"
 #include "gemma.h"
@@ -1870,49 +1871,48 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
                           const gsl_matrix *UtW, const gsl_vector *Uty,
                           const gsl_matrix *W, const gsl_vector *y,
                           const set<string> gwasnps) {
-  debug_msg(file_geno);
-  auto genofilen = file_geno.c_str();
   checkpoint("mdb-calc-gwa",file_geno);
 
-  igzstream genofile(genofilen, igzstream::in);
-  enforce_msg(genofile, "error reading genotype file");
-  size_t prev_line = 0;
+  const auto num_snps = indicator_snp.size();
+  enforce_msg(num_snps > 0,"Zero SNPs to process - data corrupt?");
 
-  std::vector <double> gs;
-  gs.resize(ni_total);
+  auto env = lmdb::env::create();
+
+  env.set_mapsize(1UL * 1024UL * 1024UL * 1024UL * 1024UL); /* 10 GiB */
+  env.set_max_dbs(10);
+  env.open(file_geno.c_str(), MDB_RDONLY | MDB_NOSUBDIR, 0664);
+  auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
+  auto geno_mdb = lmdb::dbi::open(rtxn, "geno");
 
   // fetch_snp is a callback function for every SNP row
+  // returns typedef std::tuple<string,std::vector<double> > SnpNameValues;
+
+  size_t prev_line = 0;
+  auto mdb_fetch = MDB_FIRST;
+
+  auto cursor = lmdb::cursor::open(rtxn, geno_mdb);
+
   std::function<SnpNameValues(size_t)>  fetch_snp = [&](size_t num) {
-    string line;
+
+    string_view key,value;
+
     while (prev_line <= num) {
-      // also read SNPs that were skipped
-      safeGetline(genofile, line);
+      cursor.get(key, value, MDB_NEXT);
       prev_line++;
     }
-    char *ch_ptr = strtok_safe2((char *)line.c_str(), " , \t",genofilen);
-    // enforce_msg(ch_ptr, "Parsing BIMBAM genofile"); // ch_ptr should not be NULL
-
-    auto snp = string(ch_ptr);
-    ch_ptr = strtok_safe2(NULL, " , \t",genofilen); // skip column
-    ch_ptr = strtok_safe2(NULL, " , \t",genofilen); // skip column
+    cursor.get(key, value, mdb_fetch);
+    mdb_fetch = MDB_NEXT;
+    size_t num_floats = value.size() / sizeof(float);
+    assert(num_floats == ni_total);
+    const float* gsbuf = reinterpret_cast<const float*>(value.data());
+    auto snp = string(key);
+    std::vector<double> gs(gsbuf,gsbuf+sizeof(float)*value.size());
+    cout << "!!!!" << snp << endl;
 
-    gs.assign (ni_total,nan("")); // wipe values
-
-    for (size_t i = 0; i < ni_total; ++i) {
-      ch_ptr = strtok_safe2(NULL, " , \t",genofilen);
-      if (strcmp(ch_ptr, "NA") != 0) {
-        gs[i] = atof(ch_ptr);
-        if (is_strict_mode() && gs[i] == 0.0)
-          enforce_is_float(std::string(ch_ptr)); // only allow for NA and valid numbers
-      }
-    }
     return std::make_tuple(snp,gs);
   };
 
   LMM::Analyze(fetch_snp,U,eval,UtW,Uty,W,y,gwasnps);
-
-  genofile.close();
-  genofile.clear();
 }