about summary refs log tree commit diff
path: root/src/gemma_io.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/gemma_io.cpp')
-rw-r--r--src/gemma_io.cpp66
1 files changed, 56 insertions, 10 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp
index e183e3f..e630f64 100644
--- a/src/gemma_io.cpp
+++ b/src/gemma_io.cpp
@@ -1478,8 +1478,27 @@ void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) {
 
 
 // Read bimbam mean genotype file and calculate kinship matrix.
-gsl_matrix *mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode) {
+gsl_matrix *mdb_calc_kin(const string file_geno, const int k_mode, const string loco) {
   checkpoint("mdb-kin",file_geno);
+  bool is_loco = !loco.empty();
+
+  // Convert loco string to what we use in the chrpos index
+  uint8_t loco_chr = 0;
+  if (is_loco) {
+    if (loco == "X") {
+      loco_chr = CHR_X;
+    } else if (loco == "Y") {
+      loco_chr = CHR_Y;
+    } else if (loco == "M") {
+      loco_chr = CHR_M;
+    } else {
+      try {
+        loco_chr = static_cast<uint8_t>(stoi(loco));
+      } catch (...) {
+        loco_chr = 0;
+      }
+    }
+  }
 
   /* Create and open the LMDB environment: */
   auto env = lmdb::env::create();
@@ -1487,6 +1506,7 @@ gsl_matrix *mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode)
   env.set_mapsize(1UL * 1024UL * 1024UL * 1024UL * 1024UL); /* 10 GiB */
   env.set_max_dbs(10);
   env.open(file_geno.c_str(), MDB_RDONLY | MDB_NOSUBDIR, 0664);
+  string format;
 
   size_t ni_total = 0, num_markers = 0;
   {
@@ -1502,12 +1522,17 @@ gsl_matrix *mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode)
     std::string_view view_int;
     info.get(rtxn, "numsamples", view_int);
     ni_total = lmdb::from_sv<size_t>(view_int);
+    string_view info_key,info_value;
+    info.get(rtxn,"format",info_value);
+    format = string(info_value);
 
     MDB_stat stat;
     mdb_stat(rtxn, info, &stat);
-    assert(stat.ms_entries == 3);
+    assert(stat.ms_entries == 5);
   }
+
   auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
+
   auto geno_mdb = lmdb::dbi::open(rtxn, "geno");
 
   MDB_stat stat;
@@ -1548,15 +1573,36 @@ gsl_matrix *mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode)
     string_view key, value;
     cursor.get(key, value, mdb_fetch);
     mdb_fetch = MDB_NEXT;
-    /* FIXME!
-    if (indicator_snp[t] == 0)
+
+    const uint8_t* data = reinterpret_cast<const uint8_t*>(key.data());
+    auto chr = static_cast<uint8_t>(data[1]);
+
+    if (is_loco && loco_chr == chr)
       continue;
-    */
-#if !defined NDEBUG
-    size_t num_floats = value.size() / sizeof(float);
-    assert(num_floats == ni_total);
-#endif
-    const float* gs = reinterpret_cast<const float*>(value.data());
+
+    // ---- Depending on the format we get different buffers - currently float and byte are supported:
+    vector<double> gs;
+    if (format == "Gb") {
+      size_t num_bytes = value.size() / sizeof(uint8_t);
+      assert(num_bytes == ni_total);
+      auto size = num_bytes;
+      const uint8_t* gs_bbuf = reinterpret_cast<const uint8_t*>(value.data());
+      gs.reserve(size);
+      for (size_t i = 0; i < size; ++i) {
+        double g = static_cast<double>(gs_bbuf[i])/127.0;
+        gs.push_back(g);
+      }
+    } else {
+      size_t num_floats = value.size() / sizeof(float);
+      assert(num_floats == ni_total);
+      auto size = num_floats;
+      const float* gs_fbuf = reinterpret_cast<const float*>(value.data());
+      gs.reserve(size);
+      for (size_t i = 0; i < size; ++i) {
+        gs.push_back(static_cast<double>(gs_fbuf[i]));
+      }
+    }
+
     size_t n_miss = 0;
     double geno_mean = 0.0;
     double geno_var = 0.0;