about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorPjotr Prins2025-12-09 07:32:00 +0100
committerPjotr Prins2025-12-09 07:32:00 +0100
commit8a12a2f1833eec0d0d3e435c4eb9dc4a079b551a (patch)
tree47f431760f30da92960d71879eaa09513525e465 /src
parent4486e91d7228e9c79950236d7f146db2a0b099ee (diff)
downloadpangemma-8a12a2f1833eec0d0d3e435c4eb9dc4a079b551a.tar.gz
Compute K for Gb
Diffstat (limited to 'src')
-rw-r--r--src/gemma_io.cpp41
-rw-r--r--src/lmm.cpp14
2 files changed, 38 insertions, 17 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp
index 3e290f4..e630f64 100644
--- a/src/gemma_io.cpp
+++ b/src/gemma_io.cpp
@@ -1506,6 +1506,7 @@ gsl_matrix *mdb_calc_kin(const string file_geno, const int k_mode, const string
   env.set_mapsize(1UL * 1024UL * 1024UL * 1024UL * 1024UL); /* 10 GiB */
   env.set_max_dbs(10);
   env.open(file_geno.c_str(), MDB_RDONLY | MDB_NOSUBDIR, 0664);
+  string format;
 
   size_t ni_total = 0, num_markers = 0;
   {
@@ -1521,12 +1522,17 @@ gsl_matrix *mdb_calc_kin(const string file_geno, const int k_mode, const string
     std::string_view view_int;
     info.get(rtxn, "numsamples", view_int);
     ni_total = lmdb::from_sv<size_t>(view_int);
+    string_view info_key,info_value;
+    info.get(rtxn,"format",info_value);
+    format = string(info_value);
 
     MDB_stat stat;
     mdb_stat(rtxn, info, &stat);
-    assert(stat.ms_entries == 3);
+    assert(stat.ms_entries == 5);
   }
+
   auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
+
   auto geno_mdb = lmdb::dbi::open(rtxn, "geno");
 
   MDB_stat stat;
@@ -1567,21 +1573,36 @@ gsl_matrix *mdb_calc_kin(const string file_geno, const int k_mode, const string
     string_view key, value;
     cursor.get(key, value, mdb_fetch);
     mdb_fetch = MDB_NEXT;
-    /* FIXME!
-    if (indicator_snp[t] == 0)
-      continue;
-    */
+
     const uint8_t* data = reinterpret_cast<const uint8_t*>(key.data());
     auto chr = static_cast<uint8_t>(data[1]);
 
     if (is_loco && loco_chr == chr)
       continue;
 
-#if !defined NDEBUG
-    size_t num_floats = value.size() / sizeof(float);
-    assert(num_floats == ni_total);
-#endif
-    const float* gs = reinterpret_cast<const float*>(value.data());
+    // ---- Depending on the format we get different buffers - currently float and byte are supported:
+    vector<double> gs;
+    if (format == "Gb") {
+      size_t num_bytes = value.size() / sizeof(uint8_t);
+      assert(num_bytes == ni_total);
+      auto size = num_bytes;
+      const uint8_t* gs_bbuf = reinterpret_cast<const uint8_t*>(value.data());
+      gs.reserve(size);
+      for (size_t i = 0; i < size; ++i) {
+        double g = static_cast<double>(gs_bbuf[i])/127.0;
+        gs.push_back(g);
+      }
+    } else {
+      size_t num_floats = value.size() / sizeof(float);
+      assert(num_floats == ni_total);
+      auto size = num_floats;
+      const float* gs_fbuf = reinterpret_cast<const float*>(value.data());
+      gs.reserve(size);
+      for (size_t i = 0; i < size; ++i) {
+        gs.push_back(static_cast<double>(gs_fbuf[i]));
+      }
+    }
+
     size_t n_miss = 0;
     double geno_mean = 0.0;
     double geno_var = 0.0;
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 129cbd6..969e6fc 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -2242,6 +2242,7 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
 
     if (success) {
       size_t size = 0;
+      // ---- Depending on the format we get different buffers - currently float and byte are supported:
       if (format == "Gb") {
         size_t num_bytes = value.size() / sizeof(uint8_t);
         assert(num_bytes == ni_total);
@@ -2270,13 +2271,6 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
       // "S>L>L>"
       const uint8_t* data = reinterpret_cast<const uint8_t*>(key.data());
       auto chr = static_cast<uint8_t>(data[1]);
-
-      // printf("%#02x %#02x\n", chr, loco_chr);
-
-      if (is_loco && loco_chr != chr) {
-        return make_tuple(false, markerinfo, gs);
-      }
-
       // Extract big-endian uint32
       // uint32_t rest = static_cast<uint32_t>(data[2]);
       uint32_t pos =  (data[2] << 24) | (data[3] << 16) |
@@ -2285,6 +2279,12 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
       uint32_t num = (data[6] << 24) | (data[7] << 16) |
         (data[8] << 8) | data[9];
 
+      // printf("%#02x %#02x\n", chr, loco_chr);
+
+      if (is_loco && loco_chr != chr) {
+        return make_tuple(false, MarkerInfo { .name="", .chr=chr, .pos=pos } , gs);
+      }
+
       string_view value2;
       marker_mdb.get(rtxn,key,value2);
       auto marker = string(value2);