about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorPjotr Prins2025-12-05 16:15:14 +0100
committerPjotr Prins2025-12-05 16:15:14 +0100
commitd3c03a392c6374a135c1a97da52b2cb76a178151 (patch)
tree5a450a89ab88b315d98f2b45b23257cce435347f /src
parente3207cbb0068e1f9580ae5c9c585bc3a2b2c6ca6 (diff)
downloadpangemma-d3c03a392c6374a135c1a97da52b2cb76a178151.tar.gz
Add support for Gb genotype byte representation
Diffstat (limited to 'src')
-rw-r--r--src/lmm.cpp37
1 files changed, 26 insertions, 11 deletions
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 10e3e53..1d7485c 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -2184,6 +2184,10 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
   auto geno_mdb = lmdb::dbi::open(rtxn, "geno");
 
   auto marker_mdb = lmdb::dbi::open(rtxn, "marker");
+  auto info_mdb = lmdb::dbi::open(rtxn, "info");
+  string_view info_key,info_value;
+  info_mdb.get(rtxn,"format",info_value);
+  auto format = string(info_value);
 
   MDB_stat stat;
   mdb_stat(rtxn, geno_mdb, &stat);
@@ -2218,11 +2222,28 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
     MarkerInfo markerinfo;
 
     if (success) {
-      size_t num_floats = value.size() / sizeof(float);
-      assert(num_floats == ni_total);
-      const float* gsbuf = reinterpret_cast<const float*>(value.data());
-
-      // unpack chr+pos from key
+      size_t size = 0;
+      if (format == "Gb") {
+        size_t num_bytes = value.size() / sizeof(uint8_t);
+        assert(num_bytes == ni_total);
+        size = num_bytes;
+        const uint8_t* gs_bbuf = reinterpret_cast<const uint8_t*>(value.data());
+        gs.reserve(size);
+        for (size_t i = 0; i < size; ++i) {
+          double g = static_cast<double>(gs_bbuf[i])/127.0;
+          gs.push_back(g);
+        }
+      } else {
+        size_t num_floats = value.size() / sizeof(float);
+        assert(num_floats == ni_total);
+        size = num_floats;
+        const float* gs_fbuf = reinterpret_cast<const float*>(value.data());
+        gs.reserve(size);
+        for (size_t i = 0; i < size; ++i) {
+          gs.push_back(static_cast<double>(gs_fbuf[i]));
+        }
+      }
+      // Start unpacking key chr+pos
       if (key.size() != 10) {
         cerr << "key.size=" << key.size() << endl;
         throw std::runtime_error("Invalid key size");
@@ -2244,12 +2265,6 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
       // 1       rs13476251      174792257
       // cout << static_cast<int>(chr) << ":" << pos2 << " line " << rest2 << ":" << marker << endl ;
 
-      auto size = num_floats;
-      gs.reserve(size);
-      for (size_t i = 0; i < size; ++i) {
-        gs.push_back(static_cast<double>(gsbuf[i]));
-      }
-
       // compute maf and n_miss (NAs)
       size_t n_miss = 0; // count NAs: FIXME
       double maf = compute_maf(ni_total, ni_test, n_miss, gs.data(), indicator_idv);