about summary refs log tree commit diff
diff options
context:
space:
mode:
authorPjotr Prins2025-12-06 12:58:11 +0100
committerPjotr Prins2025-12-06 12:58:11 +0100
commit127ee6431063dc3e1b83cb1ef3113850697a88f2 (patch)
tree3d6e66ef6705cbedbe436456d9234a601b822ee0
parent4ca7fb55f0aaa1773626a69b48f3ed88e5b7f5a1 (diff)
downloadpangemma-127ee6431063dc3e1b83cb1ef3113850697a88f2.tar.gz
LOCO support works for mdb
-rw-r--r--src/lmm.cpp45
-rw-r--r--src/lmm.h8
2 files changed, 45 insertions, 8 deletions
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 7a80b8a..129cbd6 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -41,6 +41,8 @@
 #include "gsl/gsl_roots.h"
 #include "gsl/gsl_vector.h"
 #include <lmdb++.h>
+// #include <lmdb.h>
+#include <sys/mman.h>
 
 #include "gzstream.h"
 #include "gemma.h"
@@ -2097,8 +2099,17 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp,
     auto m = st.markerinfo;
     auto name = m.name;
     auto chr  = m.chr;
+    string chr_s;
+    if (chr == CHR_X)
+      chr_s = "X";
+    else if (chr == CHR_Y)
+      chr_s = "Y";
+    else if (chr == CHR_M)
+      chr_s = "M";
+    else
+      chr_s = to_string(chr);
 
-    outfile << chr << "\t";
+    outfile << chr_s << "\t";
     outfile << name << "\t";
     outfile << m.pos << "\t";
     outfile << m.n_miss << "\t-\t-\t"; // n_miss column + allele1 + allele0
@@ -2150,20 +2161,33 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp,
 
 }
 
-#include <lmdb.h>
-#include <sys/mman.h>
 
 void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
                           const gsl_matrix *UtW, const gsl_vector *Uty,
                        const gsl_matrix *W, const gsl_vector *y, const string loco) {
   checkpoint("mdb-calc-gwa",file_geno);
   bool is_loco = !loco.empty();
-  cout << loco << " CHR!!!!" << endl;
 
-  // const auto num_snps = indicator_snp.size();
-  // enforce_msg(num_snps > 0,"Zero SNPs to process - data corrupt?");
+  // Convert loco string to what we use in the chrpos index
+  uint8_t loco_chr;
+  if (is_loco) {
+    if (loco == "X") {
+      loco_chr = CHR_X;
+    } else if (loco == "Y") {
+      loco_chr = CHR_Y;
+    } else if (loco == "M") {
+      loco_chr = CHR_M;
+    } else {
+      try {
+        loco_chr = static_cast<uint8_t>(stoi(loco));
+      } catch (...) {
+        loco_chr = 0;
+      }
+    }
+  }
+
   auto env = lmdb::env::create();
-  env.set_mapsize(1UL * 1024UL * 1024UL * 1024UL * 1024UL); /* 10 GiB */
+  env.set_mapsize(1UL * 1024UL * 1024UL * 1024UL * 1024UL);
   env.set_max_dbs(10);
   env.open(file_geno.c_str(), MDB_RDONLY | MDB_NOSUBDIR, 0664);
   // Get mmap info using lmdb++ wrapper
@@ -2246,6 +2270,13 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
       // "S>L>L>"
       const uint8_t* data = reinterpret_cast<const uint8_t*>(key.data());
       auto chr = static_cast<uint8_t>(data[1]);
+
+      // printf("%#02x %#02x\n", chr, loco_chr);
+
+      if (is_loco && loco_chr != chr) {
+        return make_tuple(false, markerinfo, gs);
+      }
+
       // Extract big-endian uint32
       // uint32_t rest = static_cast<uint32_t>(data[2]);
       uint32_t pos =  (data[2] << 24) | (data[3] << 16) |
diff --git a/src/lmm.h b/src/lmm.h
index f4f49aa..8a65f8d 100644
--- a/src/lmm.h
+++ b/src/lmm.h
@@ -45,10 +45,16 @@ public:
   size_t e_mode;
 };
 
+
+constexpr uint8_t CHR_X = 'X';
+constexpr uint8_t CHR_Y = 'Y';
+constexpr uint8_t CHR_M = 'M';
+
 // typedef tuple< string, uint16_t, uint32_t, uint32_t > MarkerInfo; // name, chr, pos, line
 struct MarkerInfo {
   string name;
-  size_t chr, pos, line_no, n_miss;
+  uint8_t chr;
+  size_t pos, line_no, n_miss;
   double maf;
 } ;