about summary refs log tree commit diff
path: root/src/gemma_io.cpp
diff options
context:
space:
mode:
authorPjotr Prins2025-11-29 07:53:09 +0100
committerPjotr Prins2025-11-29 07:53:09 +0100
commit16bf371dff5d1ec20a7ce924c3329dd298e10de3 (patch)
tree9d930572326fb9d9396097aef3fbec5c392a5c9c /src/gemma_io.cpp
parent1dcecc851c02785b19bdd9b25dbab3317ac9c886 (diff)
downloadpangemma-16bf371dff5d1ec20a7ce924c3329dd298e10de3.tar.gz
Introduce lmdb++
Diffstat (limited to 'src/gemma_io.cpp')
-rw-r--r--src/gemma_io.cpp163
1 files changed, 48 insertions, 115 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp
index a727337..046b152 100644
--- a/src/gemma_io.cpp
+++ b/src/gemma_io.cpp
@@ -1474,7 +1474,7 @@ void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) {
   return;
 }
 
-#include <lmdb.h>
+// #include <lmdb.h>
 #include <lmdb++.h>
 
 // Read bimbam mean genotype file and calculate kinship matrix.
@@ -1484,124 +1484,57 @@ int mdb_calc_kin(const string file_geno, const set<string> ksnps,
 
   /* Create and open the LMDB environment: */
   auto env = lmdb::env::create();
-  env.set_mapsize(1UL * 1024UL * 1024UL * 1024UL); /* 1 GiB */
-  env.open("./example.mdb/", 0, 0664);
-  lmdb::dbi dbi;
 
+  env.set_mapsize(1UL * 1024UL * 1024UL * 1024UL * 1024UL); /* 1 TiB */
+  env.set_max_dbs(10);
+  env.open("example/mouse_hs1940.geno.mdb", MDB_RDONLY | MDB_NOSUBDIR, 0664);
 
-  MDB_env *env;
-  MDB_dbi dbi;
-  MDB_val key, data;
-  MDB_txn *txn;
-  MDB_cursor *cursor;
-  int rc;
-
-  // Create environment handle
-  rc = mdb_env_create(&env);
-  if (rc != 0) {
-    std::cerr << "mdb_env_create: " << mdb_strerror(rc) << std::endl;
-    return 1;
-  }
-  // Open the environment (directory containing the LMDB files)
-  rc = mdb_env_open(env, file_geno.c_str(), MDB_RDONLY | MDB_NOSUBDIR, 0664);
-  if (rc != 0) {
-    std::cerr << "mdb_env_open: " << mdb_strerror(rc) << std::endl;
-    return 1;
-  }
-  mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
-  // Open the database in the environment
-  rc = mdb_open(txn, NULL, 0, &dbi);
-  if (rc != 0) {
-    std::cerr << "mdb_dbi_open: " << mdb_strerror(rc) << std::endl;
-    mdb_txn_abort(txn);
-    return 1;
-  }
-
-  char *skey = "meta\0";
-  key.mv_size = strlen(skey);
-  key.mv_data = skey;
-
-  // std::string skey = "rs13475963";
-  // key.mv_size = skey.size();
-  // key.mv_data = (void*)skey.c_str();
-  cerr << "HERE" << endl;
-
-  MDB_val val;
-  rc = mdb_get(txn, dbi, &key, &val);
-  if (rc != 0) {
-    std::cerr << "mdb_get: " << mdb_strerror(rc) << std::endl;
-    mdb_txn_abort(txn);
-    return 1;
-  }
-
-  printf("!!!!!!! %s\n", val.mv_data);
-
-  mdb_txn_abort(txn);
-
-//---
-  MDB_stat stat;
-  // Get statistics
-  rc = mdb_stat(txn, dbi, &stat);
-  if (rc != 0) {
-    std::cerr << "mdb_stat: " << mdb_strerror(rc) << std::endl;
-    mdb_txn_abort(txn);
-    return 1;
-  }
-
-  // Print number of entries
-  std::cout << "Number of keys: " << stat.ms_entries << std::endl;
-
-  cerr << "HERE" << endl;
-
-  // Create a transaction for reading
-  rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
-  if (rc != 0) {
-    std::cerr << "mdb_txn_begin: " << mdb_strerror(rc) << std::endl;
-    return 1;
-  }
-
-
-  // Create a cursor to iterate through all entries
-  rc = mdb_cursor_open(txn, dbi, &cursor);
-  if (rc != 0) {
-    std::cerr << "mdb_cursor_open: " << mdb_strerror(rc) << std::endl;
-    mdb_txn_abort(txn);
-    return 1;
-  }
-
-  cerr << "HERE" << endl;
-
-
-  std::string searchKey = "meta";
-  key.mv_size = searchKey.size();
-  key.mv_data = (void*)searchKey.c_str();
-
-  rc = mdb_get(txn, dbi, &key, &data);
-  if (rc == 0) {
-    std::string value((char*)data.mv_data, data.mv_size);
-    std::cout << "- Found: " << value << std::endl;
-  } else if (rc == MDB_NOTFOUND) {
-    std::cout << "--- Key not found" << std::endl;
-  } else {
-    std::cerr << "--- Error: " << mdb_strerror(rc) << std::endl;
-  }
-
-  cerr << "HERE HERE" << endl;
-
-  // Iterate through all key-value pairs
-  while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) {
-    std::string keyStr((char*)key.mv_data, key.mv_size);
-    std::string dataStr((char*)data.mv_data, data.mv_size);
+// Print out all the values using a cursor:
+  /*
+  {
+    auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
+    dbi = lmdb::dbi::open(rtxn, "mouse_hs1940.geno.txt.mdb");
+    {
+      auto cursor = lmdb::cursor::open(rtxn, dbi);
 
-    std::cout << "Key: " << keyStr << std::endl;
-    // std::cout << "Key: " << keyStr << ", Value: " << dataStr << std::endl;
+      std::string_view key, value;
+      int count = 0;
+      if (cursor.get(key, value, MDB_FIRST)) {
+        do {
+          std::cout << count++ << " key: " << key << endl ; // "  value: " << value << std::endl;
+        } while (cursor.get(key, value, MDB_NEXT));
+      }
+    } // destroying cursor before committing/aborting transaction (see below)
   }
-
-  // Clean up
-  mdb_cursor_close(cursor);
-  mdb_txn_abort(txn);
-  mdb_dbi_close(env, dbi);
-  mdb_env_close(env);
+  */
+   // In a read-only transaction, get and print one of the values:
+   {
+       auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
+       auto info = lmdb::dbi::open(rtxn, "info");
+
+       // Get statistics - ms_entries contains the number of records
+       MDB_stat stat;
+       mdb_stat(rtxn, info, &stat);
+
+       std::cout << "Number of records: " << stat.ms_entries << std::endl;
+
+       std::string_view meta;
+       if (info.get(rtxn, "meta", meta)) {
+           std::cout << meta << std::endl;
+       } else {
+           std::cout << "meta not found!" << std::endl;
+       }
+   } // rtxn aborted automatically
+   {
+       auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
+       auto geno = lmdb::dbi::open(rtxn, "geno");
+
+       // Get statistics - ms_entries contains the number of records
+       MDB_stat stat;
+       mdb_stat(rtxn, geno, &stat);
+
+       std::cout << "Number of records: " << stat.ms_entries << std::endl;
+   } // rtxn aborted automatically
 
   size_t n_miss;
   double geno_mean, geno_var;