about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Pjotr Prins 2025-12-03 08:09:29 +0100
committer Pjotr Prins 2025-12-03 08:09:29 +0100
commit 3f1b09495410ca2b291c486230083b5c19a03080 (patch)
tree b90b9fd9e43d14ad7faa5395c2a16085c45db373 /src
parent 47580692539267c03ced4315aa9868b7c999a693 (diff)
download pangemma-3f1b09495410ca2b291c486230083b5c19a03080.tar.gz
Trying to optimize mdb read-ahead
Diffstat (limited to 'src')
-rw-r--r-- src/lmm.cpp 27
1 files changed, 24 insertions, 3 deletions
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 1e5e229..aa5036e 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -2084,6 +2084,9 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp,
 
 }
 
+#include <lmdb.h>
+#include <sys/mman.h>
+
 void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
                           const gsl_matrix *UtW, const gsl_vector *Uty,
                           const gsl_matrix *W, const gsl_vector *y,
@@ -2094,17 +2097,32 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
   // enforce_msg(num_snps > 0,"Zero SNPs to process - data corrupt?");
 
   auto env = lmdb::env::create();
-
   env.set_mapsize(1UL * 1024UL * 1024UL * 1024UL * 1024UL); /* 1 TiB */
   env.set_max_dbs(10);
   env.open(file_geno.c_str(), MDB_RDONLY | MDB_NOSUBDIR, 0664);
+  // Get mmap info via the LMDB C API (lmdb++ only supplies the raw env handle)
+  MDB_envinfo info;
+  mdb_env_info(env.handle(), &info);
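+  // Aggressive readahead hints. NOTE(review): LMDB documents me_mapaddr as the map address "if fixed" - confirm it is non-null without MDB_FIXEDMAP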
+#ifndef MADV_SEQUENTIAL
+#define MADV_SEQUENTIAL 2
+#endif
+
+#ifndef MADV_WILLNEED
+#define MADV_WILLNEED 3
+#endif
+  madvise(info.me_mapaddr, info.me_mapsize, MADV_SEQUENTIAL);
+  madvise(info.me_mapaddr, info.me_mapsize, MADV_WILLNEED);
+
+  std::cout << "LMDB opened with optimized readahead" << std::endl;
+  std::cout << "Map size: " << (info.me_mapsize / 1024 / 1024) << " MB" << std::endl;
+
   auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
   auto geno_mdb = lmdb::dbi::open(rtxn, "geno");
 
-
   MDB_stat stat;
   mdb_stat(rtxn, geno_mdb, &stat);
-  cout << "Number of records: " << stat.ms_entries << endl;
+  // cout << "Number of records: " << stat.ms_entries << endl;
   auto num_markers = stat.ms_entries;
 
   // fetch_snp is a callback function for every SNP row
@@ -2114,6 +2132,9 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
   auto mdb_fetch = MDB_FIRST;
 
   auto cursor = lmdb::cursor::open(rtxn, geno_mdb);
+  cout << "## number of total individuals = " << ni_total << endl;
+  cout << "## number of analyzed individuals = " << ni_total << endl;
+  cout << "## number of analyzed SNPs/var = " << num_markers << endl;
 
   std::function<SnpNameValues2(size_t)>  fetch_snp = [&](size_t num) {