diff options
| | | |
|---|---|---|
| author | Pjotr Prins | 2025-12-03 08:09:29 +0100 |
| committer | Pjotr Prins | 2025-12-03 08:09:29 +0100 |
| commit | 3f1b09495410ca2b291c486230083b5c19a03080 (patch) | |
| tree | b90b9fd9e43d14ad7faa5395c2a16085c45db373 /src | |
| parent | 47580692539267c03ced4315aa9868b7c999a693 (diff) | |
| download | pangemma-3f1b09495410ca2b291c486230083b5c19a03080.tar.gz | |
Trying to optimize mdb read-ahead
Diffstat (limited to 'src')
| -rw-r--r-- | src/lmm.cpp | 27 |
1 file changed, 24 insertions, 3 deletions
diff --git a/src/lmm.cpp b/src/lmm.cpp index 1e5e229..aa5036e 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -2084,6 +2084,9 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp, } +#include <lmdb.h> +#include <sys/mman.h> + void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, @@ -2094,17 +2097,32 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval, // enforce_msg(num_snps > 0,"Zero SNPs to process - data corrupt?"); auto env = lmdb::env::create(); - env.set_mapsize(1UL * 1024UL * 1024UL * 1024UL * 1024UL); /* 10 GiB */ env.set_max_dbs(10); env.open(file_geno.c_str(), MDB_RDONLY | MDB_NOSUBDIR, 0664); + // Get mmap info using lmdb++ wrapper + MDB_envinfo info; + mdb_env_info(env.handle(), &info); + // Aggressive readahead hints +#ifndef MADV_SEQUENTIAL +#define MADV_SEQUENTIAL 2 +#endif + +#ifndef MADV_WILLNEED +#define MADV_WILLNEED 3 +#endif + madvise(info.me_mapaddr, info.me_mapsize, MADV_SEQUENTIAL); + madvise(info.me_mapaddr, info.me_mapsize, MADV_WILLNEED); + + std::cout << "LMDB opened with optimized readahead" << std::endl; + std::cout << "Map size: " << (info.me_mapsize / 1024 / 1024) << " MB" << std::endl; + auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); auto geno_mdb = lmdb::dbi::open(rtxn, "geno"); - MDB_stat stat; mdb_stat(rtxn, geno_mdb, &stat); - cout << "Number of records: " << stat.ms_entries << endl; + // cout << "Number of records: " << stat.ms_entries << endl; auto num_markers = stat.ms_entries; // fetch_snp is a callback function for every SNP row @@ -2114,6 +2132,9 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval, auto mdb_fetch = MDB_FIRST; auto cursor = lmdb::cursor::open(rtxn, geno_mdb); + cout << "## number of total individuals = " << ni_total << endl; + cout << "## number of analyzed individuals = " << ni_total << endl; + cout << "## number of analyzed SNPs/var = " << 
num_markers << endl; std::function<SnpNameValues2(size_t)> fetch_snp = [&](size_t num) { |
