From 8a12a2f1833eec0d0d3e435c4eb9dc4a079b551a Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 9 Dec 2025 07:32:00 +0100 Subject: Compute K for Gb --- src/gemma_io.cpp | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) (limited to 'src/gemma_io.cpp') diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index 3e290f4..e630f64 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -1506,6 +1506,7 @@ gsl_matrix *mdb_calc_kin(const string file_geno, const int k_mode, const string env.set_mapsize(1UL * 1024UL * 1024UL * 1024UL * 1024UL); /* 10 GiB */ env.set_max_dbs(10); env.open(file_geno.c_str(), MDB_RDONLY | MDB_NOSUBDIR, 0664); + string format; size_t ni_total = 0, num_markers = 0; { @@ -1521,12 +1522,17 @@ gsl_matrix *mdb_calc_kin(const string file_geno, const int k_mode, const string std::string_view view_int; info.get(rtxn, "numsamples", view_int); ni_total = lmdb::from_sv(view_int); + string_view info_key,info_value; + info.get(rtxn,"format",info_value); + format = string(info_value); MDB_stat stat; mdb_stat(rtxn, info, &stat); - assert(stat.ms_entries == 3); + assert(stat.ms_entries == 5); } + auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); + auto geno_mdb = lmdb::dbi::open(rtxn, "geno"); MDB_stat stat; @@ -1567,21 +1573,36 @@ gsl_matrix *mdb_calc_kin(const string file_geno, const int k_mode, const string string_view key, value; cursor.get(key, value, mdb_fetch); mdb_fetch = MDB_NEXT; - /* FIXME! - if (indicator_snp[t] == 0) - continue; - */ + const uint8_t* data = reinterpret_cast(key.data()); auto chr = static_cast(data[1]); if (is_loco && loco_chr == chr) continue; -#if !defined NDEBUG - size_t num_floats = value.size() / sizeof(float); - assert(num_floats == ni_total); -#endif - const float* gs = reinterpret_cast(value.data()); + // ---- Depending on the format we get different buffers - currently float and byte are supported: + vector gs; + if (format == "Gb") { + size_t num_bytes = value.size() / sizeof(uint8_t); + assert(num_bytes == ni_total); + auto size = num_bytes; + const uint8_t* gs_bbuf = reinterpret_cast(value.data()); + gs.reserve(size); + for (size_t i = 0; i < size; ++i) { + double g = static_cast(gs_bbuf[i])/127.0; + gs.push_back(g); + } + } else { + size_t num_floats = value.size() / sizeof(float); + assert(num_floats == ni_total); + auto size = num_floats; + const float* gs_fbuf = reinterpret_cast(value.data()); + gs.reserve(size); + for (size_t i = 0; i < size; ++i) { + gs.push_back(static_cast(gs_fbuf[i])); + } + } + size_t n_miss = 0; double geno_mean = 0.0; double geno_var = 0.0; -- cgit 1.4.1