From 8a12a2f1833eec0d0d3e435c4eb9dc4a079b551a Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 9 Dec 2025 07:32:00 +0100 Subject: Compute K for Gb --- src/gemma_io.cpp | 41 +++++++++++++++++++++++++++++++---------- src/lmm.cpp | 14 +++++++------- 2 files changed, 38 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index 3e290f4..e630f64 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -1506,6 +1506,7 @@ gsl_matrix *mdb_calc_kin(const string file_geno, const int k_mode, const string env.set_mapsize(1UL * 1024UL * 1024UL * 1024UL * 1024UL); /* 10 GiB */ env.set_max_dbs(10); env.open(file_geno.c_str(), MDB_RDONLY | MDB_NOSUBDIR, 0664); + string format; size_t ni_total = 0, num_markers = 0; { @@ -1521,12 +1522,17 @@ gsl_matrix *mdb_calc_kin(const string file_geno, const int k_mode, const string std::string_view view_int; info.get(rtxn, "numsamples", view_int); ni_total = lmdb::from_sv(view_int); + string_view info_key,info_value; + info.get(rtxn,"format",info_value); + format = string(info_value); MDB_stat stat; mdb_stat(rtxn, info, &stat); - assert(stat.ms_entries == 3); + assert(stat.ms_entries == 5); } + auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); + auto geno_mdb = lmdb::dbi::open(rtxn, "geno"); MDB_stat stat; @@ -1567,21 +1573,36 @@ gsl_matrix *mdb_calc_kin(const string file_geno, const int k_mode, const string string_view key, value; cursor.get(key, value, mdb_fetch); mdb_fetch = MDB_NEXT; - /* FIXME! - if (indicator_snp[t] == 0) - continue; - */ + const uint8_t* data = reinterpret_cast(key.data()); auto chr = static_cast(data[1]); if (is_loco && loco_chr == chr) continue; -#if !defined NDEBUG - size_t num_floats = value.size() / sizeof(float); - assert(num_floats == ni_total); -#endif - const float* gs = reinterpret_cast(value.data()); + // ---- Depending on the format we get different buffers - currently float and byte are supported: + vector gs; + if (format == "Gb") { + size_t num_bytes = value.size() / sizeof(uint8_t); + assert(num_bytes == ni_total); + auto size = num_bytes; + const uint8_t* gs_bbuf = reinterpret_cast(value.data()); + gs.reserve(size); + for (size_t i = 0; i < size; ++i) { + double g = static_cast(gs_bbuf[i])/127.0; + gs.push_back(g); + } + } else { + size_t num_floats = value.size() / sizeof(float); + assert(num_floats == ni_total); + auto size = num_floats; + const float* gs_fbuf = reinterpret_cast(value.data()); + gs.reserve(size); + for (size_t i = 0; i < size; ++i) { + gs.push_back(static_cast(gs_fbuf[i])); + } + } + size_t n_miss = 0; double geno_mean = 0.0; double geno_var = 0.0; diff --git a/src/lmm.cpp b/src/lmm.cpp index 129cbd6..969e6fc 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -2242,6 +2242,7 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval, if (success) { size_t size = 0; + // ---- Depending on the format we get different buffers - currently float and byte are supported: if (format == "Gb") { size_t num_bytes = value.size() / sizeof(uint8_t); assert(num_bytes == ni_total); @@ -2270,13 +2271,6 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval, // "S>L>L>" const uint8_t* data = reinterpret_cast(key.data()); auto chr = static_cast(data[1]); - - // printf("%#02x %#02x\n", chr, loco_chr); - - if (is_loco && loco_chr != chr) { - return make_tuple(false, markerinfo, gs); - } - // Extract big-endian uint32 // uint32_t rest = static_cast(data[2]); uint32_t pos = (data[2] << 24) | (data[3] << 16) | @@ -2285,6 +2279,12 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval, uint32_t num = (data[6] << 24) | (data[7] << 16) | (data[8] << 8) | data[9]; + // printf("%#02x %#02x\n", chr, loco_chr); + + if (is_loco && loco_chr != chr) { + return make_tuple(false, MarkerInfo { .name="", .chr=chr, .pos=pos } , gs); + } + string_view value2; marker_mdb.get(rtxn,key,value2); auto marker = string(value2); -- cgit 1.4.1