diff options
Diffstat (limited to 'src/gemma_io.cpp')
| -rw-r--r-- | src/gemma_io.cpp | 41 |
1 files changed, 31 insertions, 10 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index 3e290f4..e630f64 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -1506,6 +1506,7 @@ gsl_matrix *mdb_calc_kin(const string file_geno, const int k_mode, const string env.set_mapsize(1UL * 1024UL * 1024UL * 1024UL * 1024UL); /* 10 GiB */ env.set_max_dbs(10); env.open(file_geno.c_str(), MDB_RDONLY | MDB_NOSUBDIR, 0664); + string format; size_t ni_total = 0, num_markers = 0; { @@ -1521,12 +1522,17 @@ gsl_matrix *mdb_calc_kin(const string file_geno, const int k_mode, const string std::string_view view_int; info.get(rtxn, "numsamples", view_int); ni_total = lmdb::from_sv<size_t>(view_int); + string_view info_key,info_value; + info.get(rtxn,"format",info_value); + format = string(info_value); MDB_stat stat; mdb_stat(rtxn, info, &stat); - assert(stat.ms_entries == 3); + assert(stat.ms_entries == 5); } + auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); + auto geno_mdb = lmdb::dbi::open(rtxn, "geno"); MDB_stat stat; @@ -1567,21 +1573,36 @@ gsl_matrix *mdb_calc_kin(const string file_geno, const int k_mode, const string string_view key, value; cursor.get(key, value, mdb_fetch); mdb_fetch = MDB_NEXT; - /* FIXME! - if (indicator_snp[t] == 0) - continue; - */ + const uint8_t* data = reinterpret_cast<const uint8_t*>(key.data()); auto chr = static_cast<uint8_t>(data[1]); if (is_loco && loco_chr == chr) continue; -#if !defined NDEBUG - size_t num_floats = value.size() / sizeof(float); - assert(num_floats == ni_total); -#endif - const float* gs = reinterpret_cast<const float*>(value.data()); + // ---- Depending on the format we get different buffers - currently float and byte are supported: + vector<double> gs; + if (format == "Gb") { + size_t num_bytes = value.size() / sizeof(uint8_t); + assert(num_bytes == ni_total); + auto size = num_bytes; + const uint8_t* gs_bbuf = reinterpret_cast<const uint8_t*>(value.data()); + gs.reserve(size); + for (size_t i = 0; i < size; ++i) { + double g = static_cast<double>(gs_bbuf[i])/127.0; + gs.push_back(g); + } + } else { + size_t num_floats = value.size() / sizeof(float); + assert(num_floats == ni_total); + auto size = num_floats; + const float* gs_fbuf = reinterpret_cast<const float*>(value.data()); + gs.reserve(size); + for (size_t i = 0; i < size; ++i) { + gs.push_back(static_cast<double>(gs_fbuf[i])); + } + } + size_t n_miss = 0; double geno_mean = 0.0; double geno_var = 0.0; |
