diff options
| author | Pjotr Prins | 2025-11-30 07:41:48 +0100 |
|---|---|---|
| committer | Pjotr Prins | 2025-11-30 07:41:48 +0100 |
| commit | 882a426c70370e50a521435e1b2175598e0fe411 (patch) | |
| tree | 0a6f5abd5f325aca6d5b39de33654d1a281122bd /src | |
| parent | 16bf371dff5d1ec20a7ce924c3329dd298e10de3 (diff) | |
| download | pangemma-882a426c70370e50a521435e1b2175598e0fe411.tar.gz | |
Fetch ni_total from lmdb
Diffstat (limited to 'src')
| -rw-r--r-- | src/gemma_io.cpp | 83 | ||||
| -rw-r--r-- | src/gemma_io.h | 2 | ||||
| -rw-r--r-- | src/param.cpp | 4 | ||||
| -rw-r--r-- | src/param.h | 2 |
4 files changed, 44 insertions, 47 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index 046b152..6265417 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -1478,7 +1478,7 @@ void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) { #include <lmdb++.h> // Read bimbam mean genotype file and calculate kinship matrix. -int mdb_calc_kin(const string file_geno, const set<string> ksnps, +int mdb_calc_kin(const string file_geno, bool is_loco, vector<int> &indicator_snp, const int k_mode) { checkpoint("mdb-kin",file_geno); @@ -1489,62 +1489,56 @@ int mdb_calc_kin(const string file_geno, const set<string> ksnps, env.set_max_dbs(10); env.open("example/mouse_hs1940.geno.mdb", MDB_RDONLY | MDB_NOSUBDIR, 0664); -// Print out all the values using a cursor: /* { + // Print out all the values using a cursor: auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); dbi = lmdb::dbi::open(rtxn, "mouse_hs1940.geno.txt.mdb"); { auto cursor = lmdb::cursor::open(rtxn, dbi); - std::string_view key, value; + string_view key, value; int count = 0; if (cursor.get(key, value, MDB_FIRST)) { do { - std::cout << count++ << " key: " << key << endl ; // " value: " << value << std::endl; + cout << count++ << " key: " << key << endl ; // " value: " << value << endl; } while (cursor.get(key, value, MDB_NEXT)); } } // destroying cursor before committing/aborting transaction (see below) } */ - // In a read-only transaction, get and print one of the values: - { - auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); - auto info = lmdb::dbi::open(rtxn, "info"); - - // Get statistics - ms_entries contains the number of records - MDB_stat stat; - mdb_stat(rtxn, info, &stat); - - std::cout << "Number of records: " << stat.ms_entries << std::endl; - - std::string_view meta; - if (info.get(rtxn, "meta", meta)) { - std::cout << meta << std::endl; - } else { - std::cout << "meta not found!" << std::endl; - } - } // rtxn aborted automatically - { - auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); - auto geno = lmdb::dbi::open(rtxn, "geno"); - - // Get statistics - ms_entries contains the number of records - MDB_stat stat; - mdb_stat(rtxn, geno, &stat); - - std::cout << "Number of records: " << stat.ms_entries << std::endl; - } // rtxn aborted automatically + size_t ni_total = 0; + { + auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); + auto info = lmdb::dbi::open(rtxn, "info"); - size_t n_miss; - double geno_mean, geno_var; + string_view meta; + if (info.get(rtxn, "meta", meta)) { + cout << meta << endl; + } else { + cout << "meta not found!" << endl; + } + std::string_view view_int; + info.get(rtxn, "numsamples", view_int); + ni_total = lmdb::from_sv<size_t>(view_int); - // setKSnp and/or LOCO support - bool process_ksnps = ksnps.size(); + MDB_stat stat; + mdb_stat(rtxn, info, &stat); + assert(stat.ms_entries == 3); + } + { + auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); + auto geno = lmdb::dbi::open(rtxn, "geno"); - gsl_matrix *matrix_kin = gsl_matrix_safe_alloc(100,100); - size_t ni_total = matrix_kin->size1; - cout << "!!!!" << ni_total << endl; + // Get statistics - ms_entries contains the number of records + MDB_stat stat; + mdb_stat(rtxn, geno, &stat); + + cout << "Number of records: " << stat.ms_entries << endl; + } + + gsl_matrix *matrix_kin = gsl_matrix_safe_alloc(ni_total,ni_total); + // cout << "ni_total: " << ni_total << endl; gsl_vector *geno = gsl_vector_safe_alloc(ni_total); gsl_vector *geno_miss = gsl_vector_safe_alloc(ni_total); @@ -1582,17 +1576,17 @@ int mdb_calc_kin(const string file_geno, const set<string> ksnps, const char *snp = tokens[0]; // first field // cout << snp << "!"; // check whether SNP is included in ksnps (used by LOCO) - if (process_ksnps && ksnps.count(snp) == 0) - continue; + // if (process_ksnps && ksnps.count(snp) == 0) + // continue; token_i++; // skip snp name token_i++; // skip nucleotide field token_i++; // skip nucleotide field // calc SNP stats - geno_mean = 0.0; - n_miss = 0; - geno_var = 0.0; + size_t n_miss = 0; + double geno_mean = 0.0; + double geno_var = 0.0; gsl_vector_set_all(geno_miss, 0); for (size_t i = 0; i < ni_total; ++i) { enforce_str(token_i != tokens.end(), line + " number of fields"); @@ -1674,6 +1668,7 @@ int mdb_calc_kin(const string file_geno, const set<string> ksnps, gsl_vector_free(geno); gsl_vector_free(geno_miss); gsl_matrix_free(Xlarge); + gsl_matrix_free(matrix_kin); return true; } diff --git a/src/gemma_io.h b/src/gemma_io.h index a20007a..593d704 100644 --- a/src/gemma_io.h +++ b/src/gemma_io.h @@ -87,7 +87,7 @@ void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv, void ReadFile_eigenU(const string &file_u, bool &error, gsl_matrix *U); void ReadFile_eigenD(const string &file_d, bool &error, gsl_vector *eval); -int mdb_calc_kin(const string file_geno, const set<string> ksnps, +int mdb_calc_kin(const string file_geno, bool is_loco, vector<int> &indicator_snp, const int mode); bool BimbamKin(const string file_geno, const set<string> ksnps, vector<int> &indicator_snp, const int k_mode, diff --git a/src/param.cpp b/src/param.cpp index a720801..05e4a7b 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -1120,7 +1120,7 @@ void PARAM::CheckData(void) { } // Output some information. - if (file_cor.empty() && file_mstudy.empty() && file_study.empty() && + if (!is_mdb && file_cor.empty() && file_mstudy.empty() && file_study.empty() && a_mode != 15 && a_mode != 27 && a_mode != 28) { cout << "## number of total individuals = " << ni_total << endl; if (a_mode == 43) { @@ -1294,7 +1294,7 @@ void PARAM::ReadBIMBAMGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_ } void PARAM::MdbCalcKin() { - error = !mdb_calc_kin(file_geno, setKSnps, indicator_snp, a_mode - 20); + error = !mdb_calc_kin(file_geno, is_loco(), indicator_snp, a_mode - 20); return; } diff --git a/src/param.h b/src/param.h index a6dfac8..aa08b5d 100644 --- a/src/param.h +++ b/src/param.h @@ -369,6 +369,8 @@ public: const map<string, double> &mapRS2z, gsl_vector *w, gsl_vector *z, vector<size_t> &vec_cat); void UpdateSNP(const map<string, double> &mapRS2wA); + + inline bool is_loco() { return !loco.empty(); } }; size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt); |
