diff options
Diffstat (limited to 'src/gemma_io.cpp')
| -rw-r--r-- | src/gemma_io.cpp | 83 |
1 files changed, 39 insertions, 44 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index 046b152..6265417 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -1478,7 +1478,7 @@ void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) { #include <lmdb++.h> // Read bimbam mean genotype file and calculate kinship matrix. -int mdb_calc_kin(const string file_geno, const set<string> ksnps, +int mdb_calc_kin(const string file_geno, bool is_loco, vector<int> &indicator_snp, const int k_mode) { checkpoint("mdb-kin",file_geno); @@ -1489,62 +1489,56 @@ int mdb_calc_kin(const string file_geno, const set<string> ksnps, env.set_max_dbs(10); env.open("example/mouse_hs1940.geno.mdb", MDB_RDONLY | MDB_NOSUBDIR, 0664); -// Print out all the values using a cursor: /* { + // Print out all the values using a cursor: auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); dbi = lmdb::dbi::open(rtxn, "mouse_hs1940.geno.txt.mdb"); { auto cursor = lmdb::cursor::open(rtxn, dbi); - std::string_view key, value; + string_view key, value; int count = 0; if (cursor.get(key, value, MDB_FIRST)) { do { - std::cout << count++ << " key: " << key << endl ; // " value: " << value << std::endl; + cout << count++ << " key: " << key << endl ; // " value: " << value << endl; } while (cursor.get(key, value, MDB_NEXT)); } } // destroying cursor before committing/aborting transaction (see below) } */ - // In a read-only transaction, get and print one of the values: - { - auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); - auto info = lmdb::dbi::open(rtxn, "info"); - - // Get statistics - ms_entries contains the number of records - MDB_stat stat; - mdb_stat(rtxn, info, &stat); - - std::cout << "Number of records: " << stat.ms_entries << std::endl; - - std::string_view meta; - if (info.get(rtxn, "meta", meta)) { - std::cout << meta << std::endl; - } else { - std::cout << "meta not found!" << std::endl; - } - } // rtxn aborted automatically - { - auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); - auto geno = lmdb::dbi::open(rtxn, "geno"); - - // Get statistics - ms_entries contains the number of records - MDB_stat stat; - mdb_stat(rtxn, geno, &stat); - - std::cout << "Number of records: " << stat.ms_entries << std::endl; - } // rtxn aborted automatically + size_t ni_total = 0; + { + auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); + auto info = lmdb::dbi::open(rtxn, "info"); - size_t n_miss; - double geno_mean, geno_var; + string_view meta; + if (info.get(rtxn, "meta", meta)) { + cout << meta << endl; + } else { + cout << "meta not found!" << endl; + } + std::string_view view_int; + info.get(rtxn, "numsamples", view_int); + ni_total = lmdb::from_sv<size_t>(view_int); - // setKSnp and/or LOCO support - bool process_ksnps = ksnps.size(); + MDB_stat stat; + mdb_stat(rtxn, info, &stat); + assert(stat.ms_entries == 3); + } + { + auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); + auto geno = lmdb::dbi::open(rtxn, "geno"); - gsl_matrix *matrix_kin = gsl_matrix_safe_alloc(100,100); - size_t ni_total = matrix_kin->size1; - cout << "!!!!" << ni_total << endl; + // Get statistics - ms_entries contains the number of records + MDB_stat stat; + mdb_stat(rtxn, geno, &stat); + + cout << "Number of records: " << stat.ms_entries << endl; + } + + gsl_matrix *matrix_kin = gsl_matrix_safe_alloc(ni_total,ni_total); + // cout << "ni_total: " << ni_total << endl; gsl_vector *geno = gsl_vector_safe_alloc(ni_total); gsl_vector *geno_miss = gsl_vector_safe_alloc(ni_total); @@ -1582,17 +1576,17 @@ int mdb_calc_kin(const string file_geno, const set<string> ksnps, const char *snp = tokens[0]; // first field // cout << snp << "!"; // check whether SNP is included in ksnps (used by LOCO) - if (process_ksnps && ksnps.count(snp) == 0) - continue; + // if (process_ksnps && ksnps.count(snp) == 0) + // continue; token_i++; // skip snp name token_i++; // skip nucleotide field token_i++; // skip nucleotide field // calc SNP stats - geno_mean = 0.0; - n_miss = 0; - geno_var = 0.0; + size_t n_miss = 0; + double geno_mean = 0.0; + double geno_var = 0.0; gsl_vector_set_all(geno_miss, 0); for (size_t i = 0; i < ni_total; ++i) { enforce_str(token_i != tokens.end(), line + " number of fields"); @@ -1674,6 +1668,7 @@ int mdb_calc_kin(const string file_geno, const set<string> ksnps, gsl_vector_free(geno); gsl_vector_free(geno_miss); gsl_matrix_free(Xlarge); + gsl_matrix_free(matrix_kin); return true; } |
