about summary refs log tree commit diff
path: root/src/gemma_io.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/gemma_io.cpp')
-rw-r--r--src/gemma_io.cpp83
1 files changed, 39 insertions, 44 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp
index 046b152..6265417 100644
--- a/src/gemma_io.cpp
+++ b/src/gemma_io.cpp
@@ -1478,7 +1478,7 @@ void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) {
 #include <lmdb++.h>
 
 // Read bimbam mean genotype file and calculate kinship matrix.
-int mdb_calc_kin(const string file_geno, const set<string> ksnps,
+int mdb_calc_kin(const string file_geno, bool is_loco,
                vector<int> &indicator_snp, const int k_mode) {
   checkpoint("mdb-kin",file_geno);
 
@@ -1489,62 +1489,56 @@ int mdb_calc_kin(const string file_geno, const set<string> ksnps,
   env.set_max_dbs(10);
   env.open("example/mouse_hs1940.geno.mdb", MDB_RDONLY | MDB_NOSUBDIR, 0664);
 
-// Print out all the values using a cursor:
   /*
   {
+    // Print out all the values using a cursor:
     auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
     dbi = lmdb::dbi::open(rtxn, "mouse_hs1940.geno.txt.mdb");
     {
       auto cursor = lmdb::cursor::open(rtxn, dbi);
 
-      std::string_view key, value;
+      string_view key, value;
       int count = 0;
       if (cursor.get(key, value, MDB_FIRST)) {
         do {
-          std::cout << count++ << " key: " << key << endl ; // "  value: " << value << std::endl;
+          cout << count++ << " key: " << key << endl ; // "  value: " << value << endl;
         } while (cursor.get(key, value, MDB_NEXT));
       }
     } // destroying cursor before committing/aborting transaction (see below)
   }
   */
-   // In a read-only transaction, get and print one of the values:
-   {
-       auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
-       auto info = lmdb::dbi::open(rtxn, "info");
-
-       // Get statistics - ms_entries contains the number of records
-       MDB_stat stat;
-       mdb_stat(rtxn, info, &stat);
-
-       std::cout << "Number of records: " << stat.ms_entries << std::endl;
-
-       std::string_view meta;
-       if (info.get(rtxn, "meta", meta)) {
-           std::cout << meta << std::endl;
-       } else {
-           std::cout << "meta not found!" << std::endl;
-       }
-   } // rtxn aborted automatically
-   {
-       auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
-       auto geno = lmdb::dbi::open(rtxn, "geno");
-
-       // Get statistics - ms_entries contains the number of records
-       MDB_stat stat;
-       mdb_stat(rtxn, geno, &stat);
-
-       std::cout << "Number of records: " << stat.ms_entries << std::endl;
-   } // rtxn aborted automatically
+  size_t ni_total = 0;
+  {
+    auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
+    auto info = lmdb::dbi::open(rtxn, "info");
 
-  size_t n_miss;
-  double geno_mean, geno_var;
+    string_view meta;
+    if (info.get(rtxn, "meta", meta)) {
+      cout << meta << endl;
+    } else {
+      cout << "meta not found!" << endl;
+    }
+    std::string_view view_int;
+    info.get(rtxn, "numsamples", view_int);
+    ni_total = lmdb::from_sv<size_t>(view_int);
 
-  // setKSnp and/or LOCO support
-  bool process_ksnps = ksnps.size();
+    MDB_stat stat;
+    mdb_stat(rtxn, info, &stat);
+    assert(stat.ms_entries == 3);
+  }
+  {
+    auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
+    auto geno = lmdb::dbi::open(rtxn, "geno");
 
-  gsl_matrix *matrix_kin = gsl_matrix_safe_alloc(100,100);
-  size_t ni_total = matrix_kin->size1;
-  cout << "!!!!" << ni_total << endl;
+    // Get statistics - ms_entries contains the number of records
+    MDB_stat stat;
+    mdb_stat(rtxn, geno, &stat);
+
+    cout << "Number of records: " << stat.ms_entries << endl;
+  }
+
+  gsl_matrix *matrix_kin = gsl_matrix_safe_alloc(ni_total,ni_total);
+  // cout << "ni_total: " << ni_total << endl;
   gsl_vector *geno = gsl_vector_safe_alloc(ni_total);
   gsl_vector *geno_miss = gsl_vector_safe_alloc(ni_total);
 
@@ -1582,17 +1576,17 @@ int mdb_calc_kin(const string file_geno, const set<string> ksnps,
     const char *snp = tokens[0]; // first field
     // cout << snp << "!";
     // check whether SNP is included in ksnps (used by LOCO)
-    if (process_ksnps && ksnps.count(snp) == 0)
-      continue;
+    // if (process_ksnps && ksnps.count(snp) == 0)
+    //   continue;
 
     token_i++; // skip snp name
     token_i++; // skip nucleotide field
     token_i++; // skip nucleotide field
 
     // calc SNP stats
-    geno_mean = 0.0;
-    n_miss = 0;
-    geno_var = 0.0;
+    size_t n_miss = 0;
+    double geno_mean = 0.0;
+    double geno_var = 0.0;
     gsl_vector_set_all(geno_miss, 0);
     for (size_t i = 0; i < ni_total; ++i) {
       enforce_str(token_i != tokens.end(), line + " number of fields");
@@ -1674,6 +1668,7 @@ int mdb_calc_kin(const string file_geno, const set<string> ksnps,
   gsl_vector_free(geno);
   gsl_vector_free(geno_miss);
   gsl_matrix_free(Xlarge);
+  gsl_matrix_free(matrix_kin);
 
   return true;
 }