about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorPjotr Prins2025-11-30 07:41:48 +0100
committerPjotr Prins2025-11-30 07:41:48 +0100
commit882a426c70370e50a521435e1b2175598e0fe411 (patch)
tree0a6f5abd5f325aca6d5b39de33654d1a281122bd /src
parent16bf371dff5d1ec20a7ce924c3329dd298e10de3 (diff)
downloadpangemma-882a426c70370e50a521435e1b2175598e0fe411.tar.gz
Fetch ni_total from lmdb
Diffstat (limited to 'src')
-rw-r--r--src/gemma_io.cpp83
-rw-r--r--src/gemma_io.h2
-rw-r--r--src/param.cpp4
-rw-r--r--src/param.h2
4 files changed, 44 insertions, 47 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp
index 046b152..6265417 100644
--- a/src/gemma_io.cpp
+++ b/src/gemma_io.cpp
@@ -1478,7 +1478,7 @@ void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) {
 #include <lmdb++.h>
 
 // Read bimbam mean genotype file and calculate kinship matrix.
-int mdb_calc_kin(const string file_geno, const set<string> ksnps,
+int mdb_calc_kin(const string file_geno, bool is_loco,
                vector<int> &indicator_snp, const int k_mode) {
   checkpoint("mdb-kin",file_geno);
 
@@ -1489,62 +1489,56 @@ int mdb_calc_kin(const string file_geno, const set<string> ksnps,
   env.set_max_dbs(10);
   env.open("example/mouse_hs1940.geno.mdb", MDB_RDONLY | MDB_NOSUBDIR, 0664);
 
-// Print out all the values using a cursor:
   /*
   {
+    // Print out all the values using a cursor:
     auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
     dbi = lmdb::dbi::open(rtxn, "mouse_hs1940.geno.txt.mdb");
     {
       auto cursor = lmdb::cursor::open(rtxn, dbi);
 
-      std::string_view key, value;
+      string_view key, value;
       int count = 0;
       if (cursor.get(key, value, MDB_FIRST)) {
         do {
-          std::cout << count++ << " key: " << key << endl ; // "  value: " << value << std::endl;
+          cout << count++ << " key: " << key << endl ; // "  value: " << value << endl;
         } while (cursor.get(key, value, MDB_NEXT));
       }
     } // destroying cursor before committing/aborting transaction (see below)
   }
   */
-   // In a read-only transaction, get and print one of the values:
-   {
-       auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
-       auto info = lmdb::dbi::open(rtxn, "info");
-
-       // Get statistics - ms_entries contains the number of records
-       MDB_stat stat;
-       mdb_stat(rtxn, info, &stat);
-
-       std::cout << "Number of records: " << stat.ms_entries << std::endl;
-
-       std::string_view meta;
-       if (info.get(rtxn, "meta", meta)) {
-           std::cout << meta << std::endl;
-       } else {
-           std::cout << "meta not found!" << std::endl;
-       }
-   } // rtxn aborted automatically
-   {
-       auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
-       auto geno = lmdb::dbi::open(rtxn, "geno");
-
-       // Get statistics - ms_entries contains the number of records
-       MDB_stat stat;
-       mdb_stat(rtxn, geno, &stat);
-
-       std::cout << "Number of records: " << stat.ms_entries << std::endl;
-   } // rtxn aborted automatically
+  size_t ni_total = 0;
+  {
+    auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
+    auto info = lmdb::dbi::open(rtxn, "info");
 
-  size_t n_miss;
-  double geno_mean, geno_var;
+    string_view meta;
+    if (info.get(rtxn, "meta", meta)) {
+      cout << meta << endl;
+    } else {
+      cout << "meta not found!" << endl;
+    }
+    std::string_view view_int;
+    info.get(rtxn, "numsamples", view_int);
+    ni_total = lmdb::from_sv<size_t>(view_int);
 
-  // setKSnp and/or LOCO support
-  bool process_ksnps = ksnps.size();
+    MDB_stat stat;
+    mdb_stat(rtxn, info, &stat);
+    assert(stat.ms_entries == 3);
+  }
+  {
+    auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
+    auto geno = lmdb::dbi::open(rtxn, "geno");
 
-  gsl_matrix *matrix_kin = gsl_matrix_safe_alloc(100,100);
-  size_t ni_total = matrix_kin->size1;
-  cout << "!!!!" << ni_total << endl;
+    // Get statistics - ms_entries contains the number of records
+    MDB_stat stat;
+    mdb_stat(rtxn, geno, &stat);
+
+    cout << "Number of records: " << stat.ms_entries << endl;
+  }
+
+  gsl_matrix *matrix_kin = gsl_matrix_safe_alloc(ni_total,ni_total);
+  // cout << "ni_total: " << ni_total << endl;
   gsl_vector *geno = gsl_vector_safe_alloc(ni_total);
   gsl_vector *geno_miss = gsl_vector_safe_alloc(ni_total);
 
@@ -1582,17 +1576,17 @@ int mdb_calc_kin(const string file_geno, const set<string> ksnps,
     const char *snp = tokens[0]; // first field
     // cout << snp << "!";
     // check whether SNP is included in ksnps (used by LOCO)
-    if (process_ksnps && ksnps.count(snp) == 0)
-      continue;
+    // if (process_ksnps && ksnps.count(snp) == 0)
+    //   continue;
 
     token_i++; // skip snp name
     token_i++; // skip nucleotide field
     token_i++; // skip nucleotide field
 
     // calc SNP stats
-    geno_mean = 0.0;
-    n_miss = 0;
-    geno_var = 0.0;
+    size_t n_miss = 0;
+    double geno_mean = 0.0;
+    double geno_var = 0.0;
     gsl_vector_set_all(geno_miss, 0);
     for (size_t i = 0; i < ni_total; ++i) {
       enforce_str(token_i != tokens.end(), line + " number of fields");
@@ -1674,6 +1668,7 @@ int mdb_calc_kin(const string file_geno, const set<string> ksnps,
   gsl_vector_free(geno);
   gsl_vector_free(geno_miss);
   gsl_matrix_free(Xlarge);
+  gsl_matrix_free(matrix_kin);
 
   return true;
 }
diff --git a/src/gemma_io.h b/src/gemma_io.h
index a20007a..593d704 100644
--- a/src/gemma_io.h
+++ b/src/gemma_io.h
@@ -87,7 +87,7 @@ void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv,
 void ReadFile_eigenU(const string &file_u, bool &error, gsl_matrix *U);
 void ReadFile_eigenD(const string &file_d, bool &error, gsl_vector *eval);
 
-int mdb_calc_kin(const string file_geno, const set<string> ksnps,
+int mdb_calc_kin(const string file_geno, bool is_loco,
             vector<int> &indicator_snp, const int mode);
 bool BimbamKin(const string file_geno, const set<string> ksnps,
                vector<int> &indicator_snp, const int k_mode,
diff --git a/src/param.cpp b/src/param.cpp
index a720801..05e4a7b 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -1120,7 +1120,7 @@ void PARAM::CheckData(void) {
   }
 
   // Output some information.
-  if (file_cor.empty() && file_mstudy.empty() && file_study.empty() &&
+  if (!is_mdb && file_cor.empty() && file_mstudy.empty() && file_study.empty() &&
       a_mode != 15 && a_mode != 27 && a_mode != 28) {
     cout << "## number of total individuals = " << ni_total << endl;
     if (a_mode == 43) {
@@ -1294,7 +1294,7 @@ void PARAM::ReadBIMBAMGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_
 }
 
 void PARAM::MdbCalcKin() {
-  error = !mdb_calc_kin(file_geno, setKSnps, indicator_snp, a_mode - 20);
+  error = !mdb_calc_kin(file_geno, is_loco(), indicator_snp, a_mode - 20);
   return;
 }
 
diff --git a/src/param.h b/src/param.h
index a6dfac8..aa08b5d 100644
--- a/src/param.h
+++ b/src/param.h
@@ -369,6 +369,8 @@ public:
                    const map<string, double> &mapRS2z, gsl_vector *w,
                    gsl_vector *z, vector<size_t> &vec_cat);
   void UpdateSNP(const map<string, double> &mapRS2wA);
+
+  inline bool is_loco() { return !loco.empty(); }
 };
 
 size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt);