about summary refs log tree commit diff
path: root/src/gemma_io.cpp
diff options
context:
space:
mode:
authorPjotr Prins2025-11-30 08:53:10 +0100
committerPjotr Prins2025-11-30 08:53:10 +0100
commitab434bbd38a61941bf32afcf91b7cc397462a53b (patch)
tree6bddea2b6a706a6f91ec98a654627f3458e84f9f /src/gemma_io.cpp
parent254113f4e02d5b423af302e9facb0ed580749ef5 (diff)
downloadpangemma-ab434bbd38a61941bf32afcf91b7cc397462a53b.tar.gz
Write out K
Diffstat (limited to 'src/gemma_io.cpp')
-rw-r--r--src/gemma_io.cpp29
1 files changed, 3 insertions, 26 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp
index 8fa0eab..baa44b9 100644
--- a/src/gemma_io.cpp
+++ b/src/gemma_io.cpp
@@ -1477,7 +1477,7 @@ void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) {
 
 
 // Read bimbam mean genotype file and calculate kinship matrix.
-int mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode, const vector<int> &indicator_snp) {
+gsl_matrix *mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode, const vector<int> &indicator_snp) {
   checkpoint("mdb-kin",file_geno);
 
   /* Create and open the LMDB environment: */
@@ -1537,20 +1537,10 @@ int mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode, const v
     if (t % display_pace == 0) {
       ProgressBar("Reading SNPs", t, num_markers - 1);
     }
-
-    // auto tokens = tokenize_whitespace(line,ni_total+3,"");
-    // auto token_i = tokens.begin();
-    // const char *snp = tokens[0]; // first field
-    // token_i++; // skip snp name
-    // token_i++; // skip nucleotide field
-    // token_i++; // skip nucleotide field
-
     string_view key, value;
     cursor.get(key, value, mdb_fetch);
     mdb_fetch = MDB_NEXT;
-    // cout << " key: " << key << endl ;
     size_t num_floats = value.size() / sizeof(float);
-    // cout << " num: " << num_floats << endl ;
     assert(num_floats == ni_total);
     const float* gs = reinterpret_cast<const float*>(value.data());
     size_t n_miss = 0;
@@ -1559,23 +1549,11 @@ int mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode, const v
     gsl_vector_set_all(geno_miss, 0);
 
     for (size_t i = 0; i < ni_total; ++i) {
-      // enforce_str(token_i != tokens.end(), line + " number of fields");
-      // auto field = *token_i;
-      // auto sfield = std::string(field);
-      // cout << i << ":" << sfield << "," << endl;
-      // if (strncmp(field,"NA",2)==0) { // missing value
-      //  gsl_vector_set(geno_miss, i, 0);
-      //  n_miss++;
-      // } else {
       double d = gs[i];
-      // if (is_strict_mode() && d == 0.0)
-      //   enforce_is_float(std::string(field));  // rule out non NA and non-float fields
       gsl_vector_set(geno, i, d);
       gsl_vector_set(geno_miss, i, 1);
       geno_mean += d;
       geno_var += d * d;
-      // }
-      // token_i++;
     }
 
     geno_mean /= (double)(ni_total - n_miss);
@@ -1617,7 +1595,7 @@ int mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode, const v
     fast_eigen_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
     write(matrix_kin,"K updated");
   }
-  ProgressBar("Reading SNPs", 100, 100);
+  ProgressBar("Reading SNPs", num_markers, num_markers);
   cout << endl;
 
   enforce_gsl(gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test));
@@ -1625,9 +1603,8 @@ int mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode, const v
   gsl_vector_free(geno);
   gsl_vector_free(geno_miss);
   gsl_matrix_free(Xlarge);
-  gsl_matrix_free(matrix_kin);
 
-  return true;
+  return matrix_kin;
 }
 
 // Read bimbam mean genotype file and calculate kinship matrix.