about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorPjotr Prins2025-11-27 10:20:49 +0100
committerPjotr Prins2025-11-27 10:23:18 +0100
commit6bd0ea3e2a245c3f86f66505eec118296ae04d30 (patch)
tree04ef903832c459490f508af2dc5ed474c4f7f22a /src
parent333bfc9adfff0fcd444a815cfcca27708442fe80 (diff)
downloadpangemma-6bd0ea3e2a245c3f86f66505eec118296ae04d30.tar.gz
No one uses these uchar versions
Diffstat (limited to 'src')
-rw-r--r--src/gemma_io.cpp111
-rw-r--r--src/gemma_io.h5
-rw-r--r--src/param.cpp32
-rw-r--r--src/param.h4
4 files changed, 10 insertions, 142 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp
index 3826cea..b1bc368 100644
--- a/src/gemma_io.cpp
+++ b/src/gemma_io.cpp
@@ -1907,117 +1907,6 @@ bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
   return true;
 }
 
-// Compact version of the above function, using uchar instead of
-// gsl_matrix.
-bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
-                   vector<int> &indicator_snp,
-                   vector<vector<unsigned char>> &Xt, gsl_matrix *K,
-                   const bool calc_K, const size_t ni_test,
-                   const size_t ns_test) {
-  checkpoint("read-file-geno",file_geno);
-  debug_msg("entered");
-  igzstream infile(file_geno.c_str(), igzstream::in);
-  if (!infile) {
-    cout << "error reading genotype file:" << file_geno << endl;
-    return false;
-  }
-
-  Xt.clear();
-  vector<unsigned char> Xt_row;
-  for (size_t i = 0; i < ni_test; i++) {
-    Xt_row.push_back(0);
-  }
-
-  string line;
-  char *ch_ptr;
-
-  if (calc_K == true) {
-    gsl_matrix_set_zero(K);
-  }
-
-  gsl_vector *genotype = gsl_vector_safe_alloc(ni_test);
-  gsl_vector *genotype_miss = gsl_vector_safe_alloc(ni_test);
-  double geno, geno_mean;
-  size_t n_miss;
-
-  size_t ni_total = indicator_idv.size();
-  size_t ns_total = indicator_snp.size();
-
-  size_t c_idv = 0, c_snp = 0;
-
-  auto infilen = file_geno.c_str();
-  for (size_t i = 0; i < ns_total; ++i) {
-    safeGetline(infile, line).eof();
-    if (indicator_snp[i] == 0) {
-      continue;
-    }
-
-    ch_ptr = strtok_safe2((char *)line.c_str(), " ,\t",infilen);
-    ch_ptr = strtok_safe2(NULL, " ,\t",infilen);
-    ch_ptr = strtok_safe2(NULL, " ,\t",infilen);
-
-    c_idv = 0;
-    geno_mean = 0;
-    n_miss = 0;
-    gsl_vector_set_zero(genotype_miss);
-    for (uint j = 0; j < ni_total; ++j) {
-      ch_ptr = strtok_safe2(NULL, " ,\t",infilen);
-      if (indicator_idv[j] == 0) {
-        continue;
-      }
-
-      if (strcmp(ch_ptr, "NA") == 0) {
-        gsl_vector_set(genotype_miss, c_idv, 1);
-        n_miss++;
-      } else {
-        geno = atof(ch_ptr);
-        gsl_vector_set(genotype, c_idv, geno);
-        geno_mean += geno;
-      }
-      c_idv++;
-    }
-
-    geno_mean /= (double)(ni_test - n_miss);
-
-    for (size_t j = 0; j < genotype->size; ++j) {
-      if (gsl_vector_get(genotype_miss, j) == 1) {
-        geno = geno_mean;
-      } else {
-        geno = gsl_vector_get(genotype, j);
-      }
-
-      Xt_row[j] = Double02ToUchar(geno);
-      gsl_vector_set(genotype, j, (geno - geno_mean));
-    }
-    Xt.push_back(Xt_row);
-
-    if (calc_K == true) {
-      gsl_blas_dsyr(CblasUpper, 1.0, genotype, K);
-    }
-
-    c_snp++;
-  }
-
-  if (calc_K == true) {
-    gsl_matrix_scale(K, 1.0 / (double)ns_test);
-
-    for (size_t i = 0; i < genotype->size; ++i) {
-      for (size_t j = 0; j < i; ++j) {
-        geno = gsl_matrix_get(K, j, i);
-        gsl_matrix_set(K, i, j, geno);
-      }
-    }
-  }
-
-  gsl_vector_free(genotype);
-  gsl_vector_free(genotype_miss);
-
-  infile.clear();
-  infile.close();
-
-  return true;
-}
-
 // Read bimbam mean genotype file, the second time, recode "mean"
 // genotype and calculate K.
 bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
diff --git a/src/gemma_io.h b/src/gemma_io.h
index 5c14227..116c1a5 100644
--- a/src/gemma_io.h
+++ b/src/gemma_io.h
@@ -100,11 +100,6 @@ bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
 bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
                   vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
                   const bool calc_K);
-bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
-                   vector<int> &indicator_snp,
-                   vector<vector<unsigned char>> &Xt, gsl_matrix *K,
-                   const bool calc_K, const size_t ni_test,
-                   const size_t ns_test);
 bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
                   vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt,
                   gsl_matrix *K, const bool calc_K, const size_t ni_test,
diff --git a/src/param.cpp b/src/param.cpp
index f96e9c3..9755579 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -104,7 +104,9 @@ PARAM::PARAM(void)
       randseed(-1), window_cm(0), window_bp(0), window_ns(0), n_block(200),
       error(false), ni_subsample(0), n_cvt(1), n_cat(0), n_vc(1),
       time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0),
-      time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) {}
+      time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) {
+    is_mdb = false;
+  }
 
 PARAM::~PARAM() {
   if (gsl_r)
@@ -181,12 +183,14 @@ void PARAM::ReadFiles(void) {
     }
 
     if (!file_geno.empty()) {
-      if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) ==
-          false) {
+      std::string extension = ".mdb";
+      if (file_geno.size() >= extension.size() && file_geno.compare(file_geno.size() - extension.size(), extension.size(), extension) == 0)
+        is_mdb = true;
+      if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) {
         error = true;
       }
 
-      if (CountFileLines(file_geno, ns_total) == false) {
+      if (!is_mdb && is_check_mode() && CountFileLines(file_geno, ns_total) == false) {
         error = true;
       }
     }
@@ -1277,26 +1281,6 @@ void PARAM::ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) {
   return;
 }
 
-void PARAM::ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K,
-                          const bool calc_K) {
-  string file_str;
-
-  if (!file_bfile.empty()) {
-    file_str = file_bfile + ".bed";
-    if (ReadFile_bed(file_str, indicator_idv, indicator_snp, Xt, K, calc_K,
-                     ni_test, ns_test) == false) {
-      error = true;
-    }
-  } else {
-    if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, Xt, K, calc_K,
-                      ni_test, ns_test) == false) {
-      error = true;
-    }
-  }
-
-  return;
-}
-
 void PARAM::CalcKin(gsl_matrix *matrix_kin) {
   string file_str;
 
diff --git a/src/param.h b/src/param.h
index d3ce686..8d2fba3 100644
--- a/src/param.h
+++ b/src/param.h
@@ -160,6 +160,8 @@ public:
   string file_ksnps;   // File SNPs for computing K
   string file_gwasnps; // File SNPs for computing GWAS
 
+  bool is_mdb;
+
   // QC-related parameters.
   double miss_level;
   double maf_level;
@@ -337,8 +339,6 @@ public:
   void CheckData();
   void PrintSummary();
   void ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K);
-  void ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K,
-                     const bool calc_K);
   void CheckCvt();
   void CopyCvt(gsl_matrix *W);
   void CopyA(size_t flag, gsl_matrix *A);