about summary refs log tree commit diff
path: root/src/gemma_io.cpp
diff options
context:
space:
mode:
authorPjotr Prins2025-11-23 14:09:24 +0100
committerPjotr Prins2025-11-23 14:09:24 +0100
commit8229e4e18dddae7bf570ca37767ff39726eede45 (patch)
tree17986d394dbc3a03da9bebe6eba96119dff571dd /src/gemma_io.cpp
parentff196955e3ca5bdd671a44f852f42323d3828be2 (diff)
downloadpangemma-8229e4e18dddae7bf570ca37767ff39726eede45.tar.gz
Comments HEAD master
Diffstat (limited to 'src/gemma_io.cpp')
-rw-r--r--src/gemma_io.cpp10
1 files changed, 6 insertions, 4 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp
index fddfd79..7d1f0ca 100644
--- a/src/gemma_io.cpp
+++ b/src/gemma_io.cpp
@@ -698,7 +698,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
 
   gsl_vector *genotype = gsl_vector_safe_alloc(W->size1);
   gsl_vector *genotype_miss = gsl_vector_safe_alloc(W->size1);
-  gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2); // Covariates
   gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2);
   gsl_vector *Wtx = gsl_vector_safe_alloc(W->size2);
   gsl_vector *WtWiWtx = gsl_vector_safe_alloc(W->size2);
@@ -746,13 +746,14 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
   //      cerr << key << endl;
   // }
   while (!safe_get_line(infile, line).eof()) {
-    // cout << line;
+    // fetch SNP annotation
     ch_ptr = strtok_safe2((char *)line.c_str(), " ,\t",infilen);
     rs = ch_ptr;
     ch_ptr = strtok_safe2(NULL, " ,\t",infilen);
     minor = ch_ptr;
     ch_ptr = strtok_safe2(NULL, " ,\t",infilen);
     major = ch_ptr;
+    // genotypes get read below
 
     if (setSnps.size() != 0 && setSnps.count(rs) == 0) {
       // if SNP in geno but not in -snps we add an missing value
@@ -792,11 +793,12 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
     c_idv = 0;
     gsl_vector_set_zero(genotype_miss);
     auto infilen = file_geno.c_str();
-    for (int i = 0; i < ni_total; ++i) {
+    for (int i = 0; i < ni_total; ++i) { // read genotypes
       ch_ptr = strtok_safe2(NULL, " ,\t",(string("To many individuals/strains/genometypes in pheno file for ")+infilen).c_str());
       if (indicator_idv[i] == 0)
         continue;
 
+      // annotate missing genotypes
       enforce_msg(ch_ptr,"Problem reading geno file (not enough genotypes in line)");
       if (strcmp(ch_ptr, "NA") == 0) {
         gsl_vector_set(genotype_miss, c_idv, 1);
@@ -900,7 +902,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
   if (max_g != 2.0)
     warning_msg("The maximum genotype value is not 2.0 - this is not the BIMBAM standard and will skew l_lme and effect sizes");
 
-  gsl_vector_free(genotype);
+  gsl_vector_free(genotype); // we don't hang on to the genotypes
   gsl_vector_free(genotype_miss);
   gsl_matrix_free(WtW);
   gsl_matrix_free(WtWi);