about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/lmm.cpp85
-rw-r--r--src/lmm.h10
2 files changed, 59 insertions, 36 deletions
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 969e6fc..9fe6bbb 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -1981,7 +1981,6 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp,
         CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
         p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
       }
-
       time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
 
       auto markerinfo = markers[i];
@@ -1991,14 +1990,6 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp,
     }
   };
 
-  /*
-  const auto num_markers = indicator_snp.size();
-  enforce_msg(num_markers > 0,"Zero SNPs to process - data corrupt?");
-  if (num_markers < 50) {
-    cerr << num_markers << " SNPs" << endl;
-    warning_msg("very few SNPs processed");
-  }
-  */
   const size_t progress_step = (num_markers/50>d_pace ? num_markers/50 : d_pace);
   Markers markers;
 
@@ -2012,23 +2003,17 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp,
     // continue;
 
     auto tup = fetch_snp(t); // use the callback
-    auto success = get<0>(tup);
-    if (!success)
+    auto state = get<0>(tup);
+    if (state == SKIP)
       continue;
-    // typedef tuple< bool,MarkerInfo,vector<double> > SnpNameValues2;
-    // auto marker = get<1>(tup);
-    // auto chr = get<2>(tup);
-    // auto mpos = get<3>(tup);
+    if (state == LAST)
+      break; // marker loop because of LOCO
+
     auto markerinfo = get<1>(tup);
     auto gs = get<2>(tup);
 
     markers.push_back(markerinfo);
 
-    // check whether SNP is included in gwasnps (used by LOCO)
-    /*
-    if (process_gwasnps && gwasnps.count(snp) == 0)
-      continue;
-    */
     // drop missing idv and plug mean values for missing geno
     double x_total = 0.0; // sum genotype values to compute x_mean
     uint vpos = 0;        // position in target vector
@@ -2067,7 +2052,6 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp,
     gsl_vector_safe_memcpy(&Xlarge_col.vector, x);
     c++; // count markers going in
 
-
     if (c % msize == 0) {
       batch_compute(msize,markers);
       markers.clear();
@@ -2094,6 +2078,44 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp,
     return;
   }
 
+  outfile << "chr" << "\t"
+          << "rs" << "\t"
+          << "ps" << "\t"
+          << "n_miss" << "\t"
+          << "allele1" << "\t"
+          << "allele0" << "\t"
+          << "af" << "\t";
+
+  if (a_mode != M_LMM2) {
+    outfile << "beta" << "\t";
+    outfile << "se" << "\t";
+  }
+
+  if (a_mode != M_LMM3 && a_mode != M_LMM9)
+    outfile << "logl_H1" << "\t";
+
+  switch(a_mode) {
+  case M_LMM1:
+    outfile << "l_remle" << "\t"
+            << "p_wald" << endl;
+    break;
+  case M_LMM2:
+  case M_LMM9:
+    outfile << "l_mle" << "\t"
+            << "p_lrt" << endl;
+    break;
+  case M_LMM3:
+    outfile << "p_score" << endl;
+    break;
+  case M_LMM4:
+    outfile << "l_remle" << "\t"
+            << "l_mle" << "\t"
+            << "p_wald" << "\t"
+            << "p_lrt" << "\t"
+            << "p_score" << endl;
+    break;
+  }
+
   auto sumstats = [&] (SUMSTAT2 st) {
     outfile << scientific << setprecision(6);
     auto m = st.markerinfo;
@@ -2212,10 +2234,6 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
   mdb_stat(rtxn, geno_mdb, &stat);
   auto num_markers = stat.ms_entries;
 
-  // fetch_snp is a callback function for every SNP row
-  // returns typedef std::tuple<bool,string,std::vector<double> > SnpNameValues;
-
-  // size_t prev_line = 0;
   auto mdb_fetch = MDB_FIRST;
 
   auto cursor = lmdb::cursor::open(rtxn, geno_mdb);
@@ -2227,20 +2245,14 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
 
     string_view key,value;
 
-    /*
-    while (prev_line <= num) {
-      cursor.get(key, value, MDB_NEXT);
-      prev_line++;
-    }
-    */
-    auto success = cursor.get(key, value, mdb_fetch);
+    auto mdb_success = cursor.get(key, value, mdb_fetch);
     mdb_fetch = MDB_NEXT;
 
     // uint8_t chr;
     vector<double> gs;
     MarkerInfo markerinfo;
 
-    if (success) {
+    if (mdb_success) {
       size_t size = 0;
       // ---- Depending on the format we get different buffers - currently float and byte are supported:
       if (format == "Gb") {
@@ -2282,7 +2294,10 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
       // printf("%#02x %#02x\n", chr, loco_chr);
 
       if (is_loco && loco_chr != chr) {
-        return make_tuple(false, MarkerInfo { .name="", .chr=chr, .pos=pos } , gs);
+        if (chr > loco_chr)
+            return make_tuple(LAST, MarkerInfo { .name="", .chr=chr, .pos=pos } , gs);
+          else
+            return make_tuple(SKIP, MarkerInfo { .name="", .chr=chr, .pos=pos } , gs);
       }
 
       string_view value2;
@@ -2299,7 +2314,7 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
 
       // cout << "!!!!" << size << marker << ": af" << maf << " " << gs[0] << "," << gs[1] << "," << gs[2] << "," << gs[3] << endl;
     }
-    return make_tuple(success, markerinfo, gs);
+    return make_tuple(COMPUTE, markerinfo, gs);
   };
   LMM::mdb_analyze(fetch_snp,U,eval,UtW,Uty,W,y,num_markers);
 
diff --git a/src/lmm.h b/src/lmm.h
index da5ad21..295602a 100644
--- a/src/lmm.h
+++ b/src/lmm.h
@@ -55,7 +55,15 @@ struct MarkerInfo {
 
 typedef vector<MarkerInfo> Markers;
 typedef tuple< string,vector<double> > SnpNameValues;
-typedef tuple< bool,MarkerInfo,vector<double> > SnpNameValues2; // success, markerinfo (maf and n_miss are computed)
+
+enum MarkerState {
+  FAIL,
+  COMPUTE,
+  SKIP,
+  LAST
+};
+
+typedef tuple< MarkerState,MarkerInfo,vector<double> > SnpNameValues2; // success, markerinfo (maf and n_miss are computed)
 // Results for LMM.
 class SUMSTAT2 {
 public: