diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/lmm.cpp | 85 | ||||
| -rw-r--r-- | src/lmm.h | 10 |
2 files changed, 59 insertions, 36 deletions
diff --git a/src/lmm.cpp b/src/lmm.cpp index 969e6fc..9fe6bbb 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -1981,7 +1981,6 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp, CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1); } - time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); auto markerinfo = markers[i]; @@ -1991,14 +1990,6 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp, } }; - /* - const auto num_markers = indicator_snp.size(); - enforce_msg(num_markers > 0,"Zero SNPs to process - data corrupt?"); - if (num_markers < 50) { - cerr << num_markers << " SNPs" << endl; - warning_msg("very few SNPs processed"); - } - */ const size_t progress_step = (num_markers/50>d_pace ? num_markers/50 : d_pace); Markers markers; @@ -2012,23 +2003,17 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp, // continue; auto tup = fetch_snp(t); // use the callback - auto success = get<0>(tup); - if (!success) + auto state = get<0>(tup); + if (state == SKIP) continue; - // typedef tuple< bool,MarkerInfo,vector<double> > SnpNameValues2; - // auto marker = get<1>(tup); - // auto chr = get<2>(tup); - // auto mpos = get<3>(tup); + if (state == LAST) + break; // marker loop because of LOCO + auto markerinfo = get<1>(tup); auto gs = get<2>(tup); markers.push_back(markerinfo); - // check whether SNP is included in gwasnps (used by LOCO) - /* - if (process_gwasnps && gwasnps.count(snp) == 0) - continue; - */ // drop missing idv and plug mean values for missing geno double x_total = 0.0; // sum genotype values to compute x_mean uint vpos = 0; // position in target vector @@ -2067,7 +2052,6 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp, gsl_vector_safe_memcpy(&Xlarge_col.vector, x); c++; // count markers going in - if (c % msize == 0) { batch_compute(msize,markers); markers.clear(); @@ -2094,6 +2078,44 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp, return; } + outfile << "chr" << "\t" + << "rs" << "\t" + << "ps" << "\t" + << "n_miss" << "\t" + << "allele1" << "\t" + << "allele0" << "\t" + << "af" << "\t"; + + if (a_mode != M_LMM2) { + outfile << "beta" << "\t"; + outfile << "se" << "\t"; + } + + if (a_mode != M_LMM3 && a_mode != M_LMM9) + outfile << "logl_H1" << "\t"; + + switch(a_mode) { + case M_LMM1: + outfile << "l_remle" << "\t" + << "p_wald" << endl; + break; + case M_LMM2: + case M_LMM9: + outfile << "l_mle" << "\t" + << "p_lrt" << endl; + break; + case M_LMM3: + outfile << "p_score" << endl; + break; + case M_LMM4: + outfile << "l_remle" << "\t" + << "l_mle" << "\t" + << "p_wald" << "\t" + << "p_lrt" << "\t" + << "p_score" << endl; + break; + } + auto sumstats = [&] (SUMSTAT2 st) { outfile << scientific << setprecision(6); auto m = st.markerinfo; @@ -2212,10 +2234,6 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval, mdb_stat(rtxn, geno_mdb, &stat); auto num_markers = stat.ms_entries; - // fetch_snp is a callback function for every SNP row - // returns typedef std::tuple<bool,string,std::vector<double> > SnpNameValues; - - // size_t prev_line = 0; auto mdb_fetch = MDB_FIRST; auto cursor = lmdb::cursor::open(rtxn, geno_mdb); @@ -2227,20 +2245,14 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval, string_view key,value; - /* - while (prev_line <= num) { - cursor.get(key, value, MDB_NEXT); - prev_line++; - } - */ - auto success = cursor.get(key, value, mdb_fetch); + auto mdb_success = cursor.get(key, value, mdb_fetch); mdb_fetch = MDB_NEXT; // uint8_t chr; vector<double> gs; MarkerInfo markerinfo; - if (success) { + if (mdb_success) { size_t size = 0; // ---- Depending on the format we get different buffers - currently float and byte are supported: if (format == "Gb") { @@ -2282,7 +2294,10 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval, // printf("%#02x %#02x\n", chr, loco_chr); if (is_loco && loco_chr != chr) { - return make_tuple(false, MarkerInfo { .name="", .chr=chr, .pos=pos } , gs); + if (chr > loco_chr) + return make_tuple(LAST, MarkerInfo { .name="", .chr=chr, .pos=pos } , gs); + else + return make_tuple(SKIP, MarkerInfo { .name="", .chr=chr, .pos=pos } , gs); } string_view value2; @@ -2299,7 +2314,7 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval, // cout << "!!!!" << size << marker << ": af" << maf << " " << gs[0] << "," << gs[1] << "," << gs[2] << "," << gs[3] << endl; } - return make_tuple(success, markerinfo, gs); + return make_tuple(COMPUTE, markerinfo, gs); }; LMM::mdb_analyze(fetch_snp,U,eval,UtW,Uty,W,y,num_markers); diff --git a/src/lmm.h b/src/lmm.h index da5ad21..295602a 100644 --- a/src/lmm.h +++ b/src/lmm.h @@ -55,7 +55,15 @@ struct MarkerInfo { typedef vector<MarkerInfo> Markers; typedef tuple< string,vector<double> > SnpNameValues; -typedef tuple< bool,MarkerInfo,vector<double> > SnpNameValues2; // success, markerinfo (maf and n_miss are computed) + +enum MarkerState { + FAIL, + COMPUTE, + SKIP, + LAST +}; + +typedef tuple< MarkerState,MarkerInfo,vector<double> > SnpNameValues2; // success, markerinfo (maf and n_miss are computed) // Results for LMM. class SUMSTAT2 { public: |
