diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/gemma.cpp | 1 | ||||
| -rw-r--r-- | src/gemma_io.cpp | 31 | ||||
| -rw-r--r-- | src/lmm.cpp | 24 | ||||
| -rw-r--r-- | src/lmm.h | 4 | ||||
| -rw-r--r-- | src/mathfunc.cpp | 31 | ||||
| -rw-r--r-- | src/mathfunc.h | 5 |
6 files changed, 57 insertions, 39 deletions
diff --git a/src/gemma.cpp b/src/gemma.cpp index f7ebdce..3100b94 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -2845,6 +2845,7 @@ void GEMMA::BatchRun(PARAM &cPar) { if (cPar.is_mdb) { cLmm.mdb_calc_gwa(U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, cPar.setGWASnps); + cLmm.CopyToParam(cPar); } else { if (!cPar.file_bfile.empty()) { diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index 08f25ca..e183e3f 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -651,37 +651,6 @@ bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno, return true; } -#include <tuple> - -std::tuple<size_t, size_t, size_t> compute_ratio(size_t ni_total, const gsl_vector *gs) { - size_t n_0 = 0; - size_t n_1 = 0; - size_t n_2 = 0; - for (size_t i = 0; i < ni_total; ++i) { // read genotypes - double geno = gsl_vector_get(gs, i); - if (geno >= 0 && geno <= 0.5) { - n_0++; - } - if (geno > 0.5 && geno < 1.5) { - n_1++; - } - if (geno >= 1.5 && geno <= 2.0) { - n_2++; - } - } - return {n_0,n_1,n_2}; -} - -double compute_maf(size_t ni_total, size_t ni_test, size_t n_miss, const gsl_vector *gs) { - double maf = 0.0; - - for (size_t i = 0; i < ni_total; ++i) { // read genotypes - double geno = gsl_vector_get(gs, i); - maf += geno; - } - maf /= 2.0 * (double)(ni_test - n_miss); - return maf; -} // Read bimbam mean genotype file, the first time, to obtain #SNPs for // analysis (ns_test) and total #SNP (ns_total). diff --git a/src/lmm.cpp b/src/lmm.cpp index d13ce3c..66b8e37 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -96,8 +96,9 @@ void LMM::CopyToParam(PARAM &cPar) { checkpoint_nofile("lmm-copy-to-param"); cPar.time_UtX = time_UtX; cPar.time_opt = time_opt; - - cPar.ng_test = ng_test; // number of markers tested + cPar.ns_total = ns_total; + cPar.ns_test = ns_test; + cPar.ng_test = ng_test; // number of markers tested return; } @@ -2034,8 +2035,8 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp, */ // drop missing idv and plug mean values for missing geno double x_total = 0.0; // sum genotype values to compute x_mean - uint vpos = 0; // position in target vector - uint n_miss = 0; + uint vpos = 0; // position in target vector + uint n_miss = 0; // count NA genotypes gsl_vector_set_zero(x_miss); for (size_t i = 0; i < ni_total; ++i) { // get the genotypes per individual and compute stats per SNP @@ -2107,6 +2108,9 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp, outfile << chr << "\t"; outfile << name << "\t"; outfile << pos << "\t"; + auto n_miss = 0; + outfile << n_miss << "\t-\t-\t"; // n_miss column + // outfile << st.maf << "\t" ; if (a_mode != M_LMM2) { outfile << st.beta << "\t"; outfile << st.se << "\t"; @@ -2240,18 +2244,26 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval, auto marker = string(value2); // 1 rs13476251 174792257 // cout << static_cast<int>(chr) << ":" << pos2 << " line " << rest2 << ":" << marker << endl ; - markerinfo = make_tuple(marker,chr,pos,num); auto size = num_floats; gs.reserve(size); for (size_t i = 0; i < size; ++i) { gs.push_back(static_cast<double>(gsbuf[i])); } - // cout << "!!!!" << size << snp << ": " << gs[0] << "," << gs[1] << "," << gs[2] << "," << gs[3] << endl; + + // compute maf and n_miss (NAs) + size_t n_miss = 0; // count NAs: FIXME + double maf = compute_maf(ni_total, ni_test, n_miss, gs.data()); + + markerinfo = make_tuple(marker,chr,pos,num); + + // cout << "!!!!" << size << marker << ": af" << maf << " " << gs[0] << "," << gs[1] << "," << gs[2] << "," << gs[3] << endl; } return make_tuple(success, markerinfo, gs); }; LMM::mdb_analyze(fetch_snp,U,eval,UtW,Uty,W,y,gwasnps,num_markers); + + ns_total = ns_test = num_markers; // track global number of snps in original gemma (goes to cPar) } diff --git a/src/lmm.h b/src/lmm.h index 35064c7..715b9c0 100644 --- a/src/lmm.h +++ b/src/lmm.h @@ -45,10 +45,10 @@ public: size_t e_mode; }; -typedef tuple< string, uint16_t, uint32_t, uint32_t > MarkerChrPos; +typedef tuple< string, uint16_t, uint32_t, uint32_t > MarkerChrPos; // name, chr, pos, line typedef vector<MarkerChrPos> Markers; typedef tuple< string,vector<double> > SnpNameValues; -typedef tuple< bool,MarkerChrPos,vector<double> > SnpNameValues2; +typedef tuple< bool,MarkerChrPos,vector<double> > SnpNameValues2; // success, markerinfo (maf and n_miss are computed) // Results for LMM. class SUMSTAT2 { public: diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp index a351509..f132e6e 100644 --- a/src/mathfunc.cpp +++ b/src/mathfunc.cpp @@ -645,3 +645,34 @@ double UcharToDouble02(const unsigned char c) { return (double)c * 0.01; } unsigned char Double02ToUchar(const double dosage) { return (int)(dosage * 100); } + + +std::tuple<size_t, size_t, size_t> compute_ratio(size_t ni_total, const gsl_vector *gs) { + size_t n_0 = 0; + size_t n_1 = 0; + size_t n_2 = 0; + for (size_t i = 0; i < ni_total; ++i) { // read genotypes + double geno = gsl_vector_get(gs, i); + if (geno >= 0 && geno <= 0.5) { + n_0++; + } + if (geno > 0.5 && geno < 1.5) { + n_1++; + } + if (geno >= 1.5 && geno <= 2.0) { + n_2++; + } + } + return {n_0,n_1,n_2}; +} + +double compute_maf(size_t ni_total, size_t ni_test, size_t n_miss, const double *gs) { + double maf = 0.0; + + for (size_t i = 0; i < ni_total; ++i) { // read genotypes + double geno = gs[i]; + maf += geno; // 0..2 range + } + maf /= 2.0 * (double)(ni_test - n_miss); // Assumption is that geno value is between 0 and 2. FIXME + return maf; +} diff --git a/src/mathfunc.h b/src/mathfunc.h index 641d0a3..b408ea7 100644 --- a/src/mathfunc.h +++ b/src/mathfunc.h @@ -24,6 +24,7 @@ // #include "Eigen/Dense" #include "gsl/gsl_matrix.h" #include "gsl/gsl_vector.h" +#include <tuple> #define CONDITIONED_MAXRATIO 2e+6 // based on http://mathworld.wolfram.com/ConditionNumber.html #define EIGEN_MINVALUE 1e-10 @@ -77,4 +78,8 @@ unsigned char Double02ToUchar(const double dosage); // void uchar_matrix_get_row(const vector<vector<unsigned char>> &X, // const size_t i_row, Eigen::VectorXd &x_row); +std::tuple<size_t, size_t, size_t> compute_ratio(size_t ni_total, const gsl_vector *gs); +double compute_maf(size_t ni_total, size_t ni_test, size_t n_miss, const double *gs); + + #endif |
