diff options
| author | Pjotr Prins | 2025-12-01 08:31:39 +0100 |
|---|---|---|
| committer | Pjotr Prins | 2025-12-01 08:31:39 +0100 |
| commit | e0a32b97dd2e6b0ab769b6e7ec6d3d217eeb15a0 (patch) | |
| tree | a16614b2046b59718eb8ef76bee98e181e4b7758 /src | |
| parent | e233c0d2b6b9744005c009af6b78683a24137a00 (diff) | |
| download | pangemma-e0a32b97dd2e6b0ab769b6e7ec6d3d217eeb15a0.tar.gz | |
Wiring in mdb GWA
Diffstat (limited to 'src')
| -rw-r--r-- | src/gemma.cpp | 66 | ||||
| -rw-r--r-- | src/lmm.cpp | 54 | ||||
| -rw-r--r-- | src/lmm.h | 4 |
3 files changed, 95 insertions, 29 deletions
diff --git a/src/gemma.cpp b/src/gemma.cpp index 1cced77..fb2227e 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -2591,20 +2591,24 @@ void GEMMA::BatchRun(PARAM &cPar) { cPar.a_mode == M_LMM4 || cPar.a_mode == M_LMM5 || cPar.a_mode == M_LMM9 || cPar.a_mode == M_EIGEN) { // Fit LMM or mvLMM or eigen write(cPar.a_mode, "Mode"); - gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph); // Y is ind x phenotypes + auto ni_test = cPar.ni_test; + auto n_ph = cPar.n_ph; + enforce(ni_test > 0); + enforce(n_ph > 0); + gsl_matrix *Y = gsl_matrix_safe_alloc(ni_test, n_ph); // Y is ind x phenotypes enforce_msg(Y, "allocate Y"); // just to be sure - gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt); // W is ind x covariates + gsl_matrix *W = gsl_matrix_safe_alloc(ni_test, cPar.n_cvt); // W is ind x covariates gsl_matrix *B = gsl_matrix_safe_alloc(Y->size2, W->size2); // B is a d by c (effect size) // matrix gsl_matrix *se_B = gsl_matrix_safe_alloc(Y->size2, W->size2); // se - enforce(Y->size1 > 0); - gsl_matrix *K = gsl_matrix_safe_alloc(Y->size1, Y->size1); // Kinship aka as G/GRM in the equation ind x ind - gsl_matrix *U = gsl_matrix_safe_alloc(Y->size1, Y->size1); - gsl_matrix *UtW = gsl_matrix_calloc(Y->size1, W->size2); // Transformed ind x covariates - gsl_matrix *UtY = gsl_matrix_calloc(Y->size1, Y->size2); // Transforemd ind x phenotypes - gsl_vector *eval = gsl_vector_calloc(Y->size1); // eigen values - gsl_vector *env = gsl_vector_safe_alloc(Y->size1); // E environment - gsl_vector *weight = gsl_vector_safe_alloc(Y->size1); // weights + enforce(ni_test > 0); + gsl_matrix *K = gsl_matrix_safe_alloc(ni_test, ni_test); // Kinship aka as G/GRM in the equation ind x ind + gsl_matrix *U = gsl_matrix_safe_alloc(ni_test, ni_test); + gsl_matrix *UtW = gsl_matrix_calloc(ni_test, W->size2); // Transformed ind x covariates + gsl_matrix *UtY = gsl_matrix_calloc(ni_test, Y->size2); // Transforemd ind x phenotypes + gsl_vector *eval = gsl_vector_calloc(ni_test); // eigen values + gsl_vector *env = gsl_vector_safe_alloc(ni_test); // E environment + gsl_vector *weight = gsl_vector_safe_alloc(ni_test); // weights debug_msg("Started on LMM"); assert_issue(is_issue(26), UtY->data[0] == 0.0); @@ -2838,28 +2842,32 @@ void GEMMA::BatchRun(PARAM &cPar) { gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); // and its transformed write(&Y_col.vector, "Y_col"); - if (!cPar.file_bfile.empty()) { - // PLINK analysis - if (cPar.file_gxe.empty()) { - cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W, - &Y_col.vector, cPar.setGWASnps); + if (cPar.is_mdb) + cLmm.mdb_calc_gwa(U, eval, UtW, &UtY_col.vector, W, + &Y_col.vector, cPar.setGWASnps); + else + if (!cPar.file_bfile.empty()) { + // PLINK analysis + if (cPar.file_gxe.empty()) { + cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W, + &Y_col.vector, cPar.setGWASnps); + } + else { + cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W, + &Y_col.vector, env); + } } else { - cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W, - &Y_col.vector, env); + // BIMBAM analysis + + if (cPar.file_gxe.empty()) { + cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W, + &Y_col.vector, cPar.setGWASnps); + } else { + cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W, + &Y_col.vector, env); + } } - } - else { - // BIMBAM analysis - - if (cPar.file_gxe.empty()) { - cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W, - &Y_col.vector, cPar.setGWASnps); - } else { - cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W, - &Y_col.vector, env); - } - } cLmm.WriteFiles(); // write output files cLmm.CopyToParam(cPar); } else { diff --git a/src/lmm.cpp b/src/lmm.cpp index f5b8a88..e58adc9 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -1867,6 +1867,60 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp, } /* + This is the mirror function of below AnalyzeBimbam, but uses mdb input instead. + */ + +void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y, + const set<string> gwasnps) { + debug_msg(file_geno); + auto infilen = file_geno.c_str(); + checkpoint("start-read-geno-file",file_geno); + + igzstream infile(infilen, igzstream::in); + enforce_msg(infile, "error reading genotype file"); + size_t prev_line = 0; + + std::vector <double> gs; + gs.resize(ni_total); + + // fetch_snp is a callback function for every SNP row + std::function<SnpNameValues(size_t)> fetch_snp = [&](size_t num) { + string line; + while (prev_line <= num) { + // also read SNPs that were skipped + safeGetline(infile, line); + prev_line++; + } + char *ch_ptr = strtok_safe2((char *)line.c_str(), " , \t",infilen); + // enforce_msg(ch_ptr, "Parsing BIMBAM genofile"); // ch_ptr should not be NULL + + auto snp = string(ch_ptr); + ch_ptr = strtok_safe2(NULL, " , \t",infilen); // skip column + ch_ptr = strtok_safe2(NULL, " , \t",infilen); // skip column + + gs.assign (ni_total,nan("")); // wipe values + + for (size_t i = 0; i < ni_total; ++i) { + ch_ptr = strtok_safe2(NULL, " , \t",infilen); + if (strcmp(ch_ptr, "NA") != 0) { + gs[i] = atof(ch_ptr); + if (is_strict_mode() && gs[i] == 0.0) + enforce_is_float(std::string(ch_ptr)); // only allow for NA and valid numbers + } + } + return std::make_tuple(snp,gs); + }; + + LMM::Analyze(fetch_snp,U,eval,UtW,Uty,W,y,gwasnps); + + infile.close(); + infile.clear(); +} + + +/* Looking at the `LMM::AnalyzeBimbam` function, here are the parameters that get modified: diff --git a/src/lmm.h b/src/lmm.h index 7b90510..cedbf38 100644 --- a/src/lmm.h +++ b/src/lmm.h @@ -100,6 +100,10 @@ public: const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const set<string> gwasnps); + void mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y, + const set<string> gwasnps); void AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, |
