diff options
Diffstat (limited to 'src/gemma.cpp')
| -rw-r--r-- | src/gemma.cpp | 175 |
1 files changed, 104 insertions, 71 deletions
diff --git a/src/gemma.cpp b/src/gemma.cpp index 6c2b3f4..a39d67a 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -62,6 +62,7 @@ extern "C" { #include "vc.h" #include "debug.h" #include "version.h" +#include <guile_api.h> using namespace std; @@ -81,10 +82,12 @@ void gemma_gsl_error_handler (const char * reason, #include <openblas_config.h> #endif +using namespace gemmaguile; + void GEMMA::PrintHeader(void) { cout << - "GEMMA forked executable --- part of PanGEMMA " << version << " (" << date << ") by Xiang Zhou, Pjotr Prins and team (C) 2012-" << year << endl; + "Pangemma " << version << " --- GEMMA 0.98.5 compatible executable (" << date << ") with" << OPENBLAS_VERSION << "and guile " << global_guile_version() << endl << "by Xiang Zhou, Pjotr Prins and team (C) 2012-" << year << endl ; return; } @@ -1653,7 +1656,7 @@ void GEMMA::BatchRun(PARAM &cPar) { clock_t time_begin, time_start; time_begin = clock(); - if (is_check_mode()) enable_segfpe(); // fast NaN checking by default + // if (is_check_mode()) enable_segfpe(); // fast NaN checking by default // Read Files. cout << "Reading Files ... " << endl; @@ -1908,32 +1911,52 @@ void GEMMA::BatchRun(PARAM &cPar) { } // Generate Kinship matrix (optionally using LOCO) - if (cPar.a_mode == M_KIN || cPar.a_mode == M_KIN2) { + // if (cPar.a_mode == M_KIN || cPar.a_mode == M_KIN2) { + if (is_kinship_compute(cPar.a_mode)) { cout << "Calculating Relatedness Matrix ... " << endl; - gsl_matrix *G = gsl_matrix_safe_alloc(cPar.ni_total, cPar.ni_total); - enforce_msg(G, "allocate G"); // just to be sure + if (cPar.is_mdb) { + time_start = clock(); + auto K = cPar.MdbCalcKin(); + cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + if (cPar.a_mode == M_KIN) { + cPar.WriteMatrix(K, "cXX"); + } else { // M_KIN2 + cPar.WriteMatrix(K, "sXX"); + } + + gsl_matrix_safe_free(K); + return; + } + + enforce(cPar.ni_total > 0); + gsl_matrix *K = gsl_matrix_safe_alloc(cPar.ni_total, cPar.ni_total); + enforce_msg(K, "allocate G"); // just to be sure time_start = clock(); - cPar.CalcKin(G); + if (cPar.error == true) { + cout << "error! failed to prepare for calculating relatedness matrix. " << endl; + return; + } + cPar.CalcKin(K); cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); if (cPar.error == true) { - cout << "error! fail to calculate relatedness matrix. " << endl; + cout << "error! failed to calculate relatedness matrix. " << endl; return; } // Now we have the Kinship matrix test it - validate_K(G); + validate_K(K); if (cPar.a_mode == M_KIN) { - cPar.WriteMatrix(G, "cXX"); + cPar.WriteMatrix(K, "cXX"); } else { // M_KIN2 - cPar.WriteMatrix(G, "sXX"); + cPar.WriteMatrix(K, "sXX"); } - gsl_matrix_safe_free(G); + gsl_matrix_safe_free(K); } // Compute the LDSC weights (not implemented yet) @@ -1943,7 +1966,7 @@ void GEMMA::BatchRun(PARAM &cPar) { VARCOV cVarcov; cVarcov.CopyFromParam(cPar); - if (is_check_mode()) disable_segfpe(); // disable fast NaN checking for now + // if (is_check_mode()) disable_segfpe(); // disable fast NaN checking for now if (!cPar.file_bfile.empty()) { cVarcov.AnalyzePlink(); @@ -2058,7 +2081,7 @@ void GEMMA::BatchRun(PARAM &cPar) { VARCOV cVarcov; cVarcov.CopyFromParam(cPar); - if (is_check_mode()) disable_segfpe(); // fast NaN checking for now + // if (is_check_mode()) disable_segfpe(); // fast NaN checking for now if (!cPar.file_bfile.empty()) { cVarcov.AnalyzePlink(); @@ -2569,19 +2592,23 @@ void GEMMA::BatchRun(PARAM &cPar) { cPar.a_mode == M_LMM4 || cPar.a_mode == M_LMM5 || cPar.a_mode == M_LMM9 || cPar.a_mode == M_EIGEN) { // Fit LMM or mvLMM or eigen write(cPar.a_mode, "Mode"); - gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph); + auto n_ph = cPar.n_ph; // # of trait values + auto ni_test = cPar.ni_test; // # of individuals to test + enforce(n_ph > 0); + enforce(ni_test > 0); + gsl_matrix *Y = gsl_matrix_safe_alloc(ni_test, n_ph); // Y is ind x phenotypes enforce_msg(Y, "allocate Y"); // just to be sure - gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt); - gsl_matrix *B = gsl_matrix_safe_alloc(Y->size2, W->size2); // B is a d by c - // matrix - gsl_matrix *se_B = gsl_matrix_safe_alloc(Y->size2, W->size2); - gsl_matrix *G = gsl_matrix_safe_alloc(Y->size1, Y->size1); - gsl_matrix *U = gsl_matrix_safe_alloc(Y->size1, Y->size1); - gsl_matrix *UtW = gsl_matrix_calloc(Y->size1, W->size2); - gsl_matrix *UtY = gsl_matrix_calloc(Y->size1, Y->size2); - gsl_vector *eval = gsl_vector_calloc(Y->size1); - gsl_vector *env = gsl_vector_safe_alloc(Y->size1); - gsl_vector *weight = gsl_vector_safe_alloc(Y->size1); + gsl_matrix *W = gsl_matrix_safe_alloc(ni_test, cPar.n_cvt); // W is ind x covariates + gsl_matrix *B = gsl_matrix_safe_alloc(n_ph, W->size2); // B is a d by c (effect size) + gsl_matrix *se_B = gsl_matrix_safe_alloc(n_ph, W->size2); // se + enforce(ni_test > 0); + gsl_matrix *K = gsl_matrix_safe_alloc(ni_test, ni_test); // Kinship aka as G/GRM in the equation ind x ind + gsl_matrix *U = gsl_matrix_safe_alloc(ni_test, ni_test); + gsl_matrix *UtW = gsl_matrix_calloc(ni_test, W->size2); // Transformed ind x covariates + gsl_matrix *UtY = gsl_matrix_calloc(ni_test, n_ph); // Transformed ind x phenotypes + gsl_vector *eval = gsl_vector_calloc(ni_test); // eigen values + gsl_vector *env = gsl_vector_safe_alloc(ni_test); // E environment + gsl_vector *weight = gsl_vector_safe_alloc(ni_test); // weights debug_msg("Started on LMM"); assert_issue(is_issue(26), UtY->data[0] == 0.0); @@ -2592,38 +2619,38 @@ void GEMMA::BatchRun(PARAM &cPar) { cPar.CopyGxe(env); } - // read relatedness matrix G + // read relatedness matrix K if (!(cPar.file_kin).empty()) { ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, - cPar.k_mode, cPar.error, G); + cPar.k_mode, cPar.error, K); debug_msg("Read K/GRM file"); if (cPar.error == true) { cout << "error! fail to read kinship/relatedness file. " << endl; return; } - // center matrix G - CenterMatrix(G); - validate_K(G); - write(G, "G"); + // center matrix K + CenterMatrix(K); + validate_K(K); + write(K, "K"); // is residual weights are provided, then if (!cPar.file_weight.empty()) { cPar.CopyWeight(weight); double d, wi, wj; - for (size_t i = 0; i < G->size1; i++) { + for (size_t i = 0; i < K->size1; i++) { wi = gsl_vector_get(weight, i); - for (size_t j = i; j < G->size2; j++) { + for (size_t j = i; j < K->size2; j++) { wj = gsl_vector_get(weight, j); - d = gsl_matrix_get(G, i, j); + d = gsl_matrix_get(K, i, j); if (wi <= 0 || wj <= 0) { d = 0; } else { d /= safe_sqrt(wi * wj); } - gsl_matrix_set(G, i, j, d); + gsl_matrix_set(K, i, j, d); if (j != i) { - gsl_matrix_set(G, j, i, d); + gsl_matrix_set(K, j, i, d); } } } @@ -2634,9 +2661,9 @@ void GEMMA::BatchRun(PARAM &cPar) { time_start = clock(); if (cPar.a_mode == M_EIGEN) { - cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0); + cPar.trace_G = EigenDecomp_Zeroed(K, U, eval, 0); } else { - cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0); + cPar.trace_G = EigenDecomp_Zeroed(K, U, eval, 0); } // write(eval,"eval"); @@ -2693,8 +2720,8 @@ void GEMMA::BatchRun(PARAM &cPar) { LMM cLmm; cLmm.CopyFromParam(cPar); - gsl_vector_view Y_col = gsl_matrix_column(Y, 0); - gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); + gsl_vector_view Y_col = gsl_matrix_column(Y, 0); // get a single phenotype column + gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); // and transformed column cLmm.AnalyzeGene(U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); // y is the predictor, not the phenotype @@ -2811,34 +2838,40 @@ void GEMMA::BatchRun(PARAM &cPar) { // if (is_check_mode()) disable_segfpe(); // disable fast NaN checking for now - gsl_vector_view Y_col = gsl_matrix_column(Y, 0); - gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); + gsl_vector_view Y_col = gsl_matrix_column(Y, 0); // get pheno column + gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); // and its transformed write(&Y_col.vector, "Y_col"); - if (!cPar.file_bfile.empty()) { - // PLINK analysis - if (cPar.file_gxe.empty()) { - cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W, - &Y_col.vector, cPar.setGWASnps); - } - else { - cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W, - &Y_col.vector, env); - } + if (cPar.is_mdb) { + cLmm.mdb_calc_gwa(U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, cPar.loco); + cLmm.CopyToParam(cPar); } else { - // BIMBAM analysis - - if (cPar.file_gxe.empty()) { - cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W, - &Y_col.vector, cPar.setGWASnps); - } else { - cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W, - &Y_col.vector, env); + if (!cPar.file_bfile.empty()) { + // PLINK analysis + if (cPar.file_gxe.empty()) { + cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W, + &Y_col.vector, cPar.setGWASnps); + } + else { + cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W, + &Y_col.vector, env); + } + } + else { + // BIMBAM analysis + + if (cPar.file_gxe.empty()) { + cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W, + &Y_col.vector, cPar.setGWASnps); + } else { + cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W, + &Y_col.vector, env); + } } + cLmm.WriteFiles(); // write output files + cLmm.CopyToParam(cPar); } - cLmm.WriteFiles(); - cLmm.CopyToParam(cPar); } else { debug_msg("fit mvLMM (multiple phenotypes)"); MVLMM cMvlmm; @@ -2873,7 +2906,7 @@ void GEMMA::BatchRun(PARAM &cPar) { gsl_matrix_safe_free(W); gsl_matrix_warn_free(B); // sometimes unused gsl_matrix_warn_free(se_B); - gsl_matrix_warn_free(G); + gsl_matrix_warn_free(K); gsl_matrix_safe_free(U); gsl_matrix_safe_free(UtW); gsl_matrix_safe_free(UtY); @@ -2898,7 +2931,7 @@ void GEMMA::BatchRun(PARAM &cPar) { // run bvsr if rho==1 if (cPar.rho_min == 1 && cPar.rho_max == 1) { // read genotypes X (not UtX) - cPar.ReadGenotypes(UtX, G, false); + cPar.ReadBIMBAMGenotypes(UtX, G, false); // perform BSLMM analysis BSLMM cBslmm; @@ -2916,7 +2949,7 @@ void GEMMA::BatchRun(PARAM &cPar) { // read relatedness matrix G if (!(cPar.file_kin).empty()) { - cPar.ReadGenotypes(UtX, G, false); + cPar.ReadBIMBAMGenotypes(UtX, G, false); // read relatedness matrix G ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, @@ -2930,7 +2963,7 @@ void GEMMA::BatchRun(PARAM &cPar) { CenterMatrix(G); validate_K(G); } else { - cPar.ReadGenotypes(UtX, G, true); + cPar.ReadBIMBAMGenotypes(UtX, G, true); } // eigen-decomposition and calculate trace_G @@ -3008,7 +3041,7 @@ void GEMMA::BatchRun(PARAM &cPar) { // run bvsr if rho==1 if (cPar.rho_min == 1 && cPar.rho_max == 1) { // read genotypes X (not UtX) - cPar.ReadGenotypes(UtX, G, false); + cPar.ReadBIMBAMGenotypes(UtX, G, false); // perform BSLMM analysis BSLMM cBslmm; @@ -3027,7 +3060,7 @@ void GEMMA::BatchRun(PARAM &cPar) { // read relatedness matrix G if (!(cPar.file_kin).empty()) { - cPar.ReadGenotypes(UtX, G, false); + cPar.ReadBIMBAMGenotypes(UtX, G, false); // read relatedness matrix G ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, @@ -3042,7 +3075,7 @@ void GEMMA::BatchRun(PARAM &cPar) { validate_K(G); } else { - cPar.ReadGenotypes(UtX, G, true); + cPar.ReadBIMBAMGenotypes(UtX, G, true); } // eigen-decomposition and calculate trace_G @@ -3168,8 +3201,8 @@ void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) { } outfile << "##" << endl; - outfile << "## GEMMA Version = " << version << " (" << date << ")" << endl; - outfile << "## Build profile = " << GEMMA_PROFILE << endl ; + outfile << "## PANGEMMA Version = " << version << " (" << date << ")" << endl; + // outfile << "## Build profile = " << GEMMA_PROFILE << endl ; #ifdef __GNUC__ outfile << "## GCC version = " << __GNUC__ << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__ << endl; #endif |
