diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/gemma.cpp | 30 | ||||
| -rw-r--r-- | src/gemma_io.cpp | 10 | ||||
| -rw-r--r-- | src/mathfunc.cpp | 15 |
3 files changed, 35 insertions, 20 deletions
diff --git a/src/gemma.cpp b/src/gemma.cpp index 4ef2059..b30c3c3 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -2572,19 +2572,19 @@ void GEMMA::BatchRun(PARAM &cPar) { cPar.a_mode == M_LMM4 || cPar.a_mode == M_LMM5 || cPar.a_mode == M_LMM9 || cPar.a_mode == M_EIGEN) { // Fit LMM or mvLMM or eigen write(cPar.a_mode, "Mode"); - gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph); + gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph); // Y is ind x phenotypes enforce_msg(Y, "allocate Y"); // just to be sure - gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt); - gsl_matrix *B = gsl_matrix_safe_alloc(Y->size2, W->size2); // B is a d by c + gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt); // W is ind x covariates + gsl_matrix *B = gsl_matrix_safe_alloc(Y->size2, W->size2); // B is a d by c (effect size) // matrix - gsl_matrix *se_B = gsl_matrix_safe_alloc(Y->size2, W->size2); - gsl_matrix *K = gsl_matrix_safe_alloc(Y->size1, Y->size1); + gsl_matrix *se_B = gsl_matrix_safe_alloc(Y->size2, W->size2); // se + gsl_matrix *K = gsl_matrix_safe_alloc(Y->size1, Y->size1); // Kinship aka as G/GRM in the equation ind x ind gsl_matrix *U = gsl_matrix_safe_alloc(Y->size1, Y->size1); - gsl_matrix *UtW = gsl_matrix_calloc(Y->size1, W->size2); - gsl_matrix *UtY = gsl_matrix_calloc(Y->size1, Y->size2); - gsl_vector *eval = gsl_vector_calloc(Y->size1); - gsl_vector *env = gsl_vector_safe_alloc(Y->size1); - gsl_vector *weight = gsl_vector_safe_alloc(Y->size1); + gsl_matrix *UtW = gsl_matrix_calloc(Y->size1, W->size2); // Transformed ind x covariates + gsl_matrix *UtY = gsl_matrix_calloc(Y->size1, Y->size2); // Transforemd ind x phenotypes + gsl_vector *eval = gsl_vector_calloc(Y->size1); // eigen values + gsl_vector *env = gsl_vector_safe_alloc(Y->size1); // E environment + gsl_vector *weight = gsl_vector_safe_alloc(Y->size1); // weights debug_msg("Started on LMM"); assert_issue(is_issue(26), UtY->data[0] == 0.0); @@ -2696,8 +2696,8 @@ void GEMMA::BatchRun(PARAM &cPar) { LMM cLmm; cLmm.CopyFromParam(cPar); - gsl_vector_view Y_col = gsl_matrix_column(Y, 0); - gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); + gsl_vector_view Y_col = gsl_matrix_column(Y, 0); // get a single phenotype column + gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); // and transformed column cLmm.AnalyzeGene(U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); // y is the predictor, not the phenotype @@ -2814,8 +2814,8 @@ void GEMMA::BatchRun(PARAM &cPar) { // if (is_check_mode()) disable_segfpe(); // disable fast NaN checking for now - gsl_vector_view Y_col = gsl_matrix_column(Y, 0); - gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); + gsl_vector_view Y_col = gsl_matrix_column(Y, 0); // get pheno column + gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); // and its transformed write(&Y_col.vector, "Y_col"); if (!cPar.file_bfile.empty()) { @@ -2840,7 +2840,7 @@ void GEMMA::BatchRun(PARAM &cPar) { &Y_col.vector, env); } } - cLmm.WriteFiles(); + cLmm.WriteFiles(); // write output files cLmm.CopyToParam(cPar); } else { debug_msg("fit mvLMM (multiple phenotypes)"); diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index fddfd79..7d1f0ca 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -698,7 +698,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, gsl_vector *genotype = gsl_vector_safe_alloc(W->size1); gsl_vector *genotype_miss = gsl_vector_safe_alloc(W->size1); - gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2); + gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2); // Covariates gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2); gsl_vector *Wtx = gsl_vector_safe_alloc(W->size2); gsl_vector *WtWiWtx = gsl_vector_safe_alloc(W->size2); @@ -746,13 +746,14 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, // cerr << key << endl; // } while (!safe_get_line(infile, line).eof()) { - // cout << line; + // fetch SNP annotation ch_ptr = strtok_safe2((char *)line.c_str(), " ,\t",infilen); rs = ch_ptr; ch_ptr = strtok_safe2(NULL, " ,\t",infilen); minor = ch_ptr; ch_ptr = strtok_safe2(NULL, " ,\t",infilen); major = ch_ptr; + // genotypes get read below if (setSnps.size() != 0 && setSnps.count(rs) == 0) { // if SNP in geno but not in -snps we add an missing value @@ -792,11 +793,12 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, c_idv = 0; gsl_vector_set_zero(genotype_miss); auto infilen = file_geno.c_str(); - for (int i = 0; i < ni_total; ++i) { + for (int i = 0; i < ni_total; ++i) { // read genotypes ch_ptr = strtok_safe2(NULL, " ,\t",(string("To many individuals/strains/genometypes in pheno file for ")+infilen).c_str()); if (indicator_idv[i] == 0) continue; + // annotate missing genotypes enforce_msg(ch_ptr,"Problem reading geno file (not enough genotypes in line)"); if (strcmp(ch_ptr, "NA") == 0) { gsl_vector_set(genotype_miss, c_idv, 1); @@ -900,7 +902,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, if (max_g != 2.0) warning_msg("The maximum genotype value is not 2.0 - this is not the BIMBAM standard and will skew l_lme and effect sizes"); - gsl_vector_free(genotype); + gsl_vector_free(genotype); // we don't hang on to the genotypes gsl_vector_free(genotype_miss); gsl_matrix_free(WtW); gsl_matrix_free(WtWi); diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp index a1c6c29..a351509 100644 --- a/src/mathfunc.cpp +++ b/src/mathfunc.cpp @@ -494,7 +494,20 @@ void StandardizeVector(gsl_vector *y) { } // Calculate UtX (U gets transposed) -// CalcUtX transforms a genotype matrix into the eigenspace by computing U^T × X, where U contains the eigenvectors from the kinship matrix decomposition. This transformation diagonalizes the correlation structure induced by relatedness, making subsequent likelihood calculations tractable and efficient. The function overwrites its input with the transformed result, using a temporary copy to avoid data corruption. +/* +CalcUtX transforms a genotype matrix by the eigenvector matrix from the kinship matrix decomposition. +Purpose: + +Computes UtX = U^T × X, where: + + U: Eigenvector matrix from the spectral decomposition of the kinship matrix K + X: Genotype matrix (individuals × SNPs) + UtX: Transformed genotype matrix + +This transformation rotates the genotype data into the eigenspace where the kinship-induced correlations become diagonal, simplifying likelihood calculations. + +CalcUtX transforms a genotype matrix into the eigenspace by computing U^T × X, where U contains the eigenvectors from the kinship matrix decomposition. This transformation diagonalizes the correlation structure induced by relatedness, making subsequent likelihood calculations tractable and efficient. The function overwrites its input with the transformed result, using a temporary copy to avoid data corruption. +*/ void CalcUtX(const gsl_matrix *U, gsl_matrix *UtX) { gsl_matrix *X = gsl_matrix_safe_alloc(UtX->size1, UtX->size2); gsl_matrix_safe_memcpy(X, UtX); |
