diff options
author | Pjotr Prins | 2017-10-06 11:22:28 +0000 |
---|---|---|
committer | Pjotr Prins | 2017-10-06 11:22:28 +0000 |
commit | f8d6ad4f4fb7206035698a6024b898c9676d0714 (patch) | |
tree | 3da71bf08da2335b9572407669a903f6573e82b8 /src/lmm.cpp | |
parent | df1e049875adba1d3bb5762310bf31f0057fbf8d (diff) | |
download | pangemma-f8d6ad4f4fb7206035698a6024b898c9676d0714.tar.gz |
LMM: BIMBAM and PLINK share same LMM code - test pass
Diffstat (limited to 'src/lmm.cpp')
-rw-r--r-- | src/lmm.cpp | 222 |
1 files changed, 59 insertions, 163 deletions
diff --git a/src/lmm.cpp b/src/lmm.cpp index 8c0a303..2cb0fb2 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -1489,196 +1489,92 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y) { + const gsl_matrix *W, const gsl_vector *y, + const set<string> gwasnps) { string file_bed = file_bfile + ".bed"; debug_msg(file_bed); - ifstream infile(file_bed.c_str(), ios::binary); - if (!infile) { - cout << "error reading bed file:" << file_bed << endl; - return; - } - clock_t time_start = clock(); + ifstream infile(file_bed.c_str(), ios::binary); + enforce_msg(infile,"error reading genotype (.bed) file"); + bitset<8> bset8; char ch[1]; - bitset<8> b; + // int n_bit; // , n_miss, ci_total, ci_test; - double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0; - double p_lrt = 0, p_score = 0; - double logl_H1 = 0.0; - int n_bit, n_miss, ci_total, ci_test; - double geno, x_mean; - - // Calculate basic quantities. - size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; - - gsl_vector *x = gsl_vector_alloc(U->size1); - gsl_vector *Utx = gsl_vector_alloc(U->size2); - gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index); - gsl_vector *ab = gsl_vector_alloc(n_index); - - // Create a large matrix. - size_t msize = LMM_BATCH_SIZE; - gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); - gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); - gsl_matrix_set_zero(Xlarge); - - gsl_matrix_set_zero(Uab); - CalcUab(UtW, Uty, Uab); + // size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Calculate n_bit and c, the number of bit for each SNP. - if (ni_total % 4 == 0) { - n_bit = ni_total / 4; - } else { - n_bit = ni_total / 4 + 1; - } + const size_t n_bit = (ni_total % 4 == 0 ? ni_total / 4 : ni_total / 4 + 1); - // Print the first three magic numbers. + // first three magic numbers. for (int i = 0; i < 3; ++i) { infile.read(ch, 1); - b = ch[0]; + const bitset<8> b = ch[0]; } - size_t c = 0, t_last = 0; - for (size_t t = 0; t < snpInfo.size(); ++t) { - if (indicator_snp[t] == 0) - continue; - t_last++; - } - for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) { - if (t % d_pace == 0 || t == snpInfo.size() - 1) { - ProgressBar("Reading SNPs ", t, snpInfo.size() - 1); - } - if (indicator_snp[t] == 0) { - continue; - } + // fetch_snp is a callback function for every SNP row + std::function<SnpNameValues(size_t)> fetch_snp = [&](size_t num) { + + std::vector <double> gs; + gs.resize(ni_total); // n_bit, and 3 is the number of magic numbers. + auto t = num; infile.seekg(t * n_bit + 3); - - // Read genotypes. - x_mean = 0.0; - n_miss = 0; - ci_total = 0; - ci_test = 0; - for (int i = 0; i < n_bit; ++i) { - infile.read(ch, 1); - b = ch[0]; - - // Minor allele homozygous: 2.0; major: 0.0. - for (size_t j = 0; j < 4; ++j) { - if ((i == (n_bit - 1)) && ci_total == (int)ni_total) { - break; - } - if (indicator_idv[ci_total] == 0) { - ci_total++; - continue; - } - - if (b[2 * j] == 0) { - if (b[2 * j + 1] == 0) { - gsl_vector_set(x, ci_test, 2); - x_mean += 2.0; - } else { - gsl_vector_set(x, ci_test, 1); - x_mean += 1.0; + for (size_t i = 0; i < ni_total; ++i) { + // Read genotypes. + auto x_mean = 0.0; + auto n_miss = 0; + auto ci_total = 0; + auto ci_test = 0; + for (int i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + bset8 = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0. + for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && ci_total == (int)ni_total) { + break; } - } else { - if (b[2 * j + 1] == 1) { - gsl_vector_set(x, ci_test, 0); - } else { - gsl_vector_set(x, ci_test, -9); - n_miss++; + if (indicator_idv[ci_total] == 0) { + ci_total++; + continue; } - } - - ci_total++; - ci_test++; - } - } - - x_mean /= (double)(ni_test - n_miss); - - for (size_t i = 0; i < ni_test; ++i) { - geno = gsl_vector_get(x, i); - if (geno == -9) { - gsl_vector_set(x, i, x_mean); - geno = x_mean; - } - } - - gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize); - gsl_vector_memcpy(&Xlarge_col.vector, x); - c++; - - if (c % msize == 0 || c == t_last) { - size_t l = 0; - if (c % msize == 0) { - l = msize; - } else { - l = c % msize; - } - - gsl_matrix_view Xlarge_sub = - gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); - gsl_matrix_view UtXlarge_sub = - gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); - - time_start = clock(); - eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, - &UtXlarge_sub.matrix); - time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); - - gsl_matrix_set_zero(Xlarge); - - for (size_t i = 0; i < l; i++) { - gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i); - gsl_vector_memcpy(Utx, &UtXlarge_col.vector); - CalcUab(UtW, Uty, Utx, Uab); - - time_start = clock(); - FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0}; - - // 3 is before 1, for beta. - if (a_mode == 3 || a_mode == 4) { - CalcRLScore(l_mle_null, param1, beta, se, p_score); - } - - if (a_mode == 1 || a_mode == 4) { - CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, - logl_H1); - CalcRLWald(lambda_remle, param1, beta, se, p_wald); - } + if (bset8[2 * j] == 0) { + if (bset8[2 * j + 1] == 0) { + gs[ci_test] = 2.0; + // gsl_vector_set(x, ci_test, 2); + x_mean += 2.0; + } else { + gs[ci_test] = 1.0; + // gsl_vector_set(x, ci_test, 1); + x_mean += 1.0; + } + } else { + if (bset8[2 * j + 1] == 1) { + gs[ci_test] = 0.0; + // gsl_vector_set(x, ci_test, 0); + } else { + gs[ci_test] = nan(""); + // gsl_vector_set(x, ci_test, -9); + n_miss++; + } + } - if (a_mode == 2 || a_mode == 4) { - CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); - p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1); + ci_total++; + ci_test++; } - - time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); - - // Store summary data. - SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, - p_wald, p_lrt, p_score, logl_H1}; - sumStat.push_back(SNPs); } } - } - cout << endl; - - gsl_vector_free(x); - gsl_vector_free(Utx); - gsl_matrix_free(Uab); - gsl_vector_free(ab); + string snp="unknown"; + return std::make_tuple(snp,gs); + }; - gsl_matrix_free(Xlarge); - gsl_matrix_free(UtXlarge); + LMM::Analyze(fetch_snp,U,eval,UtW,Uty,W,y,gwasnps); infile.close(); infile.clear(); - - return; } void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX, |