about summary refs log tree commit diff
path: root/src/lmm.cpp
diff options
context:
space:
mode:
authorPjotr Prins2017-10-05 13:01:27 +0000
committerPjotr Prins2017-10-05 13:01:27 +0000
commit4d5cb9ae3847192c98ff585b9ad48f6103b2417b (patch)
tree7f3c089d78686eb32e2efddc1a97f93c3c876b87 /src/lmm.cpp
parent86323ccaf26ad0a3b706a67a0014dd04b9965823 (diff)
downloadpangemma-4d5cb9ae3847192c98ff585b9ad48f6103b2417b.tar.gz
Removed Oxford format as per https://github.com/genetics-statistics/GEMMA/issues/46
Diffstat (limited to 'src/lmm.cpp')
-rw-r--r--src/lmm.cpp281
1 files changed, 0 insertions, 281 deletions
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 1193700..50fb64c 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -56,9 +56,6 @@ void LMM::CopyFromParam(PARAM &cPar) {
   path_out = cPar.path_out;
   file_gene = cPar.file_gene;
 
-  // WJA added.
-  file_oxford = cPar.file_oxford;
-
   l_min = cPar.l_min;
   l_max = cPar.l_max;
   n_region = cPar.n_region;
@@ -1630,284 +1627,6 @@ void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
   return;
 }
 
-// WJA added.
-void LMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
-                      const gsl_matrix *UtW, const gsl_vector *Uty,
-                      const gsl_matrix *W, const gsl_vector *y) {
-  debug_msg("entering");
-  string file_bgen = file_oxford + ".bgen";
-  ifstream infile(file_bgen.c_str(), ios::binary);
-  if (!infile) {
-    cout << "error reading bgen file:" << file_bgen << endl;
-    return;
-  }
-
-  clock_t time_start = clock();
-  double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
-  double p_lrt = 0, p_score = 0;
-  double logl_H1 = 0.0;
-  int n_miss, c_phen;
-  double geno, x_mean;
-
-  // Calculate basic quantities.
-  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
-
-  gsl_vector *x = gsl_vector_alloc(U->size1);
-  gsl_vector *x_miss = gsl_vector_alloc(U->size1);
-  gsl_vector *Utx = gsl_vector_alloc(U->size2);
-  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
-
-  // Create a large matrix.
-  size_t msize = LMM_BATCH_SIZE;
-  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
-  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
-  gsl_matrix_set_zero(Xlarge);
-
-  gsl_matrix_set_zero(Uab);
-  CalcUab(UtW, Uty, Uab);
-
-  // Read in header.
-  uint32_t bgen_snp_block_offset;
-  uint32_t bgen_header_length;
-  uint32_t bgen_nsamples;
-  uint32_t bgen_nsnps;
-  uint32_t bgen_flags;
-  infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
-  infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
-  bgen_snp_block_offset -= 4;
-  infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
-  bgen_snp_block_offset -= 4;
-  infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
-  bgen_snp_block_offset -= 4;
-  infile.ignore(4 + bgen_header_length - 20);
-  bgen_snp_block_offset -= 4 + bgen_header_length - 20;
-  infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
-  bgen_snp_block_offset -= 4;
-  bool CompressedSNPBlocks = bgen_flags & 0x1;
-
-  infile.ignore(bgen_snp_block_offset);
-
-  double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
-  double bgen_geno_prob_non_miss;
-
-  uint32_t bgen_N;
-  uint16_t bgen_LS;
-  uint16_t bgen_LR;
-  uint16_t bgen_LC;
-  uint32_t bgen_SNP_pos;
-  uint32_t bgen_LA;
-  std::string bgen_A_allele;
-  uint32_t bgen_LB;
-  std::string bgen_B_allele;
-  uint32_t bgen_P;
-  size_t unzipped_data_size;
-  string id;
-  string rs;
-  string chr;
-  std::cout << "Warning: WJA hard coded SNP missingness "
-            << "threshold of 10%" << std::endl;
-
-  // Start reading genotypes and analyze.
-  size_t c = 0, t_last = 0;
-  for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    if (indicator_snp[t] == 0) {
-      continue;
-    }
-    t_last++;
-  }
-  for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    if (t % d_pace == 0 || t == (ns_total - 1)) {
-      ProgressBar("Reading SNPs  ", t, ns_total - 1);
-    }
-    if (indicator_snp[t] == 0) {
-      continue;
-    }
-
-    // Read SNP header.
-    id.clear();
-    rs.clear();
-    chr.clear();
-    bgen_A_allele.clear();
-    bgen_B_allele.clear();
-
-    infile.read(reinterpret_cast<char *>(&bgen_N), 4);
-    infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
-
-    id.resize(bgen_LS);
-    infile.read(&id[0], bgen_LS);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
-    rs.resize(bgen_LR);
-    infile.read(&rs[0], bgen_LR);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
-    chr.resize(bgen_LC);
-    infile.read(&chr[0], bgen_LC);
-
-    infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
-    bgen_A_allele.resize(bgen_LA);
-    infile.read(&bgen_A_allele[0], bgen_LA);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
-    bgen_B_allele.resize(bgen_LB);
-    infile.read(&bgen_B_allele[0], bgen_LB);
-
-    uint16_t unzipped_data[3 * bgen_N];
-
-    if (indicator_snp[t] == 0) {
-      if (CompressedSNPBlocks)
-        infile.read(reinterpret_cast<char *>(&bgen_P), 4);
-      else
-        bgen_P = 6 * bgen_N;
-
-      infile.ignore(static_cast<size_t>(bgen_P));
-
-      continue;
-    }
-
-    if (CompressedSNPBlocks) {
-      infile.read(reinterpret_cast<char *>(&bgen_P), 4);
-      uint8_t zipped_data[bgen_P];
-
-      unzipped_data_size = 6 * bgen_N;
-
-      infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
-
-      int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
-                              reinterpret_cast<uLongf *>(&unzipped_data_size),
-                              reinterpret_cast<Bytef *>(zipped_data),
-                              static_cast<uLong>(bgen_P));
-      assert(result == Z_OK);
-
-    } else {
-
-      bgen_P = 6 * bgen_N;
-      infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
-    }
-
-    x_mean = 0.0;
-    c_phen = 0;
-    n_miss = 0;
-    gsl_vector_set_zero(x_miss);
-    for (size_t i = 0; i < bgen_N; ++i) {
-      if (indicator_idv[i] == 0) {
-        continue;
-      }
-
-      bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
-      bgen_geno_prob_AB =
-          static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
-      bgen_geno_prob_BB =
-          static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
-
-      // WJA.
-      bgen_geno_prob_non_miss =
-          bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
-      if (bgen_geno_prob_non_miss < 0.9) {
-        gsl_vector_set(x_miss, c_phen, 0.0);
-        n_miss++;
-      } else {
-
-        bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
-        bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
-        bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
-
-        geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
-
-        gsl_vector_set(x, c_phen, geno);
-        gsl_vector_set(x_miss, c_phen, 1.0);
-        x_mean += geno;
-      }
-      c_phen++;
-    }
-
-    x_mean /= static_cast<double>(ni_test - n_miss);
-
-    for (size_t i = 0; i < ni_test; ++i) {
-      if (gsl_vector_get(x_miss, i) == 0) {
-        gsl_vector_set(x, i, x_mean);
-      }
-      geno = gsl_vector_get(x, i);
-    }
-
-    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
-    gsl_vector_memcpy(&Xlarge_col.vector, x);
-    c++;
-
-    if (c % msize == 0 || c == t_last) {
-      size_t l = 0;
-      if (c % msize == 0) {
-        l = msize;
-      } else {
-        l = c % msize;
-      }
-
-      gsl_matrix_view Xlarge_sub =
-          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
-      gsl_matrix_view UtXlarge_sub =
-          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
-      time_start = clock();
-      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
-                     &UtXlarge_sub.matrix);
-      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
-      gsl_matrix_set_zero(Xlarge);
-
-      for (size_t i = 0; i < l; i++) {
-        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
-        gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
-
-        CalcUab(UtW, Uty, Utx, Uab);
-
-        time_start = clock();
-        FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
-
-        // 3 is before 1.
-        if (a_mode == 3 || a_mode == 4) {
-          CalcRLScore(l_mle_null, param1, beta, se, p_score);
-        }
-
-        if (a_mode == 1 || a_mode == 4) {
-          CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle,
-                     logl_H1);
-          CalcRLWald(lambda_remle, param1, beta, se, p_wald);
-        }
-
-        if (a_mode == 2 || a_mode == 4) {
-          CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
-          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
-        }
-
-        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
-        // Store summary data.
-        SUMSTAT SNPs = {beta,   se,    lambda_remle, lambda_mle,
-                        p_wald, p_lrt, p_score, logl_H1};
-        sumStat.push_back(SNPs);
-      }
-    }
-  }
-  cout << endl;
-
-  gsl_vector_free(x);
-  gsl_vector_free(x_miss);
-  gsl_vector_free(Utx);
-  gsl_matrix_free(Uab);
-  gsl_vector_free(ab);
-
-  gsl_matrix_free(Xlarge);
-  gsl_matrix_free(UtXlarge);
-
-  infile.close();
-  infile.clear();
-
-  return;
-}
-
 void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX,
                   const gsl_vector *Uty, const gsl_vector *K_eval,
                   const double l_min, const double l_max, const size_t n_region,