/*
 Genome-wide Efficient Mixed Model Association (GEMMA)
 Copyright (C) 2011-2017 Xiang Zhou

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <fstream>
#include <iostream>
#include <sstream>

#include <assert.h>
#include <bitset>
#include <cmath>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>

#include "gsl/gsl_blas.h"
#include "gsl/gsl_linalg.h"
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_vector.h"

#include "gsl/gsl_cdf.h"
#include "gsl/gsl_integration.h"
#include "gsl/gsl_min.h"
#include "gsl/gsl_roots.h"

#include "eigenlib.h"
#include "gzstream.h"
#include "lapack.h"
#include "lm.h"

using namespace std;

void LM::CopyFromParam(PARAM &cPar) {
  a_mode = cPar.a_mode;
  d_pace = cPar.d_pace;

  file_bfile = cPar.file_bfile;
  file_geno = cPar.file_geno;
  file_out = cPar.file_out;
  path_out = cPar.path_out;
  file_gene = cPar.file_gene;
  // WJA added
  file_oxford = cPar.file_oxford;

  time_opt = 0.0;

  ni_total = cPar.ni_total;
  ns_total = cPar.ns_total;
  ni_test = cPar.ni_test;
  ns_test = cPar.ns_test;
  n_cvt = cPar.n_cvt;

  ng_total = cPar.ng_total;
  ng_test = 0;

  indicator_idv = cPar.indicator_idv;
  indicator_snp = cPar.indicator_snp;
  snpInfo = cPar.snpInfo;

  return;
}

void LM::CopyToParam(PARAM &cPar) {
  cPar.time_opt = time_opt;
  cPar.ng_test = ng_test;
  return;
}

void LM::WriteFiles() {
  string file_str;
  file_str = path_out + "/" + file_out;
  file_str += ".assoc.txt";

  ofstream outfile(file_str.c_str(), ofstream::out);
  if (!outfile) {
    cout << "error writing file: " << file_str.c_str() << endl;
    return;
  }

  if (!file_gene.empty()) {
    outfile << "geneID"
            << "\t";

    if (a_mode == 51) {
      outfile << "beta"
              << "\t"
              << "se"
              << "\t"
              << "p_wald" << endl;
    } else if (a_mode == 52) {
      outfile << "p_lrt" << endl;
    } else if (a_mode == 53) {
      outfile << "beta"
              << "\t"
              << "se"
              << "\t"
              << "p_score" << endl;
    } else if (a_mode == 54) {
      outfile << "beta"
              << "\t"
              << "se"
              << "\t"
              << "p_wald"
              << "\t"
              << "p_lrt"
              << "\t"
              << "p_score" << endl;
    } else {
    }

    for (vector<SUMSTAT>::size_type t = 0; t < sumStat.size(); ++t) {
      outfile << snpInfo[t].rs_number << "\t";

      if (a_mode == 51) {
        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
                << sumStat[t].se << "\t" << sumStat[t].p_wald << endl;
      } else if (a_mode == 52) {
        outfile << scientific << setprecision(6) << "\t" << sumStat[t].p_lrt
                << endl;
      } else if (a_mode == 53) {
        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
                << sumStat[t].se << "\t" << sumStat[t].p_score << endl;
      } else if (a_mode == 54) {
        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
                << sumStat[t].se << "\t" << sumStat[t].p_wald << "\t"
                << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl;
      } else {
      }
    }
  } else {
    outfile << "chr"
            << "\t"
            << "rs"
            << "\t"
            << "ps"
            << "\t"
            << "n_mis"
            << "\t"
            << "n_obs"
            << "\t"
            << "allele1"
            << "\t"
            << "allele0"
            << "\t"
            << "af"
            << "\t";

    if (a_mode == 51) {
      outfile << "beta"
              << "\t"
              << "se"
              << "\t"
              << "p_wald" << endl;
    } else if (a_mode == 52) {
      outfile << "p_lrt" << endl;
    } else if (a_mode == 53) {
      outfile << "beta"
              << "\t"
              << "se"
              << "\t"
              << "p_score" << endl;
    } else if (a_mode == 54) {
      outfile << "beta"
              << "\t"
              << "se"
              << "\t"
              << "p_wald"
              << "\t"
              << "p_lrt"
              << "\t"
              << "p_score" << endl;
    } else {
    }

    size_t t = 0;
    for (size_t i = 0; i < snpInfo.size(); ++i) {
      if (indicator_snp[i] == 0) {
        continue;
      }

      outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
              << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"
              << ni_test - snpInfo[i].n_miss << "\t" << snpInfo[i].a_minor
              << "\t" << snpInfo[i].a_major << "\t" << fixed << setprecision(3)
              << snpInfo[i].maf << "\t";

      if (a_mode == 51) {
        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
                << sumStat[t].se << "\t" << sumStat[t].p_wald << endl;
      } else if (a_mode == 52) {
        outfile << scientific << setprecision(6) << sumStat[t].p_lrt << endl;
      } else if (a_mode == 53) {
        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
                << sumStat[t].se << "\t" << sumStat[t].p_score << endl;
      } else if (a_mode == 54) {
        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
                << sumStat[t].se << "\t" << sumStat[t].p_wald << "\t"
                << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl;
      } else {
      }
      t++;
    }
  }

  outfile.close();
  outfile.clear();
  return;
}

void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty,
             const gsl_vector *Wtx, const gsl_vector *y, const gsl_vector *x,
             double &xPwy, double &xPwx) {
  size_t c_size = Wty->size;
  double d;

  gsl_vector *WtWiWtx = gsl_vector_alloc(c_size);

  gsl_blas_ddot(x, x, &xPwx);
  gsl_blas_ddot(x, y, &xPwy);
  gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);

  gsl_blas_ddot(WtWiWtx, Wtx, &d);
  xPwx -= d;

  gsl_blas_ddot(WtWiWtx, Wty, &d);
  xPwy -= d;

  gsl_vector_free(WtWiWtx);

  return;
}

void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty, const gsl_vector *y,
             double &yPwy) {
  size_t c_size = Wty->size;
  double d;

  gsl_vector *WtWiWty = gsl_vector_alloc(c_size);

  gsl_blas_ddot(y, y, &yPwy);
  gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wty, 0.0, WtWiWty);

  gsl_blas_ddot(WtWiWty, Wty, &d);
  yPwy -= d;

  gsl_vector_free(WtWiWty);

  return;
}

// Calculate p-values and beta/se in a linear model.
void LmCalcP(const size_t test_mode, const double yPwy, const double xPwy,
             const double xPwx, const double df, const size_t n_size,
             double &beta, double &se, double &p_wald, double &p_lrt,
             double &p_score) {
  double yPxy = yPwy - xPwy * xPwy / xPwx;
  double se_wald, se_score;

  beta = xPwy / xPwx;
  se_wald = sqrt(yPxy / (df * xPwx));
  se_score = sqrt(yPwy / ((double)n_size * xPwx));

  p_wald = gsl_cdf_fdist_Q(beta * beta / (se_wald * se_wald), 1.0, df);
  p_score = gsl_cdf_fdist_Q(beta * beta / (se_score * se_score), 1.0, df);
  p_lrt = gsl_cdf_chisq_Q((double)n_size * (log(yPwy) - log(yPxy)), 1);

  if (test_mode == 3) {
    se = se_score;
  } else {
    se = se_wald;
  }

  return;
}

void LM::AnalyzeGene(const gsl_matrix *W, const gsl_vector *x) {
  ifstream infile(file_gene.c_str(), ifstream::in);
  if (!infile) {
    cout << "error reading gene expression file:" << file_gene << endl;
    return;
  }

  clock_t time_start = clock();

  string line;
  char *ch_ptr;

  double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0;
  int c_phen;
  string rs; // Gene id.
  double d;

  // Calculate some basic quantities.
  double yPwy, xPwy, xPwx;
  double df = (double)W->size1 - (double)W->size2 - 1.0;

  gsl_vector *y = gsl_vector_alloc(W->size1);

  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
  gsl_vector *Wty = gsl_vector_alloc(W->size2);
  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);

  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
  int sig;
  LUDecomp(WtW, pmt, &sig);
  LUInvert(WtW, pmt, WtWi);

  gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
  CalcvPv(WtWi, Wtx, x, xPwx);

  // Header.
  getline(infile, line);

  for (size_t t = 0; t < ng_total; t++) {
    getline(infile, line);
    if (t % d_pace == 0 || t == ng_total - 1) {
      ProgressBar("Performing Analysis ", t, ng_total - 1);
    }
    ch_ptr = strtok((char *)line.c_str(), " , \t");
    rs = ch_ptr;

    c_phen = 0;
    for (size_t i = 0; i < indicator_idv.size(); ++i) {
      ch_ptr = strtok(NULL, " , \t");
      if (indicator_idv[i] == 0) {
        continue;
      }

      d = atof(ch_ptr);
      gsl_vector_set(y, c_phen, d);

      c_phen++;
    }

    // Calculate statistics.
    time_start = clock();

    gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
    CalcvPv(WtWi, Wtx, Wty, x, y, xPwy, yPwy);
    LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald,
            p_lrt, p_score);

    time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);

    // Store summary data.
    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
    sumStat.push_back(SNPs);
  }
  cout << endl;

  gsl_vector_free(y);

  gsl_matrix_free(WtW);
  gsl_matrix_free(WtWi);
  gsl_vector_free(Wty);
  gsl_vector_free(Wtx);
  gsl_permutation_free(pmt);

  infile.close();
  infile.clear();

  return;
}

// WJA added
void LM::Analyzebgen(const gsl_matrix *W, const gsl_vector *y) {
  string file_bgen = file_oxford + ".bgen";
  ifstream infile(file_bgen.c_str(), ios::binary);
  if (!infile) {
    cout << "error reading bgen file:" << file_bgen << endl;
    return;
  }

  clock_t time_start = clock();

  string line;
  char *ch_ptr;

  double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0;
  int n_miss, c_phen;
  double geno, x_mean;

  // Calculate some basic quantities.
  double yPwy, xPwy, xPwx;
  double df = (double)W->size1 - (double)W->size2 - 1.0;

  gsl_vector *x = gsl_vector_alloc(W->size1);
  gsl_vector *x_miss = gsl_vector_alloc(W->size1);

  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
  gsl_vector *Wty = gsl_vector_alloc(W->size2);
  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);

  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
  int sig;
  LUDecomp(WtW, pmt, &sig);
  LUInvert(WtW, pmt, WtWi);

  gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
  CalcvPv(WtWi, Wty, y, yPwy);

  // Read in header.
  uint32_t bgen_snp_block_offset;
  uint32_t bgen_header_length;
  uint32_t bgen_nsamples;
  uint32_t bgen_nsnps;
  uint32_t bgen_flags;
  infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
  infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
  bgen_snp_block_offset -= 4;
  infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
  bgen_snp_block_offset -= 4;
  infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
  bgen_snp_block_offset -= 4;
  infile.ignore(4 + bgen_header_length - 20);
  bgen_snp_block_offset -= 4 + bgen_header_length - 20;
  infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
  bgen_snp_block_offset -= 4;
  bool CompressedSNPBlocks = bgen_flags & 0x1;

  infile.ignore(bgen_snp_block_offset);

  double bgen_geno_prob_AA, bgen_geno_prob_AB;
  double bgen_geno_prob_BB, bgen_geno_prob_non_miss;

  uint32_t bgen_N;
  uint16_t bgen_LS;
  uint16_t bgen_LR;
  uint16_t bgen_LC;
  uint32_t bgen_SNP_pos;
  uint32_t bgen_LA;
  std::string bgen_A_allele;
  uint32_t bgen_LB;
  std::string bgen_B_allele;
  uint32_t bgen_P;
  size_t unzipped_data_size;
  string id;
  string rs;
  string chr;
  std::cout << "Warning: WJA hard coded SNP missingness "
            << "threshold of 10%" << std::endl;

  // Start reading genotypes and analyze.
  for (size_t t = 0; t < indicator_snp.size(); ++t) {
    if (t % d_pace == 0 || t == (ns_total - 1)) {
      ProgressBar("Reading SNPs  ", t, ns_total - 1);
    }

    // Read SNP header.
    id.clear();
    rs.clear();
    chr.clear();
    bgen_A_allele.clear();
    bgen_B_allele.clear();

    infile.read(reinterpret_cast<char *>(&bgen_N), 4);
    infile.read(reinterpret_cast<char *>(&bgen_LS), 2);

    id.resize(bgen_LS);
    infile.read(&id[0], bgen_LS);

    infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
    rs.resize(bgen_LR);
    infile.read(&rs[0], bgen_LR);

    infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
    chr.resize(bgen_LC);
    infile.read(&chr[0], bgen_LC);

    infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);

    infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
    bgen_A_allele.resize(bgen_LA);
    infile.read(&bgen_A_allele[0], bgen_LA);

    infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
    bgen_B_allele.resize(bgen_LB);
    infile.read(&bgen_B_allele[0], bgen_LB);

    uint16_t unzipped_data[3 * bgen_N];

    if (indicator_snp[t] == 0) {
      if (CompressedSNPBlocks)
        infile.read(reinterpret_cast<char *>(&bgen_P), 4);
      else
        bgen_P = 6 * bgen_N;

      infile.ignore(static_cast<size_t>(bgen_P));

      continue;
    }

    if (CompressedSNPBlocks) {
      infile.read(reinterpret_cast<char *>(&bgen_P), 4);
      uint8_t zipped_data[bgen_P];

      unzipped_data_size = 6 * bgen_N;

      infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);

      int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
                              reinterpret_cast<uLongf *>(&unzipped_data_size),
                              reinterpret_cast<Bytef *>(zipped_data),
                              static_cast<uLong>(bgen_P));
      assert(result == Z_OK);

    } else {

      bgen_P = 6 * bgen_N;
      infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
    }

    x_mean = 0.0;
    c_phen = 0;
    n_miss = 0;
    gsl_vector_set_zero(x_miss);
    for (size_t i = 0; i < bgen_N; ++i) {
      if (indicator_idv[i] == 0) {
        continue;
      }

      bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
      bgen_geno_prob_AB =
          static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
      bgen_geno_prob_BB =
          static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;

      // WJA
      bgen_geno_prob_non_miss =
          bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
      if (bgen_geno_prob_non_miss < 0.9) {
        gsl_vector_set(x_miss, c_phen, 0.0);
        n_miss++;
      } else {
        bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
        bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
        bgen_geno_prob_BB /= bgen_geno_prob_non_miss;

        geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;

        gsl_vector_set(x, c_phen, geno);
        gsl_vector_set(x_miss, c_phen, 1.0);
        x_mean += geno;
      }
      c_phen++;
    }

    x_mean /= static_cast<double>(ni_test - n_miss);

    for (size_t i = 0; i < ni_test; ++i) {
      if (gsl_vector_get(x_miss, i) == 0) {
        gsl_vector_set(x, i, x_mean);
      }
      geno = gsl_vector_get(x, i);
    }

    // Calculate statistics.
    time_start = clock();

    gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
    CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
    LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald,
            p_lrt, p_score);

    time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);

    // Store summary data.
    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
    sumStat.push_back(SNPs);
  }
  cout << endl;

  gsl_vector_free(x);
  gsl_vector_free(x_miss);

  gsl_matrix_free(WtW);
  gsl_matrix_free(WtWi);
  gsl_vector_free(Wty);
  gsl_vector_free(Wtx);
  gsl_permutation_free(pmt);

  infile.close();
  infile.clear();

  return;
}

void LM::AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y) {
  igzstream infile(file_geno.c_str(), igzstream::in);
  if (!infile) {
    cout << "error reading genotype file:" << file_geno << endl;
    return;
  }

  clock_t time_start = clock();

  string line;
  char *ch_ptr;

  double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0;
  int n_miss, c_phen;
  double geno, x_mean;

  // Calculate some basic quantities.
  double yPwy, xPwy, xPwx;
  double df = (double)W->size1 - (double)W->size2 - 1.0;

  gsl_vector *x = gsl_vector_alloc(W->size1);
  gsl_vector *x_miss = gsl_vector_alloc(W->size1);

  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
  gsl_vector *Wty = gsl_vector_alloc(W->size2);
  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);

  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
  int sig;
  LUDecomp(WtW, pmt, &sig);
  LUInvert(WtW, pmt, WtWi);

  gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
  CalcvPv(WtWi, Wty, y, yPwy);

  // Start reading genotypes and analyze.
  for (size_t t = 0; t < indicator_snp.size(); ++t) {
    getline(infile, line);
    if (t % d_pace == 0 || t == (ns_total - 1)) {
      ProgressBar("Reading SNPs  ", t, ns_total - 1);
    }
    if (indicator_snp[t] == 0) {
      continue;
    }

    ch_ptr = strtok((char *)line.c_str(), " , \t");
    ch_ptr = strtok(NULL, " , \t");
    ch_ptr = strtok(NULL, " , \t");

    x_mean = 0.0;
    c_phen = 0;
    n_miss = 0;
    gsl_vector_set_zero(x_miss);
    for (size_t i = 0; i < ni_total; ++i) {
      ch_ptr = strtok(NULL, " , \t");
      if (indicator_idv[i] == 0) {
        continue;
      }

      if (strcmp(ch_ptr, "NA") == 0) {
        gsl_vector_set(x_miss, c_phen, 0.0);
        n_miss++;
      } else {
        geno = atof(ch_ptr);

        gsl_vector_set(x, c_phen, geno);
        gsl_vector_set(x_miss, c_phen, 1.0);
        x_mean += geno;
      }
      c_phen++;
    }

    x_mean /= (double)(ni_test - n_miss);

    for (size_t i = 0; i < ni_test; ++i) {
      if (gsl_vector_get(x_miss, i) == 0) {
        gsl_vector_set(x, i, x_mean);
      }
      geno = gsl_vector_get(x, i);
    }

    // Calculate statistics.
    time_start = clock();

    gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
    CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
    LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald,
            p_lrt, p_score);

    time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);

    // Store summary data.
    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
    sumStat.push_back(SNPs);
  }
  cout << endl;

  gsl_vector_free(x);
  gsl_vector_free(x_miss);

  gsl_matrix_free(WtW);
  gsl_matrix_free(WtWi);
  gsl_vector_free(Wty);
  gsl_vector_free(Wtx);
  gsl_permutation_free(pmt);

  infile.close();
  infile.clear();

  return;
}

void LM::AnalyzePlink(const gsl_matrix *W, const gsl_vector *y) {
  string file_bed = file_bfile + ".bed";
  ifstream infile(file_bed.c_str(), ios::binary);
  if (!infile) {
    cout << "error reading bed file:" << file_bed << endl;
    return;
  }

  clock_t time_start = clock();

  char ch[1];
  bitset<8> b;

  double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0;
  int n_bit, n_miss, ci_total, ci_test;
  double geno, x_mean;

  // Calculate some basic quantities.
  double yPwy, xPwy, xPwx;
  double df = (double)W->size1 - (double)W->size2 - 1.0;

  gsl_vector *x = gsl_vector_alloc(W->size1);

  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
  gsl_vector *Wty = gsl_vector_alloc(W->size2);
  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);

  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
  int sig;
  LUDecomp(WtW, pmt, &sig);
  LUInvert(WtW, pmt, WtWi);

  gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
  CalcvPv(WtWi, Wty, y, yPwy);

  // Calculate n_bit and c, the number of bit for each SNP.
  if (ni_total % 4 == 0) {
    n_bit = ni_total / 4;
  } else {
    n_bit = ni_total / 4 + 1;
  }

  // Print the first three magic numbers.
  for (int i = 0; i < 3; ++i) {
    infile.read(ch, 1);
    b = ch[0];
  }

  for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
    if (t % d_pace == 0 || t == snpInfo.size() - 1) {
      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
    }
    if (indicator_snp[t] == 0) {
      continue;
    }

    // n_bit, and 3 is the number of magic numbers.
    infile.seekg(t * n_bit + 3);

    // Read genotypes.
    x_mean = 0.0;
    n_miss = 0;
    ci_total = 0;
    ci_test = 0;
    for (int i = 0; i < n_bit; ++i) {
      infile.read(ch, 1);
      b = ch[0];

      // Minor allele homozygous: 2.0; major: 0.0;
      for (size_t j = 0; j < 4; ++j) {
        if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
          break;
        }
        if (indicator_idv[ci_total] == 0) {
          ci_total++;
          continue;
        }

        if (b[2 * j] == 0) {
          if (b[2 * j + 1] == 0) {
            gsl_vector_set(x, ci_test, 2);
            x_mean += 2.0;
          } else {
            gsl_vector_set(x, ci_test, 1);
            x_mean += 1.0;
          }
        } else {
          if (b[2 * j + 1] == 1) {
            gsl_vector_set(x, ci_test, 0);
          } else {
            gsl_vector_set(x, ci_test, -9);
            n_miss++;
          }
        }

        ci_total++;
        ci_test++;
      }
    }

    x_mean /= (double)(ni_test - n_miss);

    for (size_t i = 0; i < ni_test; ++i) {
      geno = gsl_vector_get(x, i);
      if (geno == -9) {
        gsl_vector_set(x, i, x_mean);
        geno = x_mean;
      }
    }

    // Calculate statistics.
    time_start = clock();

    gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
    CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
    LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald,
            p_lrt, p_score);

    // store summary data
    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
    sumStat.push_back(SNPs);

    time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
  }
  cout << endl;

  gsl_vector_free(x);

  gsl_matrix_free(WtW);
  gsl_matrix_free(WtWi);
  gsl_vector_free(Wty);
  gsl_vector_free(Wtx);
  gsl_permutation_free(pmt);

  infile.close();
  infile.clear();

  return;
}

// Make sure that both y and X are centered already.
void MatrixCalcLmLR(const gsl_matrix *X, const gsl_vector *y,
                    vector<pair<size_t, double>> &pos_loglr) {
  double yty, xty, xtx, log_lr;
  gsl_blas_ddot(y, y, &yty);

  for (size_t i = 0; i < X->size2; ++i) {
    gsl_vector_const_view X_col = gsl_matrix_const_column(X, i);
    gsl_blas_ddot(&X_col.vector, &X_col.vector, &xtx);
    gsl_blas_ddot(&X_col.vector, y, &xty);

    log_lr = 0.5 * (double)y->size * (log(yty) - log(yty - xty * xty / xtx));
    pos_loglr.push_back(make_pair(i, log_lr));
  }

  return;
}