diff options
Diffstat (limited to 'src/lmm.cpp')
-rw-r--r-- | src/lmm.cpp | 827 |
1 files changed, 243 insertions, 584 deletions
diff --git a/src/lmm.cpp b/src/lmm.cpp index 134fbf9..ae8b747 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -39,8 +39,10 @@ #include "gsl/gsl_vector.h" #include "eigenlib.h" + #include "gzstream.h" #include "io.h" +#include "fastblas.h" #include "lapack.h" #include "lmm.h" @@ -56,9 +58,6 @@ void LMM::CopyFromParam(PARAM &cPar) { path_out = cPar.path_out; file_gene = cPar.file_gene; - // WJA added. - file_oxford = cPar.file_oxford; - l_min = cPar.l_min; l_max = cPar.l_max; n_region = cPar.n_region; @@ -107,10 +106,10 @@ void LMM::WriteFiles() { } auto common_header = [&] () { - if (a_mode != 2) + if (a_mode != 2) { outfile << "beta" << "\t"; - - outfile << "se" << "\t"; + outfile << "se" << "\t"; + } outfile << "logl_H1" << "\t"; // we may make this an option @@ -139,10 +138,10 @@ void LMM::WriteFiles() { auto sumstats = [&] (SUMSTAT st) { outfile << scientific << setprecision(6); - if (a_mode != 2) + if (a_mode != 2) { outfile << st.beta << "\t"; - - outfile << st.se << "\t"; + outfile << st.se << "\t"; + } outfile << st.logl_H1 << "\t"; @@ -364,9 +363,9 @@ double LogL_f(double l, void *params) { double f = 0.0, logdet_h = 0.0, d; size_t index_yy; - gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size); + gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size); gsl_vector_memcpy(v_temp, p->eval); gsl_vector_scale(v_temp, l); @@ -414,11 +413,11 @@ double LogL_dev1(double l, void *params) { double dev1 = 0.0, trace_Hi = 0.0; size_t index_yy; - gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size); + gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size); gsl_vector_memcpy(v_temp, p->eval); gsl_vector_scale(v_temp, l); @@ -477,13 +476,13 @@ double LogL_dev2(double l, void *params) { double dev2 = 0.0, trace_Hi = 0.0, trace_HiHi = 0.0; size_t index_yy; - gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size); + gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size); gsl_vector_memcpy(v_temp, p->eval); gsl_vector_scale(v_temp, l); @@ -554,13 +553,13 @@ void LogL_dev12(double l, void *params, double *dev1, double *dev2) { double trace_Hi = 0.0, trace_HiHi = 0.0; size_t index_yy; - gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size); + gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size); gsl_vector_memcpy(v_temp, p->eval); gsl_vector_scale(v_temp, l); @@ -637,10 +636,10 @@ double LogRL_f(double l, void *params) { double f = 0.0, logdet_h = 0.0, logdet_hiw = 0.0, d; size_t index_ww; - gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_matrix *Iab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size); + gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_matrix *Iab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size); gsl_vector_memcpy(v_temp, p->eval); gsl_vector_scale(v_temp, l); @@ -702,11 +701,11 @@ double LogRL_dev1(double l, void *params) { double dev1 = 0.0, trace_Hi = 0.0; size_t index_ww; - gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size); + gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size); gsl_vector_memcpy(v_temp, p->eval); gsl_vector_scale(v_temp, l); @@ -778,13 +777,13 @@ double LogRL_dev2(double l, void *params) { double dev2 = 0.0, trace_Hi = 0.0, trace_HiHi = 0.0; size_t index_ww; - gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size); + gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size); gsl_vector_memcpy(v_temp, p->eval); gsl_vector_scale(v_temp, l); @@ -868,13 +867,13 @@ void LogRL_dev12(double l, void *params, double *dev1, double *dev2) { double trace_Hi = 0.0, trace_HiHi = 0.0; size_t index_ww; - gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size); - gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size); + gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size); + gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size); gsl_vector_memcpy(v_temp, p->eval); gsl_vector_scale(v_temp, l); @@ -948,9 +947,9 @@ void LMM::CalcRLWald(const double &l, const FUNC_PARAM ¶ms, double &beta, int df = (int)ni_test - (int)n_cvt - 1; - gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_vector *Hi_eval = gsl_vector_alloc(params.eval->size); - gsl_vector *v_temp = gsl_vector_alloc(params.eval->size); + gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_safe_alloc(params.eval->size); + gsl_vector *v_temp = gsl_vector_safe_alloc(params.eval->size); gsl_vector_memcpy(v_temp, params.eval); gsl_vector_scale(v_temp, l); @@ -990,9 +989,9 @@ void LMM::CalcRLScore(const double &l, const FUNC_PARAM ¶ms, double &beta, int df = (int)ni_test - (int)n_cvt - 1; - gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_vector *Hi_eval = gsl_vector_alloc(params.eval->size); - gsl_vector *v_temp = gsl_vector_alloc(params.eval->size); + gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_safe_alloc(params.eval->size); + gsl_vector *v_temp = gsl_vector_safe_alloc(params.eval->size); gsl_vector_memcpy(v_temp, params.eval); gsl_vector_scale(v_temp, l); @@ -1031,7 +1030,7 @@ void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) { size_t index_ab; size_t n_cvt = UtW->size2; - gsl_vector *u_a = gsl_vector_alloc(Uty->size); + gsl_vector *u_a = gsl_vector_safe_alloc(Uty->size); for (size_t a = 1; a <= n_cvt + 2; ++a) { if (a == n_cvt + 1) { @@ -1097,8 +1096,8 @@ void Calcab(const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) { size_t n_cvt = W->size2; double d; - gsl_vector *v_a = gsl_vector_alloc(y->size); - gsl_vector *v_b = gsl_vector_alloc(y->size); + gsl_vector *v_a = gsl_vector_safe_alloc(y->size); + gsl_vector *v_b = gsl_vector_safe_alloc(y->size); for (size_t a = 1; a <= n_cvt + 2; ++a) { if (a == n_cvt + 1) { @@ -1142,7 +1141,7 @@ void Calcab(const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, size_t n_cvt = W->size2; double d; - gsl_vector *v_b = gsl_vector_alloc(y->size); + gsl_vector *v_b = gsl_vector_safe_alloc(y->size); for (size_t b = 1; b <= n_cvt + 2; ++b) { index_ab = GetabIndex(n_cvt + 1, b, n_cvt); @@ -1167,6 +1166,7 @@ void Calcab(const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Utx, const gsl_matrix *W, const gsl_vector *x) { + debug_msg(file_gene); igzstream infile(file_gene.c_str(), igzstream::in); if (!infile) { cout << "error reading gene expression file:" << file_gene << endl; @@ -1188,25 +1188,25 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval, // Calculate basic quantities. size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; - gsl_vector *y = gsl_vector_alloc(U->size1); - gsl_vector *Uty = gsl_vector_alloc(U->size2); - gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index); - gsl_vector *ab = gsl_vector_alloc(n_index); + gsl_vector *y = gsl_vector_safe_alloc(U->size1); + gsl_vector *Uty = gsl_vector_safe_alloc(U->size2); + gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index); + gsl_vector *ab = gsl_vector_safe_alloc(n_index); // Header. getline(infile, line); for (size_t t = 0; t < ng_total; t++) { - !safeGetline(infile, line).eof(); + safeGetline(infile, line).eof(); if (t % d_pace == 0 || t == ng_total - 1) { - ProgressBar("Performing Analysis ", t, ng_total - 1); + ProgressBar("Performing Analysis", t, ng_total - 1); } - ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok_safe((char *)line.c_str(), " , \t"); rs = ch_ptr; c_phen = 0; for (size_t i = 0; i < indicator_idv.size(); ++i) { - ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok_safe(NULL, " , \t"); if (indicator_idv[i] == 0) { continue; } @@ -1271,35 +1271,37 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval, return; } -void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y, - const set<string> gwasnps) { - debug_msg("entering"); + +void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp, + const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y, + const set<string> gwasnps) { clock_t time_start = clock(); - // LOCO support + // Subset/LOCO support bool process_gwasnps = gwasnps.size(); if (process_gwasnps) - debug_msg("AnalyzeBimbam w. LOCO"); + debug_msg("Analyze subset of SNPs (LOCO)"); // Calculate basic quantities. size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; const size_t inds = U->size1; - gsl_vector *x = gsl_vector_alloc(inds); // #inds - gsl_vector *x_miss = gsl_vector_alloc(inds); - gsl_vector *Utx = gsl_vector_alloc(U->size2); - gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index); - gsl_vector *ab = gsl_vector_alloc(n_index); + enforce(inds == ni_test); + gsl_vector *x = gsl_vector_safe_alloc(inds); // #inds + gsl_vector *x_miss = gsl_vector_safe_alloc(inds); + gsl_vector *Utx = gsl_vector_safe_alloc(U->size2); + gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index); + gsl_vector *ab = gsl_vector_safe_alloc(n_index); // Create a large matrix with LMM_BATCH_SIZE columns for batched processing // const size_t msize=(process_gwasnps ? 1 : LMM_BATCH_SIZE); const size_t msize = LMM_BATCH_SIZE; - gsl_matrix *Xlarge = gsl_matrix_alloc(inds, msize); - gsl_matrix *UtXlarge = gsl_matrix_alloc(inds, msize); - + gsl_matrix *Xlarge = gsl_matrix_safe_alloc(inds, msize); + gsl_matrix *UtXlarge = gsl_matrix_safe_alloc(inds, msize); enforce_msg(Xlarge && UtXlarge, "Xlarge memory check"); // just to be sure + enforce(Xlarge->size1 == inds); gsl_matrix_set_zero(Xlarge); gsl_matrix_set_zero(Uab); CalcUab(UtW, Uty, Uab); @@ -1307,9 +1309,6 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, // start reading genotypes and analyze size_t c = 0; - igzstream infile(file_geno.c_str(), igzstream::in); - enforce_msg(infile, "error reading genotype file"); - auto batch_compute = [&](size_t l) { // using a C++ closure // Compute SNPs in batch, note the computations are independent per SNP gsl_matrix_view Xlarge_sub = gsl_matrix_submatrix(Xlarge, 0, 0, inds, l); @@ -1317,7 +1316,7 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, gsl_matrix_submatrix(UtXlarge, 0, 0, inds, l); time_start = clock(); - eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, + fast_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix); time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); @@ -1332,8 +1331,8 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, time_start = clock(); FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0}; - double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0; - double p_lrt = 0, p_score = 0; + double lambda_mle = 0.0, lambda_remle = 0.0, beta = 0.0, se = 0.0, p_wald = 0.0; + double p_lrt = 0.0, p_score = 0.0; double logl_H1 = 0.0; // 3 is before 1. @@ -1361,54 +1360,69 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, } }; - for (size_t t = 0; t < indicator_snp.size(); ++t) { - // for every SNP - string line; - safeGetline(infile, line); - if (t % d_pace == 0 || t == (ns_total - 1)) { - ProgressBar("Reading SNPs ", t, ns_total - 1); + const auto num_snps = indicator_snp.size(); + const size_t progress_step = (num_snps/50>d_pace ? num_snps/50 : d_pace); + + for (size_t t = 0; t < num_snps; ++t) { + if (t % progress_step == 0 || t == (num_snps - 1)) { + ProgressBar("Reading SNPs", t, num_snps - 1); } if (indicator_snp[t] == 0) continue; - char *ch_ptr = strtok((char *)line.c_str(), " , \t"); - auto snp = string(ch_ptr); + auto tup = fetch_snp(t); + auto snp = get<0>(tup); + auto gs = get<1>(tup); + // check whether SNP is included in gwasnps (used by LOCO) if (process_gwasnps && gwasnps.count(snp) == 0) continue; - ch_ptr = strtok(NULL, " , \t"); - ch_ptr = strtok(NULL, " , \t"); - double x_mean = 0.0; - int c_phen = 0; - int n_miss = 0; + // drop missing idv and plug mean values for missing geno + double x_total = 0.0; // sum genotype values to compute x_mean + uint pos = 0; // position in target vector + uint n_miss = 0; gsl_vector_set_zero(x_miss); for (size_t i = 0; i < ni_total; ++i) { // get the genotypes per individual and compute stats per SNP - ch_ptr = strtok(NULL, " , \t"); - if (indicator_idv[i] == 0) + if (indicator_idv[i] == 0) // skip individual continue; - if (strcmp(ch_ptr, "NA") == 0) { - gsl_vector_set(x_miss, c_phen, 0.0); + double geno = gs[i]; + if (std::isnan(geno)) { + gsl_vector_set(x_miss, pos, 1.0); n_miss++; } else { - double geno = atof(ch_ptr); - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); - x_mean += geno; + gsl_vector_set(x, pos, geno); + x_total += geno; } - c_phen++; + pos++; } + enforce(pos == ni_test); - x_mean /= (double)(ni_test - n_miss); + const double x_mean = x_total/(double)(ni_test - n_miss); + // plug x_mean back into missing values for (size_t i = 0; i < ni_test; ++i) { - if (gsl_vector_get(x_miss, i) == 0) { + if (gsl_vector_get(x_miss, i) == 1.0) { gsl_vector_set(x, i, x_mean); } } + + /* this is what below GxE does + for (size_t i = 0; i < ni_test; ++i) { + auto geno = gsl_vector_get(x, i); + if (std::isnan(geno)) { + gsl_vector_set(x, i, x_mean); + geno = x_mean; + } + if (x_mean > 1.0) { + gsl_vector_set(x, i, 2 - geno); + } + } + */ + enforce(x->size == ni_test); + // copy genotype values for SNP into Xlarge cache gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize); gsl_vector_memcpy(&Xlarge_col.vector, x); @@ -1418,6 +1432,7 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, batch_compute(msize); } batch_compute(c % msize); + ProgressBar("Reading SNPs", num_snps - 1, num_snps - 1); // cout << "Counted SNPs " << c << " sumStat " << sumStat.size() << endl; cout << endl; @@ -1430,114 +1445,111 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, gsl_matrix_free(Xlarge); gsl_matrix_free(UtXlarge); - infile.close(); - infile.clear(); - - return; } -void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y) { - debug_msg("entering"); - string file_bed = file_bfile + ".bed"; - ifstream infile(file_bed.c_str(), ios::binary); - if (!infile) { - cout << "error reading bed file:" << file_bed << endl; - return; - } +void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y, + const set<string> gwasnps) { + debug_msg(file_geno); - clock_t time_start = clock(); + igzstream infile(file_geno.c_str(), igzstream::in); + enforce_msg(infile, "error reading genotype file"); + size_t prev_line = 0; - char ch[1]; - bitset<8> b; + std::vector <double> gs; + gs.resize(ni_total); - double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0; - double p_lrt = 0, p_score = 0; - double logl_H1 = 0.0; - int n_bit, n_miss, ci_total, ci_test; - double geno, x_mean; + // fetch_snp is a callback function for every SNP row + std::function<SnpNameValues(size_t)> fetch_snp = [&](size_t num) { + string line; + while (prev_line <= num) { + // also read SNPs that were skipped + safeGetline(infile, line); + prev_line++; + } + char *ch_ptr = strtok((char *)line.c_str(), " , \t"); + enforce_msg(ch_ptr, "Parsing BIMBAM genofile"); // ch_ptr should not be NULL - // Calculate basic quantities. - size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; + auto snp = string(ch_ptr); + ch_ptr = strtok_safe(NULL, " , \t"); // skip column + ch_ptr = strtok_safe(NULL, " , \t"); // skip column - gsl_vector *x = gsl_vector_alloc(U->size1); - gsl_vector *Utx = gsl_vector_alloc(U->size2); - gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index); - gsl_vector *ab = gsl_vector_alloc(n_index); + gs.assign (ni_total,nan("")); // wipe values - // Create a large matrix. - size_t msize = LMM_BATCH_SIZE; - gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); - gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); - gsl_matrix_set_zero(Xlarge); + for (size_t i = 0; i < ni_total; ++i) { + ch_ptr = strtok(NULL, " , \t"); + enforce_msg(ch_ptr,line.c_str()); + if (strcmp(ch_ptr, "NA") != 0) + gs[i] = atof(ch_ptr); + } + return std::make_tuple(snp,gs); + }; - gsl_matrix_set_zero(Uab); - CalcUab(UtW, Uty, Uab); + LMM::Analyze(fetch_snp,U,eval,UtW,Uty,W,y,gwasnps); + infile.close(); + infile.clear(); +} + +void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y, + const set<string> gwasnps) { + string file_bed = file_bfile + ".bed"; + debug_msg(file_bed); + + ifstream infile(file_bed.c_str(), ios::binary); + enforce_msg(infile,"error reading genotype (.bed) file"); + + bitset<8> bset8; + char ch[1]; // buffer // Calculate n_bit and c, the number of bit for each SNP. - if (ni_total % 4 == 0) { - n_bit = ni_total / 4; - } else { - n_bit = ni_total / 4 + 1; - } + const size_t n_bit = (ni_total % 4 == 0 ? ni_total / 4 : ni_total / 4 + 1); - // Print the first three magic numbers. + // first three magic numbers. for (int i = 0; i < 3; ++i) { infile.read(ch, 1); - b = ch[0]; + const bitset<8> b = ch[0]; } - size_t c = 0, t_last = 0; - for (size_t t = 0; t < snpInfo.size(); ++t) { - if (indicator_snp[t] == 0) - continue; - t_last++; - } - for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) { - if (t % d_pace == 0 || t == snpInfo.size() - 1) { - ProgressBar("Reading SNPs ", t, snpInfo.size() - 1); - } - if (indicator_snp[t] == 0) { - continue; - } + std::vector <double> gs; + gs.resize(ni_total); + // fetch_snp is a callback function for every SNP row + std::function<SnpNameValues(size_t)> fetch_snp = [&](size_t num) { + gs.assign (ni_total,nan("")); // wipe values // n_bit, and 3 is the number of magic numbers. + auto t = num; infile.seekg(t * n_bit + 3); - - // Read genotypes. - x_mean = 0.0; - n_miss = 0; - ci_total = 0; - ci_test = 0; + auto ci_total = 0; + auto ci_test = 0; + // ---- for all genotypes for (int i = 0; i < n_bit; ++i) { infile.read(ch, 1); - b = ch[0]; + bset8 = ch[0]; // Minor allele homozygous: 2.0; major: 0.0. for (size_t j = 0; j < 4; ++j) { if ((i == (n_bit - 1)) && ci_total == (int)ni_total) { break; } - if (indicator_idv[ci_total] == 0) { + if (indicator_idv[ci_total] == 0) { // skip individual ci_total++; continue; } - if (b[2 * j] == 0) { - if (b[2 * j + 1] == 0) { - gsl_vector_set(x, ci_test, 2); - x_mean += 2.0; + if (bset8[2 * j] == 0) { + if (bset8[2 * j + 1] == 0) { + gs[ci_test] = 2.0; } else { - gsl_vector_set(x, ci_test, 1); - x_mean += 1.0; + gs[ci_test] = 1.0; } } else { - if (b[2 * j + 1] == 1) { - gsl_vector_set(x, ci_test, 0); + if (bset8[2 * j + 1] == 1) { + gs[ci_test] = 0.0; } else { - gsl_vector_set(x, ci_test, -9); - n_miss++; + gs[ci_test] = nan(""); // already set to NaN - originally was -9.0 } } @@ -1545,367 +1557,14 @@ void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, ci_test++; } } + string snp="unknown"; + return std::make_tuple(snp,gs); + }; - x_mean /= (double)(ni_test - n_miss); - - for (size_t i = 0; i < ni_test; ++i) { - geno = gsl_vector_get(x, i); - if (geno == -9) { - gsl_vector_set(x, i, x_mean); - geno = x_mean; - } - } - - gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize); - gsl_vector_memcpy(&Xlarge_col.vector, x); - c++; - - if (c % msize == 0 || c == t_last) { - size_t l = 0; - if (c % msize == 0) { - l = msize; - } else { - l = c % msize; - } - - gsl_matrix_view Xlarge_sub = - gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); - gsl_matrix_view UtXlarge_sub = - gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); - - time_start = clock(); - eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, - &UtXlarge_sub.matrix); - time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); - - gsl_matrix_set_zero(Xlarge); - - for (size_t i = 0; i < l; i++) { - gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i); - gsl_vector_memcpy(Utx, &UtXlarge_col.vector); - - CalcUab(UtW, Uty, Utx, Uab); - - time_start = clock(); - FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0}; - - // 3 is before 1, for beta. - if (a_mode == 3 || a_mode == 4) { - CalcRLScore(l_mle_null, param1, beta, se, p_score); - } - - if (a_mode == 1 || a_mode == 4) { - CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, - logl_H1); - CalcRLWald(lambda_remle, param1, beta, se, p_wald); - } - - if (a_mode == 2 || a_mode == 4) { - CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); - p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1); - } - - time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); - - // Store summary data. - SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, - p_wald, p_lrt, p_score, logl_H1}; - sumStat.push_back(SNPs); - } - } - } - cout << endl; - - gsl_vector_free(x); - gsl_vector_free(Utx); - gsl_matrix_free(Uab); - gsl_vector_free(ab); - - gsl_matrix_free(Xlarge); - gsl_matrix_free(UtXlarge); - - infile.close(); - infile.clear(); - - return; -} - -// WJA added. -void LMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y) { - debug_msg("entering"); - string file_bgen = file_oxford + ".bgen"; - ifstream infile(file_bgen.c_str(), ios::binary); - if (!infile) { - cout << "error reading bgen file:" << file_bgen << endl; - return; - } - - clock_t time_start = clock(); - double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0; - double p_lrt = 0, p_score = 0; - double logl_H1 = 0.0; - int n_miss, c_phen; - double geno, x_mean; - - // Calculate basic quantities. - size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; - - gsl_vector *x = gsl_vector_alloc(U->size1); - gsl_vector *x_miss = gsl_vector_alloc(U->size1); - gsl_vector *Utx = gsl_vector_alloc(U->size2); - gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index); - gsl_vector *ab = gsl_vector_alloc(n_index); - - // Create a large matrix. - size_t msize = LMM_BATCH_SIZE; - gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); - gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); - gsl_matrix_set_zero(Xlarge); - - gsl_matrix_set_zero(Uab); - CalcUab(UtW, Uty, Uab); - - // Read in header. - uint32_t bgen_snp_block_offset; - uint32_t bgen_header_length; - uint32_t bgen_nsamples; - uint32_t bgen_nsnps; - uint32_t bgen_flags; - infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4); - infile.read(reinterpret_cast<char *>(&bgen_header_length), 4); - bgen_snp_block_offset -= 4; - infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4); - bgen_snp_block_offset -= 4; - infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4); - bgen_snp_block_offset -= 4; - infile.ignore(4 + bgen_header_length - 20); - bgen_snp_block_offset -= 4 + bgen_header_length - 20; - infile.read(reinterpret_cast<char *>(&bgen_flags), 4); - bgen_snp_block_offset -= 4; - bool CompressedSNPBlocks = bgen_flags & 0x1; - - infile.ignore(bgen_snp_block_offset); - - double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB; - double bgen_geno_prob_non_miss; - - uint32_t bgen_N; - uint16_t bgen_LS; - uint16_t bgen_LR; - uint16_t bgen_LC; - uint32_t bgen_SNP_pos; - uint32_t bgen_LA; - std::string bgen_A_allele; - uint32_t bgen_LB; - std::string bgen_B_allele; - uint32_t bgen_P; - size_t unzipped_data_size; - string id; - string rs; - string chr; - std::cout << "Warning: WJA hard coded SNP missingness " - << "threshold of 10%" << std::endl; - - // Start reading genotypes and analyze. - size_t c = 0, t_last = 0; - for (size_t t = 0; t < indicator_snp.size(); ++t) { - if (indicator_snp[t] == 0) { - continue; - } - t_last++; - } - for (size_t t = 0; t < indicator_snp.size(); ++t) { - if (t % d_pace == 0 || t == (ns_total - 1)) { - ProgressBar("Reading SNPs ", t, ns_total - 1); - } - if (indicator_snp[t] == 0) { - continue; - } - - // Read SNP header. - id.clear(); - rs.clear(); - chr.clear(); - bgen_A_allele.clear(); - bgen_B_allele.clear(); - - infile.read(reinterpret_cast<char *>(&bgen_N), 4); - infile.read(reinterpret_cast<char *>(&bgen_LS), 2); - - id.resize(bgen_LS); - infile.read(&id[0], bgen_LS); - - infile.read(reinterpret_cast<char *>(&bgen_LR), 2); - rs.resize(bgen_LR); - infile.read(&rs[0], bgen_LR); - - infile.read(reinterpret_cast<char *>(&bgen_LC), 2); - chr.resize(bgen_LC); - infile.read(&chr[0], bgen_LC); - - infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4); - - infile.read(reinterpret_cast<char *>(&bgen_LA), 4); - bgen_A_allele.resize(bgen_LA); - infile.read(&bgen_A_allele[0], bgen_LA); - - infile.read(reinterpret_cast<char *>(&bgen_LB), 4); - bgen_B_allele.resize(bgen_LB); - infile.read(&bgen_B_allele[0], bgen_LB); - - uint16_t unzipped_data[3 * bgen_N]; - - if (indicator_snp[t] == 0) { - if (CompressedSNPBlocks) - infile.read(reinterpret_cast<char *>(&bgen_P), 4); - else - bgen_P = 6 * bgen_N; - - infile.ignore(static_cast<size_t>(bgen_P)); - - continue; - } - - if (CompressedSNPBlocks) { - infile.read(reinterpret_cast<char *>(&bgen_P), 4); - uint8_t zipped_data[bgen_P]; - - unzipped_data_size = 6 * bgen_N; - - infile.read(reinterpret_cast<char *>(zipped_data), bgen_P); - - int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data), - reinterpret_cast<uLongf *>(&unzipped_data_size), - reinterpret_cast<Bytef *>(zipped_data), - static_cast<uLong>(bgen_P)); - assert(result == Z_OK); - - } else { - - bgen_P = 6 * bgen_N; - infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P); - } - - x_mean = 0.0; - c_phen = 0; - n_miss = 0; - gsl_vector_set_zero(x_miss); - for (size_t i = 0; i < bgen_N; ++i) { - if (indicator_idv[i] == 0) { - continue; - } - - bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0; - bgen_geno_prob_AB = - static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0; - bgen_geno_prob_BB = - static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0; - - // WJA. - bgen_geno_prob_non_miss = - bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB; - if (bgen_geno_prob_non_miss < 0.9) { - gsl_vector_set(x_miss, c_phen, 0.0); - n_miss++; - } else { - - bgen_geno_prob_AA /= bgen_geno_prob_non_miss; - bgen_geno_prob_AB /= bgen_geno_prob_non_miss; - bgen_geno_prob_BB /= bgen_geno_prob_non_miss; - - geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB; - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); - x_mean += geno; - } - c_phen++; - } - - x_mean /= static_cast<double>(ni_test - n_miss); - - for (size_t i = 0; i < ni_test; ++i) { - if (gsl_vector_get(x_miss, i) == 0) { - gsl_vector_set(x, i, x_mean); - } - geno = gsl_vector_get(x, i); - } - - gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize); - gsl_vector_memcpy(&Xlarge_col.vector, x); - c++; - - if (c % msize == 0 || c == t_last) { - size_t l = 0; - if (c % msize == 0) { - l = msize; - } else { - l = c % msize; - } - - gsl_matrix_view Xlarge_sub = - gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); - gsl_matrix_view UtXlarge_sub = - gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); - - time_start = clock(); - eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, - &UtXlarge_sub.matrix); - time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); - - gsl_matrix_set_zero(Xlarge); - - for (size_t i = 0; i < l; i++) { - gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i); - gsl_vector_memcpy(Utx, &UtXlarge_col.vector); - - CalcUab(UtW, Uty, Utx, Uab); - - time_start = clock(); - FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0}; - - // 3 is before 1. - if (a_mode == 3 || a_mode == 4) { - CalcRLScore(l_mle_null, param1, beta, se, p_score); - } - - if (a_mode == 1 || a_mode == 4) { - CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, - logl_H1); - CalcRLWald(lambda_remle, param1, beta, se, p_wald); - } - - if (a_mode == 2 || a_mode == 4) { - CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); - p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1); - } - - time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); - - // Store summary data. - SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, - p_wald, p_lrt, p_score, logl_H1}; - sumStat.push_back(SNPs); - } - } - } - cout << endl; - - gsl_vector_free(x); - gsl_vector_free(x_miss); - gsl_vector_free(Utx); - gsl_matrix_free(Uab); - gsl_vector_free(ab); - - gsl_matrix_free(Xlarge); - gsl_matrix_free(UtXlarge); + LMM::Analyze(fetch_snp,U,eval,UtW,Uty,W,y,gwasnps); infile.close(); infile.clear(); - - return; } void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX, @@ -1914,10 +1573,10 @@ void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX, vector<pair<size_t, double>> &pos_loglr) { double logl_H0, logl_H1, log_lr, lambda0, lambda1; - gsl_vector *w = gsl_vector_alloc(Uty->size); - gsl_matrix *Utw = gsl_matrix_alloc(Uty->size, 1); - gsl_matrix *Uab = gsl_matrix_alloc(Uty->size, 6); - gsl_vector *ab = gsl_vector_alloc(6); + gsl_vector *w = gsl_vector_safe_alloc(Uty->size); + gsl_matrix *Utw = gsl_matrix_safe_alloc(Uty->size, 1); + gsl_matrix *Uab = gsl_matrix_safe_alloc(Uty->size, 6); + gsl_vector *ab = gsl_vector_safe_alloc(6); gsl_vector_set_zero(ab); gsl_vector_set_all(w, 1.0); @@ -2122,8 +1781,8 @@ void CalcLambda(const char func_name, const gsl_vector *eval, size_t n_cvt = UtW->size2, ni_test = UtW->size1; size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; - gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index); - gsl_vector *ab = gsl_vector_alloc(n_index); + gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index); + gsl_vector *ab = gsl_vector_safe_alloc(n_index); gsl_matrix_set_zero(Uab); CalcUab(UtW, Uty, Uab); @@ -2145,8 +1804,8 @@ void CalcPve(const gsl_vector *eval, const gsl_matrix *UtW, size_t n_cvt = UtW->size2, ni_test = UtW->size1; size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; - gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index); - gsl_vector *ab = gsl_vector_alloc(n_index); + gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index); + gsl_vector *ab = gsl_vector_safe_alloc(n_index); gsl_matrix_set_zero(Uab); CalcUab(UtW, Uty, Uab); @@ -2172,15 +1831,15 @@ void CalcLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW, size_t n_cvt = UtW->size2, ni_test = UtW->size1; size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; - gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index); - gsl_vector *ab = gsl_vector_alloc(n_index); - gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); - gsl_vector *Hi_eval = gsl_vector_alloc(eval->size); - gsl_vector *v_temp = gsl_vector_alloc(eval->size); - gsl_matrix *HiW = gsl_matrix_alloc(eval->size, UtW->size2); - gsl_matrix *WHiW = gsl_matrix_alloc(UtW->size2, UtW->size2); - gsl_vector *WHiy = gsl_vector_alloc(UtW->size2); - gsl_matrix *Vbeta = gsl_matrix_alloc(UtW->size2, UtW->size2); + gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index); + gsl_vector *ab = gsl_vector_safe_alloc(n_index); + gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_safe_alloc(eval->size); + gsl_vector *v_temp = gsl_vector_safe_alloc(eval->size); + gsl_matrix *HiW = gsl_matrix_safe_alloc(eval->size, UtW->size2); + gsl_matrix *WHiW = gsl_matrix_safe_alloc(UtW->size2, UtW->size2); + gsl_vector *WHiy = gsl_vector_safe_alloc(UtW->size2); + gsl_matrix *Vbeta = gsl_matrix_safe_alloc(UtW->size2, UtW->size2); gsl_matrix_set_zero(Uab); CalcUab(UtW, Uty, Uab); @@ -2262,13 +1921,13 @@ void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval, // Calculate basic quantities. size_t n_index = (n_cvt + 2 + 2 + 1) * (n_cvt + 2 + 2) / 2; - gsl_vector *x = gsl_vector_alloc(U->size1); - gsl_vector *x_miss = gsl_vector_alloc(U->size1); - gsl_vector *Utx = gsl_vector_alloc(U->size2); - gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index); - gsl_vector *ab = gsl_vector_alloc(n_index); + gsl_vector *x = gsl_vector_safe_alloc(U->size1); + gsl_vector *x_miss = gsl_vector_safe_alloc(U->size1); + gsl_vector *Utx = gsl_vector_safe_alloc(U->size2); + gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index); + gsl_vector *ab = gsl_vector_safe_alloc(n_index); - gsl_matrix *UtW_expand = gsl_matrix_alloc(U->size1, UtW->size2 + 2); + gsl_matrix *UtW_expand = gsl_matrix_safe_alloc(U->size1, UtW->size2 + 2); gsl_matrix_view UtW_expand_mat = gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2); gsl_matrix_memcpy(&UtW_expand_mat.matrix, UtW); @@ -2278,24 +1937,24 @@ void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval, // Start reading genotypes and analyze. for (size_t t = 0; t < indicator_snp.size(); ++t) { - !safeGetline(infile, line).eof(); + safeGetline(infile, line).eof(); if (t % d_pace == 0 || t == (ns_total - 1)) { - ProgressBar("Reading SNPs ", t, ns_total - 1); + ProgressBar("Reading SNPs", t, ns_total - 1); } if (indicator_snp[t] == 0) { continue; } - ch_ptr = strtok((char *)line.c_str(), " , \t"); - ch_ptr = strtok(NULL, " , \t"); - ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok_safe((char *)line.c_str(), " , \t"); + ch_ptr = strtok_safe(NULL, " , \t"); + ch_ptr = strtok_safe(NULL, " , \t"); x_mean = 0.0; c_phen = 0; n_miss = 0; gsl_vector_set_zero(x_miss); for (size_t i = 0; i < ni_total; ++i) { - ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok_safe(NULL, " , \t"); if (indicator_idv[i] == 0) { continue; } @@ -2390,8 +2049,8 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const gsl_vector *env) { - debug_msg("entering"); string file_bed = file_bfile + ".bed"; + debug_msg(file_bed); ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { cout << "error reading bed file:" << file_bed << endl; @@ -2412,12 +2071,12 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval, // Calculate basic quantities. size_t n_index = (n_cvt + 2 + 2 + 1) * (n_cvt + 2 + 2) / 2; - gsl_vector *x = gsl_vector_alloc(U->size1); - gsl_vector *Utx = gsl_vector_alloc(U->size2); - gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index); - gsl_vector *ab = gsl_vector_alloc(n_index); + gsl_vector *x = gsl_vector_safe_alloc(U->size1); + gsl_vector *Utx = gsl_vector_safe_alloc(U->size2); + gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index); + gsl_vector *ab = gsl_vector_safe_alloc(n_index); - gsl_matrix *UtW_expand = gsl_matrix_alloc(U->size1, UtW->size2 + 2); + gsl_matrix *UtW_expand = gsl_matrix_safe_alloc(U->size1, UtW->size2 + 2); gsl_matrix_view UtW_expand_mat = gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2); gsl_matrix_memcpy(&UtW_expand_mat.matrix, UtW); @@ -2440,7 +2099,7 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval, for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) { if (t % d_pace == 0 || t == snpInfo.size() - 1) { - ProgressBar("Reading SNPs ", t, snpInfo.size() - 1); + ProgressBar("Reading SNPs", t, snpInfo.size() - 1); } if (indicator_snp[t] == 0) { continue; |