diff options
Diffstat (limited to 'src/gemma_io.cpp')
| -rw-r--r-- | src/gemma_io.cpp | 410 |
1 files changed, 256 insertions, 154 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index 7d1f0ca..e630f64 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -2,7 +2,7 @@ Genome-wide Efficient Mixed Model Association (GEMMA) Copyright © 2011-2017, Xiang Zhou Copyright © 2017, Peter Carbonetto - Copyright © 2017-2020, Pjotr Prins + Copyright © 2017-2025, Pjotr Prins This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -41,6 +41,7 @@ #include "gsl/gsl_linalg.h" #include "gsl/gsl_matrix.h" #include "gsl/gsl_vector.h" +#include <lmdb++.h> #include "checkpoint.h" #include "debug.h" @@ -61,7 +62,7 @@ void ProgressBar(string str, double p, double total, double ratio) { const double progress = (100.0 * p / total); const uint barsize = (int)(progress / 2.0); // characters // cout << barsize << endl; - // cout << str << " "; + cout << str << " "; // cout << p << "/" << total << endl; assert(barsize < 101); // corrupted data somehow if (barsize > 0) { @@ -153,6 +154,7 @@ std::istream &safeGetline(std::istream &is, std::string &t) { // Read SNP file. A single column of SNP names. bool ReadFile_snps(const string file_snps, set<string> &setSnps) { debug_msg("enter ReadFile_snps"); + checkpoint("read-file-snps",file_snps); setSnps.clear(); igzstream infile(file_snps.c_str(), igzstream::in); @@ -182,6 +184,7 @@ bool ReadFile_snps(const string file_snps, set<string> &setSnps) { bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) { debug_msg("entered"); setSnps.clear(); + checkpoint("read-file-header",file_snps); igzstream infile(file_snps.c_str(), igzstream::in); if (!infile) { @@ -239,6 +242,7 @@ bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) { // Read log file. bool ReadFile_log(const string &file_log, double &pheno_mean) { debug_msg("ReadFile_log"); + checkpoint("read-file-log",file_log); ifstream infile(file_log.c_str(), ifstream::in); if (!infile) { cout << "error! fail to open log file: " << file_log << endl; @@ -282,6 +286,8 @@ bool ReadFile_anno(const string &file_anno, map<string, string> &mapRS2chr, map<string, long int> &mapRS2bp, map<string, double> &mapRS2cM) { debug_msg("ReadFile_anno"); + checkpoint("read-file-anno",file_anno); + mapRS2chr.clear(); mapRS2bp.clear(); @@ -345,6 +351,7 @@ bool ReadFile_anno(const string &file_anno, map<string, string> &mapRS2chr, bool ReadFile_column(const string &file_pheno, vector<int> &indicator_idv, vector<double> &pheno, const int &p_column) { debug_msg("entered"); + checkpoint("read-file-column",file_pheno); indicator_idv.clear(); pheno.clear(); @@ -389,6 +396,7 @@ bool ReadFile_pheno(const string &file_pheno, vector<vector<double>> &pheno, const vector<size_t> &p_column) { debug_msg("entered"); + checkpoint("read-file-pheno",file_pheno); indicator_pheno.clear(); pheno.clear(); @@ -398,35 +406,36 @@ bool ReadFile_pheno(const string &file_pheno, return false; } - string line; - char *ch_ptr; - - string id; + // string id; double p; vector<double> pheno_row; - vector<int> ind_pheno_row; + + string line; + char *ch_ptr; + + vector<int> indicator_pheno_row; size_t p_max = *max_element(p_column.begin(), p_column.end()); map<size_t, size_t> mapP2c; for (size_t i = 0; i < p_column.size(); i++) { mapP2c[p_column[i]] = i; pheno_row.push_back(-9); - ind_pheno_row.push_back(0); + indicator_pheno_row.push_back(0); } while (!safeGetline(infile, line).eof()) { ch_ptr = strtok((char *)line.c_str(), " ,\t"); size_t i = 0; while (i < p_max) { - enforce_msg(ch_ptr,"Number of phenotypes in pheno file do not match phenotypes in geno file"); + enforce_msg(ch_ptr,"Number of phenotypes in pheno file do not match selected columns"); if (mapP2c.count(i + 1) != 0) { if (strcmp(ch_ptr, "NA") == 0) { - ind_pheno_row[mapP2c[i + 1]] = 0; + indicator_pheno_row[mapP2c[i + 1]] = 0; // skip this trait row pheno_row[mapP2c[i + 1]] = -9; } else { p = atof(ch_ptr); - ind_pheno_row[mapP2c[i + 1]] = 1; + indicator_pheno_row[mapP2c[i + 1]] = 1; // use this trait row pheno_row[mapP2c[i + 1]] = p; } } @@ -434,7 +443,7 @@ bool ReadFile_pheno(const string &file_pheno, ch_ptr = strtok(NULL, " ,\t"); } - indicator_pheno.push_back(ind_pheno_row); + indicator_pheno.push_back(indicator_pheno_row); pheno.push_back(pheno_row); } @@ -448,6 +457,7 @@ bool ReadFile_pheno(const string &file_pheno, bool ReadFile_cvt(const string &file_cvt, vector<int> &indicator_cvt, vector<vector<double>> &cvt, size_t &n_cvt) { debug_msg("entered"); + checkpoint("read-file-cvt",file_cvt); indicator_cvt.clear(); ifstream infile(file_cvt.c_str(), ifstream::in); @@ -515,6 +525,7 @@ bool ReadFile_cvt(const string &file_cvt, vector<int> &indicator_cvt, // Read .bim file. bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo) { + checkpoint("read-file-bim",file_bim); debug_msg("entered"); snpInfo.clear(); @@ -563,6 +574,7 @@ bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo) { bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno, vector<vector<double>> &pheno, map<string, int> &mapID2num, const vector<size_t> &p_column) { + checkpoint("read-file-fam",file_fam); debug_msg("entered"); indicator_pheno.clear(); pheno.clear(); @@ -639,13 +651,14 @@ bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno, return true; } + // Read bimbam mean genotype file, the first time, to obtain #SNPs for // analysis (ns_test) and total #SNP (ns_total). /* ## Modified (Mutated) Parameters: 1. **`indicator_idv`** (vector<int>&) - - Actually **not modified** - only read from to determine which individuals to include + - **Not modified** - only read from to determine which individuals to include 2. **`indicator_snp`** (vector<int>&) - **Modified**: Cleared at the start (`indicator_snp.clear()`) @@ -678,14 +691,15 @@ bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno, The function returns a `bool` indicating success/failure, but the real outputs are these three modified reference parameters. */ -bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, - const gsl_matrix *W, const vector<int> &indicator_idv, - vector<int> &indicator_snp, const double &maf_level, - const double &miss_level, const double &hwe_level, - const double &r2_level, map<string, string> &mapRS2chr, - map<string, long int> &mapRS2bp, - map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo, - size_t &ns_test) { +bool ReadFile_bimbam_geno(const string &file_geno, const set<string> &setSnps, + const gsl_matrix *W, const vector<int> &indicator_idv, + vector<int> &indicator_snp, const double &maf_level, + const double &miss_level, const double &hwe_level, + const double &r2_level, map<string, string> &mapRS2chr, + map<string, long int> &mapRS2bp, + map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo, + size_t &ns_test) { + checkpoint("read-file-geno",file_geno); debug_msg("entered"); indicator_snp.clear(); snpInfo.clear(); @@ -724,9 +738,8 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, double cM; size_t file_pos; - double maf, geno, geno_old; + double geno, geno_old; size_t n_miss; - size_t n_0, n_1, n_2; double min_g = std::numeric_limits<float>::max(); double max_g = std::numeric_limits<float>::min(); @@ -737,6 +750,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, for (int i = 0; i < ni_total; ++i) { ni_test += indicator_idv[i]; } + // assert(ni_test == ni_total); // we use indicator_idv? ns_test = 0; file_pos = 0; @@ -783,15 +797,14 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, } // Start on a new marker/SNP - maf = 0; n_miss = 0; flag_poly = 0; geno_old = -9; - n_0 = 0; - n_1 = 0; - n_2 = 0; c_idv = 0; gsl_vector_set_zero(genotype_miss); + double maf = 0.0; + size_t n_0=0,n_1=0,n_2=0; + auto infilen = file_geno.c_str(); for (int i = 0; i < ni_total; ++i) { // read genotypes ch_ptr = strtok_safe2(NULL, " ,\t",(string("To many individuals/strains/genometypes in pheno file for ")+infilen).c_str()); @@ -808,19 +821,12 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, } geno = atof(ch_ptr); - if (geno >= 0 && geno <= 0.5) { - n_0++; - } - if (geno > 0.5 && geno < 1.5) { - n_1++; - } - if (geno >= 1.5 && geno <= 2.0) { - n_2++; - } gsl_vector_set(genotype, c_idv, geno); - if (geno < min_g) min_g = geno; - if (geno > max_g) max_g = geno; + if (geno < min_g) + min_g = geno; + if (geno > max_g) + max_g = geno; // going through genotypes with 0.0 < geno < 2.0 if (flag_poly == 0) { // first init in marker @@ -831,10 +837,20 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, flag_poly = 1; // genotypes differ } + // compute ratios + if (hwe_level != 0 && maf_level != -1) { + if (geno >= 0 && geno <= 0.5) + n_0++; + if (geno > 0.5 && geno < 1.5) + n_1++; + if (geno >= 1.5 && geno <= 2.0) + n_2++; + } maf += geno; c_idv++; } + maf /= 2.0 * (double)(ni_test - n_miss); SNPINFO sInfo = {chr, rs, @@ -871,8 +887,6 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, continue; } } - - // -r2 flag for (size_t i = 0; i < genotype->size; ++i) { if (gsl_vector_get(genotype_miss, i) == 1) { @@ -880,7 +894,6 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, gsl_vector_set(genotype, i, geno); } } - gsl_blas_dgemv(CblasTrans, 1.0, W, genotype, 0.0, Wtx); gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); gsl_blas_ddot(genotype, genotype, &v_x); @@ -924,6 +937,7 @@ bool ReadFile_bed(const string &file_bed, const set<string> &setSnps, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, size_t &ns_test) { + checkpoint("read-file-bed",file_bed); debug_msg("entered"); indicator_snp.clear(); size_t ns_total = snpInfo.size(); @@ -1232,6 +1246,7 @@ void Plink_ReadOneSNP(const int pos, const vector<int> &indicator_idv, void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv, map<string, int> &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G) { + checkpoint("read-file-kin",file_kin); debug_msg("entered"); igzstream infile(file_kin.c_str(), igzstream::in); if (!infile) { @@ -1335,7 +1350,7 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv, infile.close(); infile.clear(); - checkpoint("read-kinship-file",file_kin); + checkpoint("end-read-file-kin",file_kin); return; } @@ -1461,6 +1476,197 @@ void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) { return; } + +// Read bimbam mean genotype file and calculate kinship matrix. +gsl_matrix *mdb_calc_kin(const string file_geno, const int k_mode, const string loco) { + checkpoint("mdb-kin",file_geno); + bool is_loco = !loco.empty(); + + // Convert loco string to what we use in the chrpos index + uint8_t loco_chr = 0; + if (is_loco) { + if (loco == "X") { + loco_chr = CHR_X; + } else if (loco == "Y") { + loco_chr = CHR_Y; + } else if (loco == "M") { + loco_chr = CHR_M; + } else { + try { + loco_chr = static_cast<uint8_t>(stoi(loco)); + } catch (...) { + loco_chr = 0; + } + } + } + + /* Create and open the LMDB environment: */ + auto env = lmdb::env::create(); + + env.set_mapsize(1UL * 1024UL * 1024UL * 1024UL * 1024UL); /* 10 GiB */ + env.set_max_dbs(10); + env.open(file_geno.c_str(), MDB_RDONLY | MDB_NOSUBDIR, 0664); + string format; + + size_t ni_total = 0, num_markers = 0; + { + auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); + auto info = lmdb::dbi::open(rtxn, "info"); + + string_view meta; + if (info.get(rtxn, "meta", meta)) { + cout << meta << endl; + } else { + cout << "meta not found!" << endl; + } + std::string_view view_int; + info.get(rtxn, "numsamples", view_int); + ni_total = lmdb::from_sv<size_t>(view_int); + string_view info_key,info_value; + info.get(rtxn,"format",info_value); + format = string(info_value); + + MDB_stat stat; + mdb_stat(rtxn, info, &stat); + assert(stat.ms_entries == 5); + } + + auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); + + auto geno_mdb = lmdb::dbi::open(rtxn, "geno"); + + MDB_stat stat; + mdb_stat(rtxn, geno_mdb, &stat); + // cout << "Number of records: " << stat.ms_entries << endl; + num_markers = stat.ms_entries; + + gsl_matrix *matrix_kin = gsl_matrix_safe_alloc(ni_total,ni_total); + gsl_matrix_set_zero(matrix_kin); + gsl_vector *geno = gsl_vector_safe_alloc(ni_total); + gsl_vector *geno_miss = gsl_vector_safe_alloc(ni_total); + + // Xlarge contains inds x markers + gsl_matrix *Xlarge = gsl_matrix_safe_alloc(ni_total, K_BATCH_SIZE); + enforce_msg(Xlarge, "allocate Xlarge ni_total x K_BATCH_SIZE"); + if (is_check_mode()) + gsl_matrix_set_zero(Xlarge); + + // [ ] check XLarge is zeroed properly + // [ ] handle missing data + + // For every marker read the genotype x inds + size_t ns_test = 0; + size_t display_pace = 1000; + + cout << "## number of total individuals = " << ni_total << endl; + cout << "## number of analyzed individuals = " << ni_total << endl; + cout << "## number of analyzed SNPs/var = " << num_markers << endl; + + auto cursor = lmdb::cursor::open(rtxn, geno_mdb); + + auto mdb_fetch = MDB_FIRST; + for (size_t t = 0; t < num_markers; ++t) { + string line; + if (t % display_pace == 0) { + ProgressBar("Reading SNPs", t, num_markers - 1); + } + string_view key, value; + cursor.get(key, value, mdb_fetch); + mdb_fetch = MDB_NEXT; + + const uint8_t* data = reinterpret_cast<const uint8_t*>(key.data()); + auto chr = static_cast<uint8_t>(data[1]); + + if (is_loco && loco_chr == chr) + continue; + + // ---- Depending on the format we get different buffers - currently float and byte are supported: + vector<double> gs; + if (format == "Gb") { + size_t num_bytes = value.size() / sizeof(uint8_t); + assert(num_bytes == ni_total); + auto size = num_bytes; + const uint8_t* gs_bbuf = reinterpret_cast<const uint8_t*>(value.data()); + gs.reserve(size); + for (size_t i = 0; i < size; ++i) { + double g = static_cast<double>(gs_bbuf[i])/127.0; + gs.push_back(g); + } + } else { + size_t num_floats = value.size() / sizeof(float); + assert(num_floats == ni_total); + auto size = num_floats; + const float* gs_fbuf = reinterpret_cast<const float*>(value.data()); + gs.reserve(size); + for (size_t i = 0; i < size; ++i) { + gs.push_back(static_cast<double>(gs_fbuf[i])); + } + } + + size_t n_miss = 0; + double geno_mean = 0.0; + double geno_var = 0.0; + gsl_vector_set_all(geno_miss, 0); + + for (size_t i = 0; i < ni_total; ++i) { + double d = gs[i]; + gsl_vector_set(geno, i, d); + gsl_vector_set(geno_miss, i, 1); + geno_mean += d; + geno_var += d * d; + } + + geno_mean /= (double)(ni_total - n_miss); + geno_var += geno_mean * geno_mean * (double)n_miss; + geno_var /= (double)ni_total; + geno_var -= geno_mean * geno_mean; + + // impute missing values by plugging in the mean + for (size_t i = 0; i < ni_total; ++i) { + if (gsl_vector_get(geno_miss, i) == 0) { + gsl_vector_set(geno, i, geno_mean); + } + } + + // subtract the mean (centering genotype values) + gsl_vector_add_constant(geno, -1.0 * geno_mean); + + // scale the genotypes + if (k_mode == 2 && geno_var != 0) { // some confusion here, -gk 2 + // flag does this + gsl_vector_scale(geno, 1.0 / sqrt(geno_var)); + } + + // set the SNP column ns_test to copy geno into the compute matrix Xlarge + gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, ns_test % K_BATCH_SIZE); + enforce_gsl(gsl_vector_memcpy(&Xlarge_col.vector, geno)); + + ns_test++; + + // Every K_BATCH_SIZE rows batch compute kinship matrix and return + // by adding to matrix_kin + if (ns_test % K_BATCH_SIZE == 0) { + fast_eigen_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin); + gsl_matrix_set_zero(Xlarge); + write(matrix_kin,"K updated"); + } + } + if (ns_test % K_BATCH_SIZE != 0) { // compute last batch + fast_eigen_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin); + write(matrix_kin,"K updated"); + } + ProgressBar("Reading SNPs", num_markers, num_markers); + cout << endl; + + enforce_gsl(gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test)); + + gsl_vector_free(geno); + gsl_vector_free(geno_miss); + gsl_matrix_free(Xlarge); + + return matrix_kin; +} + // Read bimbam mean genotype file and calculate kinship matrix. bool BimbamKin(const string file_geno, const set<string> ksnps, vector<int> &indicator_snp, const int k_mode, @@ -1470,7 +1676,7 @@ bool BimbamKin(const string file_geno, const set<string> ksnps, auto infilen = file_geno.c_str(); igzstream infile(infilen, igzstream::in); enforce_msg(infilen, "error reading genotype file"); - checkpoint("read-geno-file",file_geno); + checkpoint("bimbam-kin",file_geno); size_t n_miss; double geno_mean, geno_var; @@ -1792,6 +1998,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp, bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) { + checkpoint("read-file-geno",file_geno); debug_msg("entered"); igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { @@ -1893,121 +2100,12 @@ bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv, return true; } -// Compact version of the above function, using uchar instead of -// gsl_matrix. -bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv, - vector<int> &indicator_snp, - vector<vector<unsigned char>> &Xt, gsl_matrix *K, - const bool calc_K, const size_t ni_test, - const size_t ns_test) { - debug_msg("entered"); - igzstream infile(file_geno.c_str(), igzstream::in); - if (!infile) { - cout << "error reading genotype file:" << file_geno << endl; - return false; - } - - Xt.clear(); - vector<unsigned char> Xt_row; - for (size_t i = 0; i < ni_test; i++) { - Xt_row.push_back(0); - } - - string line; - char *ch_ptr; - - if (calc_K == true) { - gsl_matrix_set_zero(K); - } - - gsl_vector *genotype = gsl_vector_safe_alloc(ni_test); - gsl_vector *genotype_miss = gsl_vector_safe_alloc(ni_test); - double geno, geno_mean; - size_t n_miss; - - size_t ni_total = indicator_idv.size(); - size_t ns_total = indicator_snp.size(); - - size_t c_idv = 0, c_snp = 0; - - auto infilen = file_geno.c_str(); - for (size_t i = 0; i < ns_total; ++i) { - safeGetline(infile, line).eof(); - if (indicator_snp[i] == 0) { - continue; - } - - ch_ptr = strtok_safe2((char *)line.c_str(), " ,\t",infilen); - ch_ptr = strtok_safe2(NULL, " ,\t",infilen); - ch_ptr = strtok_safe2(NULL, " ,\t",infilen); - - c_idv = 0; - geno_mean = 0; - n_miss = 0; - gsl_vector_set_zero(genotype_miss); - for (uint j = 0; j < ni_total; ++j) { - ch_ptr = strtok_safe2(NULL, " ,\t",infilen); - if (indicator_idv[j] == 0) { - continue; - } - - if (strcmp(ch_ptr, "NA") == 0) { - gsl_vector_set(genotype_miss, c_idv, 1); - n_miss++; - } else { - geno = atof(ch_ptr); - gsl_vector_set(genotype, c_idv, geno); - geno_mean += geno; - } - c_idv++; - } - - geno_mean /= (double)(ni_test - n_miss); - - for (size_t j = 0; j < genotype->size; ++j) { - if (gsl_vector_get(genotype_miss, j) == 1) { - geno = geno_mean; - } else { - geno = gsl_vector_get(genotype, j); - } - - Xt_row[j] = Double02ToUchar(geno); - gsl_vector_set(genotype, j, (geno - geno_mean)); - } - Xt.push_back(Xt_row); - - if (calc_K == true) { - gsl_blas_dsyr(CblasUpper, 1.0, genotype, K); - } - - c_snp++; - } - - if (calc_K == true) { - gsl_matrix_scale(K, 1.0 / (double)ns_test); - - for (size_t i = 0; i < genotype->size; ++i) { - for (size_t j = 0; j < i; ++j) { - geno = gsl_matrix_get(K, j, i); - gsl_matrix_set(K, i, j, geno); - } - } - } - - gsl_vector_free(genotype); - gsl_vector_free(genotype_miss); - - infile.clear(); - infile.close(); - - return true; -} - // Read bimbam mean genotype file, the second time, recode "mean" // genotype and calculate K. bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) { + checkpoint("read-file-bed",file_bed); debug_msg("entered"); ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { @@ -2141,6 +2239,7 @@ bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test) { + checkpoint("read-file-bed",file_bed); debug_msg("entered"); ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { @@ -2277,6 +2376,7 @@ bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, bool ReadFile_est(const string &file_est, const vector<size_t> &est_column, map<string, double> &mapRS2est) { + checkpoint("read-file-est",file_est); debug_msg("entered"); mapRS2est.clear(); @@ -2344,7 +2444,8 @@ bool ReadFile_est(const string &file_est, const vector<size_t> &est_column, } bool CountFileLines(const string &file_input, size_t &n_lines) { - debug_msg("entered"); + checkpoint("count-lines",file_input); + igzstream infile(file_input.c_str(), igzstream::in); if (!infile) { cout << "error! fail to open file: " << file_input << endl; @@ -2361,6 +2462,7 @@ bool CountFileLines(const string &file_input, size_t &n_lines) { // Read gene expression file. bool ReadFile_gene(const string &file_gene, vector<double> &vec_read, vector<SNPINFO> &snpInfo, size_t &ng_total) { + checkpoint("read-file-gene",file_gene); debug_msg("entered"); vec_read.clear(); ng_total = 0; |
