diff options
author | Pjotr Prins | 2018-09-19 07:44:12 +0000 |
---|---|---|
committer | Pjotr Prins | 2018-09-19 07:44:12 +0000 |
commit | 2b86b6423c33923a385a341fb4d12b7c38b7c924 (patch) | |
tree | 1cb635c651894c6a54b7416df636af786ccf2241 | |
parent | 21bd19fd1aa8204cd17f74d971d003a0657f37f1 (diff) | |
download | pangemma-2b86b6423c33923a385a341fb4d12b7c38b7c924.tar.gz |
Replace safeGetline with native version
-rw-r--r-- | src/gemma_io.cpp | 18 | ||||
-rw-r--r-- | test/performance/releases.org | 14 |
2 files changed, 27 insertions, 5 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index 405bbff..d92dc44 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -96,10 +96,24 @@ token_list tokenize_whitespace(const string line, uint num, const char *infilen) return v; } +// Faster version of safeGetline because of less copying and char +// iteration. Note behaviour differs somewhat when it comes to eol. I +// think the version had to deal with files from differing platforms. +// You can still run that with -legacy switch. +inline istream &safe_get_line(istream &is, string &t) { + if (is_legacy_mode()) return safeGetline(is,t); + + std::getline(is,t); + // if(!is.fail); + return is; +} bool isBlankLine(std::string const &line) { return isBlankLine(line.c_str()); } -// In case files are ended with "\r" or "\r\n". +// In case files are ended with "\r" or "\r\n". safeGetline fetches +// lines from a stream and returns them in t. It returns stream so it +// can be tested for eof. This function is a bottleneck in legacy gemma +// and can be replaced with safe_get_line. std::istream &safeGetline(std::istream &is, std::string &t) { t.clear(); @@ -679,7 +693,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, file_pos = 0; auto count_warnings = 0; auto infilen = file_geno.c_str(); - while (!safeGetline(infile, line).eof()) { + while (!safe_get_line(infile, line).eof()) { ch_ptr = strtok_safe2((char *)line.c_str(), " ,\t",infilen); rs = ch_ptr; ch_ptr = strtok_safe2(NULL, " ,\t",infilen); diff --git a/test/performance/releases.org b/test/performance/releases.org index 4f1f68b..2419960 100644 --- a/test/performance/releases.org +++ b/test/performance/releases.org @@ -57,9 +57,17 @@ user 0m37.788s sys 0m2.168s #+END_SRC -there is still some scope for improvement by changing safeGetLine and -do_strtok_safe methods as well as less string copying during -tokenization. I may get to that at some point. +Replacing safeGetLine also made some difference + +#+BEGIN_SRC +real 0m15.659s +user 0m34.896s +sys 0m1.500s +#+END_SRC + +there is still some scope for improvement by changing do_strtok_safe +methods as well as less string copying during tokenization. I may get +to that at some point. Running the GNU profiler on the MVLMM one rendered |