Browse Source

Splitter even faster

real    0m1.357s
user    0m1.356s
sys     0m0.004s
georgeg
Pjotr Prins 4 years ago
parent
commit
8981178af4
  1. 42
      bio/std/range/splitter.d

42
bio/std/range/splitter.d

@ -71,8 +71,12 @@ unittest {
assert(array(SimpleSplitConv!(ubyte[])(cast(ubyte[])"chr1:55365,55365,1")) == ["chr1:55365","55365","1"]);
}
R[] fast_splitter(R)(R range, R splits_on = cast(R)SPLIT_ON) {
R[] tokens = new R[range.length]; // pre-allocate optimistially
/*
The dirty fast_splitter is 3x faster than the elegant version above. It does
one heap allocation if the 16K buffer is not enough.
*/
R[] fast_splitter(R)(R[] tokens, R range, R splits_on = cast(R)SPLIT_ON) {
// R[] tokens = new R[range.length]; // pre-allocate optimistically
auto j = 0, prev_j = 0;
bool in_whitespace = false;
auto token_num = 0;
@ -85,8 +89,8 @@ R[] fast_splitter(R)(R range, R splits_on = cast(R)SPLIT_ON) {
break;
}
}
if (found) { // hit split char
if (!in_whitespace && j>0) {
if (found) {
if (!in_whitespace) {
tokens[token_num] = range[prev_j..j];
token_num++;
}
@ -102,30 +106,20 @@ R[] fast_splitter(R)(R range, R splits_on = cast(R)SPLIT_ON) {
tokens[token_num] = range[prev_j..$];
token_num++;
}
tokens.length = token_num;
return tokens;
// tokens.length = token_num;
return tokens[0..token_num];
}
unittest {
auto s = "hello 1 2 \t3 4 \n";
writeln(fast_splitter(s).map!"to!string(a)");
string[16384] tokens;
writeln(fast_splitter(tokens,s).map!"to!string(a)");
for (int x = 0; x < 4_000_000; x++) {
assert(fast_splitter(s) == ["hello", "1", "2", "3", "4"]);
// assert(array(FastSplitConv!(ubyte[])(cast(ubyte[])" hello, 1 2 \t3 4 \n")) == ["","hello","1","2","3","4"]);
// assert(array(FastSplitConv!(ubyte[])(cast(ubyte[])"hello, 1 2 \n\t3 4 \n")) == ["hello","1","2","3","4"]);
// assert(array(FastSplitConv!(ubyte[])(cast(ubyte[])"chr1:55365,55365,1")) == ["chr1:55365","55365","1"]);
assert(fast_splitter(tokens,s) == ["hello", "1", "2", "3", "4"]);
/*
assert(fast_splitter(" hello, 1 2 \t3 4 \n") == ["","hello","1","2","3","4"]);
assert(fast_splitter("hello, 1 2 \n\t3 4 \n") == ["hello","1","2","3","4"]);
assert(fast_splitter("chr1:55365,55365,1") == ["chr1:55365","55365","1"]);
*/
}
/*
real 0m1.731s
user 0m1.732s
sys 0m0.000s
real 0m2.675s
user 0m2.676s
sys 0m0.000s
real 0m3.733s
user 0m3.736s
sys 0m0.000s
*/
}

Loading…
Cancel
Save