Browse Source

Splitter: first rewrite

georgeg
Pjotr Prins 4 years ago
parent
commit
4bac3c5e45
  1. 58
      bio/std/range/splitter.d

58
bio/std/range/splitter.d

@ -71,49 +71,47 @@ unittest {
assert(array(SimpleSplitConv!(ubyte[])(cast(ubyte[])"chr1:55365,55365,1")) == ["chr1:55365","55365","1"]);
}
struct FastSplitConv(R)
if (isInputRange!R)
{
R list, split_on;
this(R range, R splits_on = cast(R)SPLIT_ON) {
list = range;
split_on = splits_on;
}
int opApply(scope int delegate(R) dg) {
size_t start = 0;
bool in_whitespace = false;
foreach(size_t pos, c; list) {
if (canFind(split_on,c)) { // hit split char
if (!in_whitespace) { // emit
auto token = list[start..pos];
dg(token);
}
start = pos+1;
in_whitespace = true;
} else {
in_whitespace = false;
R[] fast_splitter(R)(R range, R splits_on = cast(R)SPLIT_ON) {
R[] tokens = new R[range.length]; // pre-allocate optimistically
auto j = 0, prev_j = 0;
bool in_whitespace = false;
auto token_num = 0;
for (; j<range.length ;) {
if (canFind(splits_on,range[j])) { // hit split char
if (!in_whitespace && j>0) {
tokens[token_num] = range[prev_j..j];
token_num++;
}
prev_j = j+1;
in_whitespace = true;
}
if (!in_whitespace) { // emit final
auto token = list[start..$];
dg(token);
else {
in_whitespace = false;
}
return 0;
j++;
}
if (!in_whitespace) { // emit final
tokens[token_num] = range[prev_j..$];
token_num++;
}
tokens.length = token_num;
return tokens;
}
unittest {
auto s = cast(ubyte[])"hello 1 2 \t3 4 \n";
auto s = "hello 1 2 \t3 4 \n";
writeln(fast_splitter(s).map!"to!string(a)");
for (int x = 0; x < 4_000_000; x++) {
assert(array(FastSplitConv!(ubyte[])(s)) == ["hello","1","2","3","4"]);
assert(fast_splitter(s) == ["hello", "1", "2", "3", "4"]);
// assert(array(FastSplitConv!(ubyte[])(cast(ubyte[])" hello, 1 2 \t3 4 \n")) == ["","hello","1","2","3","4"]);
// assert(array(FastSplitConv!(ubyte[])(cast(ubyte[])"hello, 1 2 \n\t3 4 \n")) == ["hello","1","2","3","4"]);
// assert(array(FastSplitConv!(ubyte[])(cast(ubyte[])"chr1:55365,55365,1")) == ["chr1:55365","55365","1"]);
}
/*
real 0m2.675s
user 0m2.676s
sys 0m0.000s
real 0m3.733s
user 0m3.736s
sys 0m0.000s

Loading…
Cancel
Save