Browse Source

newick tree parser

remotes/georgeg/no_streams
lomereiter 9 years ago
parent
commit
6fdfe7c049
  1. 125
      bio/newick/parser.d
  2. 88
      bio/newick/tree.d
  3. 14
      bio/newick/treenode.d

125
bio/newick/parser.d

@ -0,0 +1,125 @@
module bio.newick.parser;
import bio.newick.treenode;
import std.exception;
import std.format;
import std.ascii;
import std.array;
/**
Exception thrown by the parser in this module when the input cannot be
parsed as a Newick tree.
*/
class NewickParseException : Exception {
    /**
    Params:
        msg  = description of the parse failure
        file = source file recorded in the exception; defaults to the file
               of the code constructing it
        line = source line recorded in the exception; defaults to the line
               of the code constructing it
    */
    this(string msg, string file = __FILE__, size_t line = __LINE__) {
        // Forward the location so it points at the throw site rather than
        // at this constructor.
        super(msg, file, line);
    }
}
/**
Parses the Newick string `s` and returns the node produced by `parseTree`.

The argument is a by-value slice, so consuming it during parsing does not
affect the caller's string.
*/
NewickNode* parse(string s) {
    // `s` is already a local lvalue, so it can be handed to the
    // ref-taking parser directly.
    return parseTree(s);
}
private {
/**
Advances `s` past leading whitespace (as classified by std.ascii.isWhite).

Throws: NewickParseException if `s` is empty on entry or consists only of
whitespace, i.e. the input runs out before a non-whitespace character.
*/
void skipWhiteSpace(ref string s) {
    for (;;) {
        if (s.empty)
            throw new NewickParseException("String is empty");
        // Stop at the first character that is not whitespace
        if (!isWhite(s.front))
            return;
        s = s[1 .. $];
    }
}
/**
Parses a whole tree: either a lone ';' or a subtree followed by ';'.

On return `s` starts at the terminating ';' (it is not consumed).

Returns: a default-initialized node for a lone ';', otherwise the node
produced by `parseSubtree`.

Throws: NewickParseException if the input ends, or holds any other
character, where the terminating ';' is expected.
*/
NewickNode* parseTree(ref string s) {
    skipWhiteSpace(s);
    // A lone ';' is an empty tree
    if (s.front == ';')
        return new NewickNode;

    auto result = parseSubtree(s);

    // The subtree parser can consume the entire input (e.g. "A:0.1" with no
    // terminator), and `front` must not be read on an empty string, so check
    // for exhaustion first and report it as a parse error.
    if (s.empty || s.front != ';')
        throw new NewickParseException("String does not end with ';'");
    return result;
}
/**
Parses one subtree starting at the first non-whitespace character of `s`
and dispatches on that character:

$(UL
  $(LI '(' — an internal node with a child list)
  $(LI ',' or ')' — an empty (unnamed) node; the delimiter is left in `s`)
  $(LI ':' — an unnamed leaf that only has a branch length)
  $(LI alphanumeric — a named leaf)
)

Throws: NewickParseException if the subtree starts with any other character
or if the input is exhausted.
*/
NewickNode* parseSubtree(ref string s) {
    skipWhiteSpace(s);
    auto ch = s.front;
    switch (ch) {
        case '(':
            return parseInternalNode(s);
        case ',':
        case ')':
            // Nothing between two delimiters, as in "(,)": the caller
            // consumes the delimiter itself.
            return new NewickNode;
        case ':':
            // Unnamed leaf with a branch length, as in "(:0.1,:0.2)".
            // parseLeafNode copes with an empty name.
            return parseLeafNode(s);
        default:
            if (!isAlphaNum(ch))
                throw new NewickParseException("Node name contains invalid character");
            return parseLeafNode(s);
    }
}
/**
Parses an optional alphanumeric name followed by an optional
":<distance>" suffix.

Params:
    s  = remaining input; advanced past whatever was consumed
    nd = node to fill in; when null a new node is allocated

Returns: the node that was filled in (`nd` if it was supplied).

Throws: NewickParseException if the input ends right after the name, or is
exhausted on entry (raised by `skipWhiteSpace`).
*/
NewickNode* parseLeafNode(ref string s, NewickNode* nd=null) {
    skipWhiteSpace(s);
    auto node = nd is null ? (new NewickNode) : nd;

    // Scan the name; the length check keeps the index inside the string when
    // the name runs to the very end of the input.
    size_t i = 0;
    while (i < s.length && isAlphaNum(s[i]))
        ++i;
    if (i > 0)
        node.name = s[0 .. i];
    s = s[i .. $];

    // A node is always followed by a delimiter or ';', so running out of
    // input here is a parse error; `front` must not be read on an empty string.
    if (s.empty)
        throw new NewickParseException("Unexpected end of input after node name");

    if (s.front == ':')
        formattedRead(s, ":%s", &node.distance_to_parent);
    return node;
}
/**
Parses an internal node: a parenthesized, comma-separated list of subtrees
followed by the node's own optional name and distance.

`s` must start (after optional whitespace) with '('.

Throws: NewickParseException if a child is followed by anything other than
',' or ')', or if the input ends inside the child list.
*/
NewickNode* parseInternalNode(ref string s) {
    skipWhiteSpace(s);
    auto ch = s.front;
    assert(ch == '(');
    s.popFront();

    auto node = new NewickNode;
    auto children = Appender!(NewickNode*[])();
    while (true) {
        children.put(parseSubtree(s));

        // Allow whitespace between a child and the delimiter that follows it
        // ("(A , B)"). skipWhiteSpace also throws NewickParseException when
        // the input is exhausted, so `front` below is never read on an
        // empty string.
        skipWhiteSpace(s);
        ch = s.front;
        if (ch == ',') {
            s.popFront();
        } else if (ch == ')') {
            s.popFront();
            break;
        } else {
            throw new NewickParseException("Parse error");
        }
    }
    node.children = children.data;

    // Whatever follows ')' is this node's own name and/or distance
    parseLeafNode(s, node);
    return node;
}
}

88
bio/newick/tree.d

@ -0,0 +1,88 @@
module bio.newick.tree;
import bio.newick.parser;
import bio.newick.treenode;
/**
Newick tree: holds a pointer to the root node of a parsed tree.
*/
class NewickTree {
    private NewickNode* _root;

    /** Root node, may be null */
    @property NewickNode* root() {
        return _root;
    }

    /** Builds a tree by parsing its Newick string representation */
    static NewickTree fromString(string str) {
        auto parsedRoot = bio.newick.parser.parse(str);
        auto result = new NewickTree();
        result._root = parsedRoot;
        return result;
    }
}
unittest {
    import std.math;

    // Checks both the name and the distance of a node.
    static void checkNode(NewickNode* node, string name, double distance) {
        assert(node.name == name);
        assert(approxEqual(node.distance_to_parent, distance));
    }

    // Unnamed internal nodes, leaves with distances
    {
        auto tree = NewickTree.fromString("(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);");
        auto root = tree.root;
        assert(root !is null);
        assert(root.name is null);
        assert(root.children.length == 3);
        checkNode(root.children[0], "A", 0.1);
        checkNode(root.children[1], "B", 0.2);

        auto inner = root.children[2];
        assert(inner !is null);
        assert(inner.name is null);
        assert(approxEqual(inner.distance_to_parent, 0.5));
        assert(inner.children.length == 2);
        checkNode(inner.children[0], "C", 0.3);
        checkNode(inner.children[1], "D", 0.4);
    }

    // Names only, no distances
    {
        auto tree = NewickTree.fromString("(A,B,(C,D));");
        auto root = tree.root;
        assert(root !is null);
        assert(root.children.length == 3);
        assert(root.children[0].name == "A");
        assert(root.children[1].name == "B");

        auto inner = root.children[2];
        assert(inner.name is null);
        assert(inner.children[0].name == "C");
        assert(inner.children[1].name == "D");
    }

    // Every node named, including internal ones and the root
    {
        auto tree = NewickTree.fromString("((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;");
        auto root = tree.root;
        assert(root.name == "A");
        assert(root.children.length == 1);

        auto f = root.children[0];
        checkNode(f, "F", 0.1);
        assert(f.children.length == 2);
        checkNode(f.children[0], "B", 0.2);

        auto e = f.children[1];
        checkNode(e, "E", 0.5);
        assert(e.children.length == 2);
        checkNode(e.children[0], "C", 0.3);
        checkNode(e.children[1], "D", 0.4);
    }

    // No names and no distances at all
    {
        auto tree = NewickTree.fromString("(,,(,));");
        auto root = tree.root;
        assert(root !is null);
        assert(root.children.length == 3);
        foreach (idx; 0 .. 2) {
            assert(root.children[idx].name is null);
            assert(root.children[idx].children.length == 0);
        }

        auto inner = root.children[2];
        assert(inner.name is null);
        assert(inner.children.length == 2);
        assert(inner.children[0].name is null);
        assert(inner.children[1].name is null);
    }
}

14
bio/newick/treenode.d

@ -0,0 +1,14 @@
module bio.newick.treenode;
/** A single node of a Newick tree */
struct NewickNode {
    /** Name of the node; null when the node is unnamed */
    string name;

    /** Distance to the parent node; NaN when none was specified */
    double distance_to_parent = double.nan;

    /** Pointers to the child nodes */
    NewickNode*[] children;
}
Loading…
Cancel
Save