aboutsummaryrefslogtreecommitdiff
path: root/gn2/wqflask/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'gn2/wqflask/parser.py')
-rw-r--r--gn2/wqflask/parser.py91
1 files changed, 91 insertions, 0 deletions
diff --git a/gn2/wqflask/parser.py b/gn2/wqflask/parser.py
new file mode 100644
index 00000000..ddf48d90
--- /dev/null
+++ b/gn2/wqflask/parser.py
@@ -0,0 +1,91 @@
+"""
+Parses search terms input by user
+
+Searches take two primary forms:
+- search term by itself (ex. "shh" or "brain")
+- key / separator / value(s) (ex. "LRS=(9 99 Chr4 122 155)" or "GO:342533")
+
+In the example of "LRS=(9 99 Chr4 122 155)", the key is "LRS", the separator is "=" and the value
+is everything within the parentheses.
+
+Both "=" and ":" can be used as separators; in the future, it would also be good to allow no
+separator at all (ex. "cisLRS(9 999 10)")
+
+Both square brackets and parentheses can be used interchangeably, and an opening bracket of
+one kind may be closed by the other; "cisLRS=[9 999 10)" would be acceptable. Both can also
+be used to encapsulate a single value.
+
+"""
+
+import re
+
+from pprint import pformat as pf
+
+
# Tokenizer used by parse().  Every alternative is a capture group so that
# re.split() returns the matched tokens (plus any unmatched residue between
# them) instead of discarding them.
_TOKEN_RE = re.compile(
    r"""(?:(\w+\s*=\s*(?:[\(\[][^)\]'"]*|['"][^)'"]*)[\)\]'"]) |  # LRS=(1 2 3), cisLRS=[4 5 6], NAME='x y'
        (\w+\s*(?:<=|>=|[=:<>])\s*[\w\*]+) |  # wiki=bar, GO:foobar, LRS>=9, etc
        (".*?") | ('.*?') |                   # terms in quotes, i.e. "brain weight"
        ([\w\*\?\-]+))                        # shh, brain, etc
    """,
    flags=re.VERBOSE)

# Two-character operators come first so "<=" is not split as "<" followed by "=".
_SEPARATOR_RE = re.compile(
    "(%s)" % "|".join(re.escape(sep) for sep in ("<=", ">=", ":", "=", "<", ">")))

# Values inside brackets may be delimited by whitespace and/or commas.
_LIST_DELIMITER_RE = re.compile(r"""\s+|,""")


def parse(pstring):
    """Split a user search string into a list of structured search terms.

    Args:
        pstring: The raw search string, e.g.
            ``'sal1 LRS=(9 99 Chr4 122 155) "brain weight" GO:foobar'``.

    Returns:
        A list with one dict per term, in input order.  Each dict has the keys
        ``key``, ``separator`` and ``search_term``:

        * plain or quoted terms give ``key=None``, ``separator=None`` and a
          one-element ``search_term`` (surrounding quotes removed);
        * ``key<sep>value`` terms give the stripped key, the separator
          (``":"`` is normalised to ``"="``) and the value(s).

        ``search_term`` is always a list, even if it holds only one element.

    Raises:
        AssertionError: A bracketed value does not both start with ``(``/``[``
            and end with ``)``/``]`` ("Invalid token").
    """
    tokens = [piece.strip() for piece in _TOKEN_RE.split(pstring)
              if piece and piece.strip()]

    items = []
    for token in tokens:
        # A quoted phrase is always a plain term, even when it contains a
        # separator character such as ":" or "=".
        if token[0] in "\"'" and token[0] == token[-1]:
            items.append(dict(key=None,
                              separator=None,
                              search_term=[token[1:-1]]))
            continue

        # maxsplit=1: only the first separator divides key from value, so
        # values that themselves contain "=", ":", "<" or ">" stay intact.
        pieces = _SEPARATOR_RE.split(token, maxsplit=1)
        if len(pieces) == 1:
            items.append(dict(key=None,
                              separator=None,
                              search_term=[token]))
            continue

        key, separator, value = pieces
        # The tokenizer allows whitespace around the separator; drop it so
        # "LRS = (1 2)" behaves like "LRS=(1 2)".
        key = key.strip()
        value = value.strip()

        if "(" in value or "[" in value:
            # Raised explicitly (not via ``assert``) so the check survives
            # ``python -O``; the exception type is kept for existing callers.
            if not value.startswith(("(", "[")):
                raise AssertionError("Invalid token")
            if not value.endswith((")", "]")):
                raise AssertionError("Invalid token")
            inner = value[1:-1]  # Get rid of the brackets
            search_term = [part.strip()
                           for part in _LIST_DELIMITER_RE.split(inner)
                           if part.strip()]
        else:
            search_term = [value]

        # ":" is a synonym for "="
        if separator == ":":
            separator = "="

        items.append(dict(key=key,
                          separator=separator,
                          search_term=search_term))
    return items
+
+
if __name__ == '__main__':
    # Smoke run: feed a representative set of query strings through parse().
    # The results are discarded; the run only shows that none of these
    # inputs makes the parser raise.
    sample_queries = (
        "foo=[3 2 1]",
        "WIKI=ho*",
        "LRS>9",
        "LRS>=18",
        "NAME='rw williams'",
        'NAME="rw williams"',
        "foo <= 2",
        "cisLRS<20",
        "foo=[3 2 1)",
        "foo=(3 2 1)",
        "shh",
        "shh grep",
        "LRS=(9 99 Chr4 122 155) cisLRS=(9 999 10)",
        "sal1 LRS=(9 99 Chr4 122 155) sal2 cisLRS=(9 999 10)",
        "sal1 sal3 LRS=(9 99 Chr4 122 155) wiki=bar sal2 go:foobar cisLRS=(9 999 10)",
        "sal1 LRS=(9 99 Chr4 122 155) wiki=bar sal2 go:foobar cisLRS=(9, 999, 10)",
    )
    for query in sample_queries:
        parse(query)