aboutsummaryrefslogtreecommitdiff
path: root/wqflask/wqflask/parser.py
blob: 74343b8ad26ca5b522047c09ed7c709652583288 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from __future__ import print_function, division

import re

from pprint import pformat as pf


# Tokenizer for search strings.  Alternatives are tried in this order:
#   1. key=( ... )   parenthesised group of values
#   2. key=[ ... ]   bracketed group of values
#   3. key=value or key:value
#   4. a bare word
# The whole alternation is one capturing group so that re.split() returns the
# matched tokens together with whatever text lies between them.
_TOKEN_RE = re.compile(
    r"""(\w+\s*=\s*\([^)]*\)"""
    r"""|\w+\s*=\s*\[[^\]]*\]"""
    r"""|\w+\s*[=:]\w+"""
    r"""|\w+)"""
)

# Values inside a group are separated by whitespace and/or commas.
_VALUE_SPLIT_RE = re.compile(r"""[\s,]+""")

# (opener, closer) pairs that may wrap a group of values, in check order.
_GROUP_DELIMITERS = (("(", ")"), ("[", "]"))


def _first_separator(token):
    """Return whichever of ':' or '=' occurs first in token, or None."""
    found = [(token.index(sep), sep) for sep in (":", "=") if sep in token]
    if not found:
        return None
    return min(found)[1]


def _parse_value(value):
    """Return a list for a (...) or [...] group, else the value unchanged."""
    for opener, closer in _GROUP_DELIMITERS:
        if opener in value:
            # Raised explicitly (rather than via ``assert``) so the check is
            # not stripped under ``python -O``; the exception type is kept as
            # AssertionError for callers written against the assert version.
            if not (value.startswith(opener) and value.endswith(closer)):
                raise AssertionError("Invalid token")
            inner = value[1:-1]  # Get rid of the delimiters
            return [part for part in _VALUE_SPLIT_RE.split(inner) if part]
    return value


def parse(pstring):
    """Split a search string into a list of term dictionaries.

    Each term is a dict with three keys:

    * ``key`` -- text before the separator, or None for a bare term
    * ``seperator`` -- ``'='`` or ``':'``, or None for a bare term
      (the spelling is part of the returned structure)
    * ``search_term`` -- the value as a string, or a list of strings when
      the value was wrapped in parentheses or brackets

    For example ``"sal1 LRS=(9 99) go:foobar"`` produces a bare term for
    ``sal1``, a term with key ``LRS`` and search_term ``['9', '99']``, and a
    term with key ``go``, separator ``':'`` and search_term ``'foobar'``.

    Text between recognised tokens (stray punctuation, for instance) is kept
    and turned into terms as well.

    Raises AssertionError("Invalid token") when a value contains an opening
    parenthesis/bracket but is not fully enclosed by the matching pair.

    The token list and the resulting terms are printed as a debug trace.
    """
    tokens = [piece.strip()
              for piece in _TOKEN_RE.split(pstring)
              if piece and piece.strip()]
    print(tokens)

    items = []

    for token in tokens:
        # Split on whichever separator comes first so that a ':' inside a
        # group value (e.g. "pos=(Chr4:122 155)") does not steal the split.
        seperator = _first_separator(token)

        if seperator:
            key, _, value = token.partition(seperator)
            # The tokenizer allows whitespace around the separator; drop it
            # so that "foo = (1 2)" and "foo=(1 2)" parse identically.
            term = dict(key=key.strip(),
                        seperator=seperator,
                        search_term=_parse_value(value.strip()))
        else:
            term = dict(key=None,
                        seperator=None,
                        search_term=token)

        items.append(term)

    print(pf(items))
    return items

if __name__ == '__main__':
    # Manual smoke run: feed a handful of representative search strings
    # through parse(), which prints the tokens and resulting terms itself.
    sample_queries = (
        "foo=(3 2 1)",
        "shh",
        "shh grep",
        "LRS=(9 99 Chr4 122 155) cisLRS=(9 999 10)",
        "sal1 LRS=(9 99 Chr4 122 155) sal2 cisLRS=(9 999 10)",
        "sal1 sal3 LRS=(9 99 Chr4 122 155) wiki=bar sal2 go:foobar cisLRS=(9 999 10)",
        "sal1 LRS=(9 99 Chr4 122 155) wiki=bar sal2 go:foobar cisLRS=(9, 999, 10)",
    )
    for sample_query in sample_queries:
        parse(sample_query)