diff options
author | John Nduli | 2024-09-12 15:19:54 +0300 |
---|---|---|
committer | BonfaceKilz | 2024-09-12 18:34:56 +0300 |
commit | c71f464c783ca49587a21a52054cb7237b7deb0c (patch) | |
tree | 573f84e6449d551884848a1188145b346bd97b01 /gn2/wqflask | |
parent | 51109f6dd78ea9d1b40ca97024a70e44e108b769 (diff) | |
download | genenetwork2-c71f464c783ca49587a21a52054cb7237b7deb0c.tar.gz |
feat: add rough implementation for xapian query cleaner
Diffstat (limited to 'gn2/wqflask')
-rw-r--r-- | gn2/wqflask/views.py | 14 |
1 files changed, 13 insertions, 1 deletions
def clean_xapian_query(query: str) -> str:
    """Strip xapian field prefixes and range terms from a search query.

    This is a temporary solution that works for some queries; a better
    solution is being worked on.
    TODO: FIXME

    The query is split on whitespace and every word is handled as follows:

    * ``prefix:value`` where ``prefix`` (case-insensitive) is a known
      field prefix is reduced to ``value``; an empty value is dropped.
    * ``prefix:...`` where ``prefix`` (case-insensitive) is a known range
      prefix is dropped entirely, since ranges are not searched for.
    * Any other word, including a bare word that merely happens to equal
      a prefix name, is kept unchanged.

    Args:
        query: The raw query string.

    Returns:
        The cleaned words joined by single spaces; ``""`` when nothing
        is left.
    """
    xapian_prefixes = {
        "author", "species", "group", "tissue", "dataset",
        "symbol", "description", "rif", "wiki",
    }
    range_prefixes = {
        "mean", "peak", "position", "peakmb", "additive", "year",
    }
    final_query = []
    for word in query.split():
        # partition never raises: for a word without ":" the separator
        # and value simply come back empty.
        prefix, separator, value = word.partition(":")
        prefix = prefix.lower()
        if separator and prefix in range_prefixes:
            # no need to search for ranges
            continue
        if separator and prefix in xapian_prefixes:
            if value:
                final_query.append(value)
            continue
        # Plain search term: keep it as typed.
        final_query.append(word)
    return " ".join(final_query)