aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Alexander_Kabui, 2024-09-12 17:59:00 +0300
committer: Alexander_Kabui, 2024-09-12 17:59:00 +0300
commit: 26df13607eec77f35d8f2da28cb88c197fcf1b9c (patch)
tree: 0a9cbd929d04479c423219145ba1d7ec5a055e04
parent: 5acb80117c002b72dc591e0ce0c3083a2695bf0a (diff)
downloadgenenetwork2-26df13607eec77f35d8f2da28cb88c197fcf1b9c.tar.gz
feat: implement text transformer for xapian searches.
-rw-r--r--gn2/wqflask/views.py43
1 files changed, 40 insertions, 3 deletions
diff --git a/gn2/wqflask/views.py b/gn2/wqflask/views.py
index f723958a..bcab1187 100644
--- a/gn2/wqflask/views.py
+++ b/gn2/wqflask/views.py
@@ -268,11 +268,47 @@ def gsearchtable():
return flask.jsonify(current_page)
+
# Field prefixes whose ``prefix:value`` tokens are rewritten as plain words
# and recorded as context for the generated prompt.
_XAPIAN_PREFIXES = frozenset({
    "author",
    "species",
    "group",
    "tissue",
    "dataset",
    "symbol",
    "description",
    "rif",
    "wiki",
})
# Boolean/proximity operators; matched case-sensitively, as written below.
_XAPIAN_OPERATORS = frozenset({"AND", "NOT", "OR", "XOR", "NEAR", "ADJ"})
# Names that mark a ``..`` range token (e.g. ``mean:5..10``) to be dropped.
_XAPIAN_RANGE_PREFIXES = (
    "mean",
    "peak",
    "position",
    "peakmb",
    "additive",
    "year",
)


def clean_xapian_query(query: str) -> str:
    """Turn a Xapian query string into a natural-language prompt.

    The query is split on whitespace and each token is handled as follows:

    * boolean/proximity operators (``AND``, ``OR``, ...) are dropped;
    * range tokens (containing ``..`` and one of the range names such as
      ``mean`` or ``year``) are dropped;
    * ``field:value`` tokens with a recognised field become ``field value``
      and the field is recorded once as prompt context (most recently
      seen field first);
    * every other token is kept verbatim, including ``x:y`` tokens whose
      prefix is not a recognised field, so no search term is lost.

    Args:
        query (str): The original Xapian query string.

    Returns:
        str: A prompt of the form
        ``"Provide answer on <terms> context <fields>,genes"``.  The
        wrapper is returned even when no terms survive cleaning.
    """
    # "genes" is always the trailing, default context.
    contexts = ["genes"]
    terms = []
    for token in query.split():
        if token in _XAPIAN_OPERATORS:
            continue
        # Substring match on purpose so wrapped forms like "(mean:5..10"
        # are still recognised as ranges.
        if ".." in token and any(
            name in token for name in _XAPIAN_RANGE_PREFIXES
        ):
            continue
        prefix, separator, value = token.partition(":")
        if not separator or prefix not in _XAPIAN_PREFIXES:
            # Plain word or unrecognised prefix: keep the whole token
            # instead of truncating it at the first colon.
            terms.append(token)
            continue
        if prefix not in contexts:
            contexts.insert(0, prefix)
        # Avoid a dangling space when the field has no value ("author:").
        terms.append(f"{prefix} {value}" if value else prefix)
    cleaned_query = " ".join(terms)
    context = ",".join(contexts)
    return f"Provide answer on {cleaned_query} context {context}"
@app.route("/gnqna", methods=["POST", "GET"])
@@ -298,6 +334,7 @@ def gnqna():
query_type = request.args.get("type")
if query_type == "xapian":
query = clean_xapian_query(query)
+ # todo; check if is empty
safe_query = urllib.parse.urlencode({"query": query})
search_result = requests.put(
urljoin(GN3_LOCAL_URL, f"/api/llm/search?{safe_query}"),