diff options
-rw-r--r-- | gn2/utility/helper_functions.py | 45 | ||||
-rw-r--r-- | gn2/wqflask/templates/gnqa.html | 72 | ||||
-rw-r--r-- | gn2/wqflask/templates/gnqa_answer.html | 40 | ||||
-rw-r--r-- | gn2/wqflask/templates/gsearch_gene.html | 9 | ||||
-rw-r--r-- | gn2/wqflask/views.py | 12 |
5 files changed, 114 insertions, 64 deletions
def clean_xapian_query(query: str) -> str:
    """Turn a Xapian search string into a natural-language prompt.

    The query is split on whitespace and each token is handled as follows:

    * Boolean/proximity operators (``AND``, ``NOT``, ``OR``, ``XOR``,
      ``NEAR``, ``ADJ``) are dropped.  They are matched case-sensitively,
      so a lowercase ``and`` is kept as an ordinary word.
    * Range filters such as ``mean:5..10`` (a known range prefix whose
      value contains ``..``) are dropped.
    * ``prefix:value`` tokens with a known field prefix contribute both
      words to the prompt and add the prefix to the context list.
    * Every other token is kept; a colon inside it is replaced by a space
      so that the value after the colon is not lost.

    Args:
        query: The original Xapian query string.

    Returns:
        A prompt of the form
        ``"Provide answer on <terms> context <prefixes>,genes"``.  The
        context lists each recognised prefix once, most recently seen
        first, and always ends with ``genes``.  When no term survives the
        cleaning, ``<terms>`` is empty but the prompt is still returned.
    """
    xapian_prefixes = {
        "author",
        "species",
        "group",
        "tissue",
        "dataset",
        "symbol",
        "description",
        "rif",
        "wiki",
    }
    xapian_operators = {"AND", "NOT", "OR", "XOR", "NEAR", "ADJ"}
    range_prefixes = {"mean", "peak", "position", "peakmb", "additive", "year"}
    query_context = ["genes"]
    cleaned_query_parts = []
    for token in query.split():
        if token in xapian_operators:
            continue
        prefix, separator, suffix = token.partition(":")
        if ".." in suffix and prefix in range_prefixes:
            continue
        # Only a real ``prefix:value`` pair scopes the search to a field; a
        # bare word such as "tissue" is just a search term.  Each prefix is
        # recorded once so repeated fields do not bloat the context.
        is_field_term = bool(separator and suffix) and prefix in xapian_prefixes
        if is_field_term and prefix not in query_context:
            query_context.insert(0, prefix)
        # Keep both halves of the token; skipping empty halves avoids stray
        # spaces for tokens like "species:" or ":".
        cleaned_query_parts.extend(part for part in (prefix, suffix) if part)
    cleaned_query = " ".join(cleaned_query_parts)
    context = ",".join(query_context)
    return f"Provide answer on {cleaned_query} context {context}"
- </p> - <p>GNQA is not a finished product as we are working diligently to improve it daily.</p> - <p> - <b>Thanks for using GNQA!</b> - </p> - <div></div> - </article> - </section> - {% endblock %} - {% block js %} - <script src="{{ url_for('js', filename='jquery/jquery.min.js') }}" - type="text/javascript"></script> - <script language="javascript" - type="text/javascript" - src="{{ url_for('js', filename='jquery-ui/jquery-ui.min.js') }}"></script> - <script language="javascript" - type="text/javascript" - src="{{ url_for('js', filename='htmx.min.js') }}"></script> - <script type="text/javascript"> - document.addEventListener('DOMContentLoaded', function() { - $('footer').hide() - }); - </script> - {% endblock %} + {% if answer %} + {% include 'gnqa_answer.html' %} + {% else %} + + <div class="row gnqa-copy"> + <div class="col-sm-10 col-sm-offset-1 col-md-offset-3 col-md-6"> + <p> + Welcome to the GeneNetwork Question and Answer (GNQA)system. We utilize a large language model and 3000 scientific publications to make GNQA a subject matter expert in three areas: <b><a href="/">GeneNetwork.org</a></b>, <b>genomics/genetics with regards to diabetes</b> and <b>genomics/genetics with regards to agin.</b>. + </p> + <p> + At the moment when you ask GNQA something it will attempt to return a sensible answer with <q>real</q> references. To this end we aim to reduce hallucinations and provide a knowledge launchpad for a researcher to enhance their knowledge on the relevant subject matter. 
+ </p> + <p>GNQA is not a finished product as we are working diligently to improve it daily.</p> + <p> + <b>Thanks for using GNQA!</b> + </p> + </div> + </div> + {% endif %} + </article> + </section> + {% endblock %} + {% block js %} + <script src="{{ url_for('js', filename='jquery/jquery.min.js') }}" type="text/javascript"></script> + <script src="{{ url_for('js', filename='jquery-ui/jquery-ui.min.js') }}" type="text/javascript"></script> + <script src="{{ url_for('js', filename='htmx.min.js') }}" type="text/javascript" ></script> + <script type="text/javascript"> +document.addEventListener('DOMContentLoaded', function() { + $('footer').hide() +}); + </script> + {% endblock %} diff --git a/gn2/wqflask/templates/gnqa_answer.html b/gn2/wqflask/templates/gnqa_answer.html index 91fa4981..2e182375 100644 --- a/gn2/wqflask/templates/gnqa_answer.html +++ b/gn2/wqflask/templates/gnqa_answer.html @@ -1,7 +1,7 @@ <section class="container-fluid answers gnqa-copy"> <div class="row container gnqa-answer" style="margin-bottom: 1em"> <p class="row lead"> - <mark style="font-family: 'Linux Libertine','Georgia','Times','Source Serif Pro',serif"><b><i>{{ query }}</i></b></mark> + <mark class="linux-libertine-font"><b><i>{{ query }}</i></b></mark> <br /> <span style="white-space: pre-line">{{ answer|safe }}</span> </p> @@ -20,32 +20,24 @@ title="Vote Down"> <i class="fa fa-thumbs-down fa-sm fa-1x" aria-hidden="true"></i> </button> - <sub id="rate" class="text-info"> - </sub> + <sub id="rate" class="text-info"></sub> </div> </div> <div class="row container"> <details open> <summary> - <h3 style="font-family: 'Linux Libertine','Georgia','Times','Source Serif Pro',serif">References</h3> + <h3 class="linux-libertine-font">References</h3> </summary> {% if references %} <ul class="list-unstyled"> {% for reference in references %} <li> - <div class="panel-group" - role="tablist" - aria-multiselectable="true" - style="margin-bottom:0"> + <div class="panel-group" role="tablist" 
aria-multiselectable="true" style="margin-bottom:0"> <div class="panel panel-default"> {% if loop.first %} - <div class="panel-heading active" - role="tab" - id="heading{{ reference.doc_id }}"> - <h4 class="panel-title" - style="font-family: 'Linux Libertine','Georgia','Times','Source Serif Pro',serif"> - <a class="collapsed" - role="button" + <div class="panel-heading active" role="tab" id="heading{{ reference.doc_id }}"> + <h4 class="panel-title linux-libertine-font"> + <a class="collapsed" role="button" data-toggle="collapse" data-parent="#accordion" href="#collapse{{ reference.doc_id }}" @@ -65,8 +57,7 @@ {% if reference.pubmed %} <details open> <summary>See PubMed Info</summary> - <div style="font-family: 'Linux Libertine','Georgia','Times','Source Serif Pro',serif; - margin-top:1.4em"> + <div class="linux-libertine-font" style="margin-top:1.4em"> <h3> <b>{{ reference.pubmed[0].get("title") }}:</b> </h3> @@ -90,8 +81,7 @@ </div> {% else %} <div class="panel-heading" role="tab" id="heading{{ reference.doc_id }}"> - <h4 class="panel-title" - style="font-family: 'Linux Libertine','Georgia','Times','Source Serif Pro',serif"> + <h4 class="panel-title linux-libertine-font"> <a class="collapsed" role="button" data-toggle="collapse" @@ -113,8 +103,7 @@ {% if reference.pubmed %} <details> <summary>See PubMed Info</summary> - <div style="font-family: 'Linux Libertine','Georgia','Times','Source Serif Pro',serif; - margin-top:1.4em"> + <div class="linux-libertine-font" style="margin-top:1.4em"> <h3> <b>{{ reference.pubmed[0].get("title") }}:</b> </h3> @@ -154,8 +143,9 @@ </div> </div> </section> -{% block js %} - <script> + +<script> +$(document).ready(function() { function updateRatingHandler(target, responseObj, args){ let {status, response} = responseObj.xhr if (status == 200 && args == "upvote"){ @@ -187,5 +177,5 @@ htmx.on("#upvote", "click", function(evt){ handler: (target, obj)=> updateRatingHandler(target,obj, "downvote"), swap:"innerHTML", values: {'query': 
query, 'answer': answer}})}); - </script> -{% endblock %} +}) +</script> diff --git a/gn2/wqflask/templates/gsearch_gene.html b/gn2/wqflask/templates/gsearch_gene.html index 091cc435..80ed8b47 100644 --- a/gn2/wqflask/templates/gsearch_gene.html +++ b/gn2/wqflask/templates/gsearch_gene.html @@ -16,8 +16,14 @@ </h3> </div> + + {% if do_ai_search %} <div class="row" id="ai_result"> + <div class="text-center" id="spinner"> + <i class="fa fa-spinner fa-spin fa-3x"></i> + </div> </div> + {% endif %} <p>To study a record, click on its Record ID below.<br />Check records below and click Add button to add to selection.</p> <div> @@ -306,6 +312,9 @@ success: function(result) { let ai_div = ai_content_div(result.search_term, result.search_result, result.search_url) $("#ai_result").append(ai_div); + }, + complete: function() { + $("#spinner").hide(); } }) }) diff --git a/gn2/wqflask/views.py b/gn2/wqflask/views.py index ec1c2b74..d5fcedfe 100644 --- a/gn2/wqflask/views.py +++ b/gn2/wqflask/views.py @@ -89,6 +89,7 @@ from gn2.utility.tools import GN3_LOCAL_URL from gn2.utility.tools import JS_TWITTER_POST_FETCHER_PATH from gn2.utility.tools import JS_GUIX_PATH from gn2.utility.helper_functions import get_species_groups +from gn2.utility.helper_functions import clean_xapian_query from gn2.utility.redis_tools import get_redis_conn import gn2.utility.hmac as hmac @@ -268,12 +269,6 @@ def gsearchtable(): return flask.jsonify(current_page) -def clean_xapian_query(query: str) -> str: - """ Remove filler words in xapian query - TODO: FIXME - """ - return query - @app.route("/gnqna", methods=["POST", "GET"]) @require_oauth2 @@ -298,8 +293,9 @@ def gnqna(): query_type = request.args.get("type") if query_type == "xapian": query = clean_xapian_query(query) + # todo; check if is empty safe_query = urllib.parse.urlencode({"query": query}) - search_result = requests.put( + search_result = requests.get( urljoin(GN3_LOCAL_URL, f"/api/llm/search?{safe_query}"), headers={"Authorization": 
f"Bearer {token}"}, ) @@ -312,7 +308,7 @@ def gnqna(): "search_url": f"/gnqna?{safe_query}", } return jsonify(ai_result) - return render_template("gnqa_answer.html", **search_result) + return render_template("gnqa.html", **search_result) else: return render_template("gnqa.html") if request.method == "POST": |