Diffstat (limited to 'gn3')
-rw-r--r--  gn3/api/llm.py       188
-rw-r--r--  gn3/errors.py         18
-rw-r--r--  gn3/llms/client.py    57
-rw-r--r--  gn3/llms/errors.py     7
-rw-r--r--  gn3/llms/process.py  121
5 files changed, 197 insertions(+), 194 deletions(-)
diff --git a/gn3/api/llm.py b/gn3/api/llm.py
index 442252f..952a5b9 100644
--- a/gn3/api/llm.py
+++ b/gn3/api/llm.py
@@ -1,125 +1,117 @@
"""Api endpoints for gnqa"""
-from datetime import timedelta
import json
-import sqlite3
-from redis import Redis
-
+from datetime import datetime, timezone
from flask import Blueprint
from flask import current_app
from flask import jsonify
from flask import request
from gn3.llms.process import get_gnqa
-from gn3.llms.process import get_user_queries
-from gn3.llms.process import fetch_query_results
from gn3.llms.errors import LLMError
from gn3.auth.authorisation.oauth2.resource_server import require_oauth
from gn3.auth import db
+
gnqa = Blueprint("gnqa", __name__)
-@gnqa.route("/gnqna", methods=["POST"])
-def gnqna():
- """Main gnqa endpoint"""
+@gnqa.route("/search", methods=["PUT"])
+def search():
+    """API endpoint for searching queries against the Fahamu API."""
query = request.json.get("querygnqa", "")
if not query:
return jsonify({"error": "querygnqa is missing in the request"}), 400
-
- try:
- fahamu_token = current_app.config.get("FAHAMU_AUTH_TOKEN")
- if fahamu_token is None:
- return jsonify({"query": query,
- "error": "Use of invalid fahamu auth token"}), 500
- task_id, answer, refs = get_gnqa(
- query, fahamu_token, current_app.config.get("DATA_DIR"))
- response = {
- "task_id": task_id,
- "query": query,
- "answer": answer,
- "references": refs
- }
- with (Redis.from_url(current_app.config["REDIS_URI"],
- decode_responses=True) as redis_conn):
- redis_conn.setex(
- f"LLM:random_user-{query}",
- timedelta(days=10), json.dumps(response))
- return jsonify({
- **response,
- "prev_queries": get_user_queries("random_user", redis_conn)
- })
- except LLMError as error:
- return jsonify({"query": query,
- "error": f"Request failed-{str(error)}"}), 500
+ fahamu_token = current_app.config.get("FAHAMU_AUTH_TOKEN")
+ if not fahamu_token:
+        raise LLMError(
+            "Request failed: an LLM authorisation token is required", query)
+ task_id, answer, refs = get_gnqa(
+ query, fahamu_token, current_app.config.get("DATA_DIR"))
+ response = {
+ "task_id": task_id,
+ "query": query,
+ "answer": answer,
+ "references": refs
+ }
+ with (db.connection(current_app.config["LLM_DB_PATH"]) as conn,
+ require_oauth.acquire("profile user") as token):
+ cursor = conn.cursor()
+ cursor.execute("""CREATE TABLE IF NOT EXISTS
+ history(user_id TEXT NOT NULL,
+ task_id TEXT NOT NULL,
+ query TEXT NOT NULL,
+ results TEXT,
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+ PRIMARY KEY(task_id)) WITHOUT ROWID""")
+ cursor.execute(
+ """INSERT INTO history(user_id, task_id, query, results)
+ VALUES(?, ?, ?, ?)
+ """, (str(token.user.user_id), str(task_id["task_id"]),
+ query,
+ json.dumps(response))
+ )
+ return response
@gnqa.route("/rating/<task_id>", methods=["POST"])
@require_oauth("profile")
-def rating(task_id):
- """Endpoint for rating qnqa query and answer"""
- try:
- llm_db_path = current_app.config["LLM_DB_PATH"]
- with (require_oauth.acquire("profile") as token,
- db.connection(llm_db_path) as conn):
-
- results = request.json
- user_id, query, answer, weight = (token.user.user_id,
- results.get("query"),
- results.get("answer"),
- results.get("weight", 0))
- cursor = conn.cursor()
- create_table = """CREATE TABLE IF NOT EXISTS Rating(
- user_id TEXT NOT NULL,
- query TEXT NOT NULL,
- answer TEXT NOT NULL,
- weight INTEGER NOT NULL DEFAULT 0,
- task_id TEXT NOT NULL UNIQUE
- )"""
- cursor.execute(create_table)
- cursor.execute("""INSERT INTO Rating(user_id,query,
- answer,weight,task_id)
- VALUES(?,?,?,?,?)
- ON CONFLICT(task_id) DO UPDATE SET
- weight=excluded.weight
- """, (str(user_id), query, answer, weight, task_id))
+def rate_queries(task_id):
+    """API endpoint for rating a GNQA query and answer."""
+ with (require_oauth.acquire("profile") as token,
+ db.connection(current_app.config["LLM_DB_PATH"]) as conn):
+ results = request.json
+ user_id, query, answer, weight = (token.user.user_id,
+ results.get("query"),
+ results.get("answer"),
+ results.get("weight", 0))
+ cursor = conn.cursor()
+ create_table = """CREATE TABLE IF NOT EXISTS Rating(
+ user_id TEXT NOT NULL,
+ query TEXT NOT NULL,
+ answer TEXT NOT NULL,
+ weight INTEGER NOT NULL DEFAULT 0,
+ task_id TEXT NOT NULL UNIQUE,
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+ PRIMARY KEY(task_id))"""
+ cursor.execute(create_table)
+ cursor.execute("""INSERT INTO Rating(user_id, query,
+ answer, weight, task_id, created_at)
+ VALUES(?, ?, ?, ?, ?, ?)
+ ON CONFLICT(task_id) DO UPDATE SET
+ weight=excluded.weight
+ """, (str(user_id), query, answer, weight, task_id,
+ datetime.now(timezone.utc)))
return {
- "message":
- "You have successfully rated this query:Thank you!!"
+        "message": "You have successfully rated this query. Thank you!"
}, 200
- except sqlite3.Error as error:
- return jsonify({"error": str(error)}), 500
-@gnqa.route("/history/<query>", methods=["GET"])
+@gnqa.route("/history", methods=["GET", "DELETE"])
@require_oauth("profile user")
-def fetch_user_hist(query):
- """"Endpoint to fetch previos searches for User"""
- with (require_oauth.acquire("profile user") as the_token,
- Redis.from_url(current_app.config["REDIS_URI"],
- decode_responses=True) as redis_conn):
- return jsonify({
- **fetch_query_results(query, the_token.user.id, redis_conn),
- "prev_queries": get_user_queries("random_user", redis_conn)
- })
-
-
-@gnqa.route("/historys/<query>", methods=["GET"])
-def fetch_users_hist_records(query):
- """method to fetch all users hist:note this is a test functionality
- to be replaced by fetch_user_hist
- """
- with Redis.from_url(current_app.config["REDIS_URI"],
- decode_responses=True) as redis_conn:
- return jsonify({
- **fetch_query_results(query, "random_user", redis_conn),
- "prev_queries": get_user_queries("random_user", redis_conn)
- })
-
-
-@gnqa.route("/get_hist_names", methods=["GET"])
-def fetch_prev_hist_ids():
- """Test method for fetching history for Anony Users"""
- with (Redis.from_url(current_app.config["REDIS_URI"],
- decode_responses=True)) as redis_conn:
- return jsonify({"prev_queries": get_user_queries("random_user",
- redis_conn)})
+def fetch_prev_history():
+    """API endpoint to fetch or delete a user's previous GNQA searches."""
+ with (require_oauth.acquire("profile user") as token,
+ db.connection(current_app.config["LLM_DB_PATH"]) as conn):
+ cursor = conn.cursor()
+ if request.method == "DELETE":
+ task_ids = list(request.json.values())
+ query = """DELETE FROM history
+ WHERE task_id IN ({})
+ and user_id=?""".format(",".join("?" * len(task_ids)))
+ cursor.execute(query, (*task_ids, str(token.user.user_id),))
+ return jsonify({})
+ elif (request.method == "GET" and
+ request.args.get("search_term")):
+ cursor.execute(
+ """SELECT results from history
+ Where task_id=? and user_id=?""",
+ (request.args.get("search_term"),
+ str(token.user.user_id),))
+ record = cursor.fetchone()
+ if record:
+ return dict(record).get("results")
+ return {}
+ cursor.execute(
+ """SELECT task_id,query from history WHERE user_id=?""",
+ (str(token.user.user_id),))
+ return jsonify([dict(item) for item in cursor.fetchall()])
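
The rewritten llm.py endpoints drop the Redis-backed history in favour of an SQLite history table and require an OAuth2 token on every route. A minimal client-side sketch of exercising the new routes follows; the base URL, token value, and the DELETE payload key are assumptions for illustration, not part of this change.

# Sketch of calling the new GNQA routes; BASE_URL and TOKEN are placeholders.
import requests

BASE_URL = "http://localhost:8080/api/llm"   # assumed mount point for the blueprint
TOKEN = "an-oauth2-access-token"             # assumed OAuth2 bearer token
HEADERS = {"Authorization": f"Bearer {TOKEN}"}

# PUT /search runs a query against the Fahamu API and stores it in history.
result = requests.put(f"{BASE_URL}/search",
                      json={"querygnqa": "what is a QTL?"},
                      headers=HEADERS).json()

# POST /rating/<task_id> records a weight for the answer; the inner task_id
# key (assumed here) mirrors what the search endpoint stores in history.
requests.post(f"{BASE_URL}/rating/{result['task_id']['task_id']}",
              json={"query": result["query"],
                    "answer": result["answer"],
                    "weight": 1},
              headers=HEADERS)

# GET /history lists (task_id, query) pairs for the logged-in user;
# ?search_term=<task_id> returns one stored result.
history = requests.get(f"{BASE_URL}/history", headers=HEADERS).json()

# DELETE /history removes the task ids passed as the values of the JSON body.
requests.delete(f"{BASE_URL}/history",
                json={"task_id_0": history[0]["task_id"]},
                headers=HEADERS)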
diff --git a/gn3/errors.py b/gn3/errors.py
index 1833bf6..c53604f 100644
--- a/gn3/errors.py
+++ b/gn3/errors.py
@@ -16,7 +16,7 @@ from authlib.oauth2.rfc6749.errors import OAuth2Error
from flask import Flask, jsonify, Response, current_app
from gn3.auth.authorisation.errors import AuthorisationError
-
+from gn3.llms.errors import LLMError
def add_trace(exc: Exception, jsonmsg: dict) -> dict:
"""Add the traceback to the error handling object."""
@@ -106,6 +106,21 @@ def handle_generic(exc: Exception) -> Response:
return resp
+def handle_llm_error(exc: Exception) -> Response:
+    """Handle LLM errors if not handled anywhere else."""
+ current_app.logger.error(exc)
+ resp = jsonify({
+ "query": exc.args[1],
+ "error_type": type(exc).__name__,
+ "error": (
+ exc.args[0] if bool(exc.args) else "Fahamu gnqa error occurred"
+ ),
+ "trace": traceback.format_exc()
+ })
+ resp.status_code = 500
+ return resp
+
+
def register_error_handlers(app: Flask):
"""Register application-level error handlers."""
app.register_error_handler(NotFound, page_not_found)
@@ -115,6 +130,7 @@ def register_error_handlers(app: Flask):
app.register_error_handler(AuthorisationError, handle_authorisation_error)
app.register_error_handler(RemoteDisconnected, internal_server_error)
app.register_error_handler(URLError, url_server_error)
+ app.register_error_handler(LLMError, handle_llm_error)
for exc in (
EndPointInternalError,
EndPointNotFound,
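
errors.py now registers an application-level handler for LLMError, which is expected to be raised as LLMError(message, query) so that args[0] and args[1] can be unpacked into the JSON response. A standalone sketch of that contract is shown below; only the handler body mirrors the diff, while the app and the /demo route are hypothetical.

# Standalone sketch of the LLMError handler contract.
import traceback
from flask import Flask, jsonify

class LLMError(Exception):
    """Raised as LLMError(message, query)."""

app = Flask(__name__)

@app.errorhandler(LLMError)
def handle_llm_error(exc):
    # Same JSON shape as gn3.errors.handle_llm_error in the diff.
    resp = jsonify({
        "query": exc.args[1],
        "error_type": type(exc).__name__,
        "error": exc.args[0] if exc.args else "Fahamu gnqa error occurred",
        "trace": traceback.format_exc(),
    })
    resp.status_code = 500
    return resp

@app.route("/demo")
def demo():
    # Hypothetical route that triggers the handler.
    raise LLMError("an LLM authorisation token is required", "what is a QTL?")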
diff --git a/gn3/llms/client.py b/gn3/llms/client.py
index d57bca2..ad6c400 100644
--- a/gn3/llms/client.py
+++ b/gn3/llms/client.py
@@ -55,6 +55,7 @@ class GeneNetworkQAClient(Session):
self.base_url = "https://genenetwork.fahamuai.com/api/tasks"
self.answer_url = f"{self.base_url}/answers"
self.feedback_url = f"{self.base_url}/feedback"
+ self.query = ""
adapter = TimeoutHTTPAdapter(
timeout=timeout,
@@ -83,44 +84,44 @@ class GeneNetworkQAClient(Session):
""" handler for non 200 response from fahamu api"""
return f"Error: Status code -{response.status_code}- Reason::{response.reason}"
- def ask(self, ex_url, *args, **kwargs):
+ def ask(self, ex_url, query, *args, **kwargs):
"""fahamu ask api interface"""
+ self.query = query
res = self.custom_request('POST', f"{self.base_url}{ex_url}", *args, **kwargs)
- if res.status_code != 200:
- return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0
return res, json.loads(res.text)
def get_answer(self, taskid, *args, **kwargs):
"""Fahamu get answer interface"""
- try:
- query = f"{self.answer_url}?task_id={taskid['task_id']}"
- res = self.custom_request('GET', query, *args, **kwargs)
- if res.status_code != 200:
- return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0
- return res, 1
- except TimeoutError:
- return "Timeout error occured:try to rephrase your query", 0
+ query = f"{self.answer_url}?task_id={taskid['task_id']}"
+ res = self.custom_request('GET', query, *args, **kwargs)
+ return res, 1
def custom_request(self, method, url, *args, **kwargs):
""" make custom request to fahamu api ask and get response"""
max_retries = 50
retry_delay = 3
+ response_msg = {
+            404: "API endpoint does not exist",
+            500: "Invalid token, or the Fahamu API is currently down",
+ 400: "You sent a bad Fahamu request",
+ 401: "You do not have authorization to perform the request",
+ }
for _i in range(max_retries):
- try:
- response = super().request(method, url, *args, **kwargs)
- response.raise_for_status()
- if response.ok:
- if method.lower() == "get" and response.json().get("data") is None:
- time.sleep(retry_delay)
- continue
- return response
- else:
+ response = super().request(method, url, *args, **kwargs)
+ if response.ok:
+ if method.lower() == "get" and not response.json().get("data"):
+ # note this is a dirty trick to check if fahamu has returned the results
+                    # the issue is that the api only returns 500 or 200 status code
+ # TODO: fix this on their end
time.sleep(retry_delay)
- except requests.exceptions.HTTPError as error:
- if error.response.status_code == 500:
- raise LLMError(error.request, error.response, f"Response Error with:status_code:{error.response.status_code},Reason for error: Use of Invalid Fahamu Token") from error
- raise LLMError(error.request, error.response,
- f"HTTP error occurred with error status:{error.response.status_code}") from error
- except requests.exceptions.RequestException as error:
- raise error
- raise TimeoutError
+ continue
+ return response
+ else:
+                raise LLMError(
+                    f"Request error with code {response.status_code} "
+                    "occurred with reason: "
+                    f"{response_msg.get(response.status_code, response.reason)}",
+                    self.query)
+ #time.sleep(retry_delay)
+        raise LLMError(
+            "Timeout error: we couldn't provide a response. Please try "
+            "to rephrase your question to receive feedback.",
+            self.query)
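
custom_request now retries GET requests because Fahamu answers 200 with an empty "data" field while a task is still running, and only raises LLMError once the retry budget is exhausted. A generic sketch of that poll-until-ready pattern follows; fetch and is_ready are hypothetical stand-ins for the session call and readiness check.

# Generic poll-until-ready sketch of the retry strategy in custom_request.
import time

def poll_until_ready(fetch, is_ready, max_retries=50, retry_delay=3):
    """Call fetch() until is_ready(response) is true; raise on error or timeout.

    Mirrors the client's behaviour: a 200 response whose body has no "data"
    means the task is still running, so sleep and retry instead of failing.
    """
    for _ in range(max_retries):
        response = fetch()
        if not response.ok:
            raise RuntimeError(f"request failed with status {response.status_code}")
        if is_ready(response):
            return response
        time.sleep(retry_delay)
    raise TimeoutError(f"no result after {max_retries * retry_delay} seconds")

# e.g. poll_until_ready(lambda: session.get(answer_url),
#                       lambda r: bool(r.json().get("data")))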
diff --git a/gn3/llms/errors.py b/gn3/llms/errors.py
index af3d7b0..a3a47a3 100644
--- a/gn3/llms/errors.py
+++ b/gn3/llms/errors.py
@@ -35,8 +35,5 @@ class UnprocessableEntity(HTTPError):
msg, request=request, response=response)
-class LLMError(HTTPError):
- """Custom error from making Fahamu APi request """
- def __init__(self, request, response, msg):
- super(HTTPError, self).__init__(
- msg, request=request, response=response)
+class LLMError(Exception):
+    """Custom exception for LLM errors, raised as LLMError(message, query)."""
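
LLMError is now a plain Exception rather than an HTTPError subclass, so callers no longer pass a request/response pair; they raise it with a message and the offending query. A small sketch of the new calling convention, where ask_or_raise is a hypothetical helper:

# Sketch of the new LLMError calling convention.
from gn3.llms.errors import LLMError

def ask_or_raise(query, token):
    """Raise LLMError(message, query) when no Fahamu token is configured."""
    if not token:
        raise LLMError("an LLM authorisation token is required", query)

try:
    ask_or_raise("what is a QTL?", token=None)
except LLMError as exc:
    message, query = exc.args   # the shape consumed by errors.handle_llm_error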
diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index 4725bcb..55c27a0 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -1,6 +1,7 @@
"""this module contains code for processing response from fahamu client.py"""
# pylint: disable=C0301
import os
+import re
import string
import json
import logging
@@ -21,21 +22,12 @@ class DocIDs():
        * doc_ids.json: opens doc_ids for gn references
        * sugar_doc_ids: opens doc_ids for diabetes references
"""
- self.doc_ids = self.load_file("doc_ids.json")
- self.sugar_doc_ids = self.load_file("all_files.json")
+ self.doc_ids = load_file("doc_ids.json", BASEDIR)
+ self.sugar_doc_ids = load_file("all_files.json", BASEDIR)
self.format_doc_ids(self.sugar_doc_ids)
- def load_file(self, file_name):
- """Method to load and read doc_id files"""
- file_path = os.path.join(BASEDIR, file_name)
- if os.path.isfile(file_path):
- with open(file_path, "rb") as file_handler:
- return json.load(file_handler)
- else:
- raise FileNotFoundError(f"{file_path}-- FIle does not exist\n")
-
def format_doc_ids(self, docs):
- """method to format doc_ids for list items"""
+        """Flatten list entries of (doc_id, doc_name) into the doc_ids mapping."""
for _key, val in docs.items():
if isinstance(val, list):
for doc_obj in val:
@@ -43,7 +35,14 @@ class DocIDs():
self.doc_ids.update({doc_obj["id"]: doc_name})
def get_info(self, doc_id):
- """ interface to make read from doc_ids"""
+        """Look up doc_id in doc_ids and return its info.
+
+        Args:
+            doc_id: str: a search key for doc_ids
+        Returns:
+            the doc info if doc_id is in doc_ids, otherwise doc_id itself
+        """
if doc_id in self.doc_ids.keys():
return self.doc_ids[doc_id]
else:
@@ -51,7 +50,8 @@ class DocIDs():
def format_bibliography_info(bib_info):
- """Function for formatting bibliography info"""
+ """Utility function for formatting bibliography info
+ """
if isinstance(bib_info, str):
return bib_info.removesuffix('.txt')
elif isinstance(bib_info, dict):
@@ -59,14 +59,16 @@ def format_bibliography_info(bib_info):
return bib_info
-def filter_response_text(val):
- """helper function for filtering non-printable chars"""
- return json.loads(''.join([str(char)
- for char in val if char in string.printable]))
-
-
def parse_context(context, get_info_func, format_bib_func):
- """function to parse doc_ids content"""
+    """Function to parse doc_ids content.
+    Args:
+        context: raw references from the Fahamu API
+        get_info_func: function to get doc_ids info
+        format_bib_func: function to format bibliography info
+    Returns:
+        a list with each item holding (doc_id, bib_info,
+        combined reference text)
+    """
results = []
for doc_ids, summary in context.items():
combo_txt = ""
@@ -75,13 +77,23 @@ def parse_context(context, get_info_func, format_bib_func):
doc_info = get_info_func(doc_ids)
bib_info = doc_ids if doc_ids == doc_info else format_bib_func(
doc_info)
+ pattern = r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*'
+ combo_text = re.sub(pattern,
+ lambda x: f"<a href='{x[0]}' target=_blank> {x[0]} </a>",
+ combo_txt)
results.append(
- {"doc_id": doc_ids, "bibInfo": bib_info, "comboTxt": combo_txt})
+ {"doc_id": doc_ids, "bibInfo": bib_info,
+ "comboTxt": combo_text})
return results
def load_file(filename, dir_path):
- """function to open and load json file"""
+    """Utility function to read a JSON file.
+    Args:
+        filename: file name to read
+        dir_path: base directory for the file
+    Returns: the JSON data read into a dict
+    """
file_path = os.path.join(dir_path, f"{filename}")
if not os.path.isfile(file_path):
raise FileNotFoundError(f"{filename} was not found or is a directory")
@@ -90,8 +102,19 @@ def load_file(filename, dir_path):
def fetch_pubmed(references, file_name, data_dir=""):
- """method to fetch and populate references with pubmed"""
+    """
+    Fetch PubMed data from a JSON file and populate the
+    references dictionary.
+
+    Args:
+        references (dict): Dictionary with document IDs as keys
+            and reference data as values.
+        file_name (str): Name of the JSON file containing PubMed data.
+        data_dir (str): Base directory where the data files are located.
+    Returns:
+        dict: Updated references dictionary populated with the PubMed data.
+    """
try:
pubmed = load_file(file_name, os.path.join(data_dir, "gn-meta/lit"))
for reference in references:
@@ -116,42 +139,16 @@ def get_gnqa(query, auth_token, data_dir=""):
answer
references: contains doc_name,reference,pub_med_info
"""
-
api_client = GeneNetworkQAClient(api_key=auth_token)
- res, task_id = api_client.ask('?ask=' + quote(query), auth_token)
- if task_id == 0:
- raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}")
- res, status = api_client.get_answer(task_id)
- if status == 1:
- resp_text = filter_response_text(res.text)
- if resp_text.get("data") is None:
- return task_id, "Please try to rephrase your question to receive feedback", []
- answer = resp_text['data']['answer']
- context = resp_text['data']['context']
- references = parse_context(
- context, DocIDs().get_info, format_bibliography_info)
- references = fetch_pubmed(references, "pubmed.json", data_dir)
-
- return task_id, answer, references
- else:
- return task_id, "We couldn't provide a response,Please try to rephrase your question to receive feedback", []
-
-
-def fetch_query_results(query, user_id, redis_conn):
- """this method fetches prev user query searches"""
- result = redis_conn.get(f"LLM:{user_id}-{query}")
- if result:
- return json.loads(result)
- return {
- "query": query,
- "answer": "Sorry No answer for you",
- "references": [],
- "task_id": None
- }
-
-
-def get_user_queries(user_id, redis_conn):
- """methos to fetch all queries for a specific user"""
- results = redis_conn.keys(f"LLM:{user_id}*")
- return [query for query in
- [result.partition("-")[2] for result in results] if query != ""]
+ res, task_id = api_client.ask('?ask=' + quote(query), query=query)
+ res, _status = api_client.get_answer(task_id)
+ resp_text = json.loads(''.join([str(char)
+ for char in res.text if char in string.printable]))
+ answer = re.sub(r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*',
+ lambda x: f"<a href='{x[0]}' target=_blank> {x[0]} </a>",
+ resp_text["data"]["answer"])
+ context = resp_text['data']['context']
+ return task_id, answer, fetch_pubmed(parse_context(
+ context, DocIDs().get_info,
+ format_bibliography_info),
+ "pubmed.json", data_dir)
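
get_gnqa and parse_context both post-process text with the same regular expression, wrapping bare URLs in HTML anchors before the answer and references are returned. A quick demonstration of that substitution on a made-up sentence:

# Demonstration of the URL-to-anchor substitution used in parse_context
# and get_gnqa; the sample sentence is invented.
import re

PATTERN = r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*'

def linkify(text):
    """Wrap bare URLs in HTML anchor tags, as the diff does for answers."""
    return re.sub(PATTERN,
                  lambda x: f"<a href='{x[0]}' target=_blank> {x[0]} </a>",
                  text)

print(linkify("See https://genenetwork.org/about for details."))
# prints the sentence with the URL wrapped in an <a href=...> anchor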