From 715241466e6426c2f3650e6cd9e0990078ad80f5 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Wed, 15 May 2024 19:00:12 +0300 Subject: Pep8 formatting for llm Api file. --- gn3/api/llm.py | 64 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 7d860d8..91779a5 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -1,24 +1,26 @@ -"""API for data used to generate menus""" - -# pylint: skip-file +"""Api endpoints for gnqa""" +from datetime import timedelta +from functools import wraps +import json +import sqlite3 +from redis import Redis -from flask import jsonify, request, Blueprint, current_app +from flask import Blueprint +from flask import current_app +from flask import jsonify +from flask import request -from functools import wraps from gn3.llms.process import get_gnqa from gn3.llms.process import get_user_queries from gn3.llms.process import fetch_query_results from gn3.auth.authorisation.oauth2.resource_server import require_oauth from gn3.auth import db -from redis import Redis -import json -import sqlite3 -from datetime import timedelta GnQNA = Blueprint("GnQNA", __name__) def handle_errors(func): + """general error handling decorator function""" @wraps(func) def decorated_function(*args, **kwargs): try: @@ -30,7 +32,7 @@ def handle_errors(func): @GnQNA.route("/gnqna", methods=["POST"]) def gnqa(): - # todo add auth + """Main gnqa endpoint""" query = request.json.get("querygnqa", "") if not query: return jsonify({"error": "querygnqa is missing in the request"}), 400 @@ -38,7 +40,8 @@ def gnqa(): try: fahamu_token = current_app.config.get("FAHAMU_AUTH_TOKEN") if fahamu_token is None: - return jsonify({"query": query, "error": "Use of invalid fahamu auth token"}), 500 + return jsonify({"query": query, + "error": "Use of invalid fahamu auth token"}), 500 task_id, answer, refs = get_gnqa( query, fahamu_token, current_app.config.get("DATA_DIR")) response 
= { @@ -49,19 +52,22 @@ def gnqa(): } with (Redis.from_url(current_app.config["REDIS_URI"], decode_responses=True) as redis_conn): - # The key will be deleted after 60 seconds - redis_conn.setex(f"LLM:random_user-{query}", timedelta(days=10), json.dumps(response)) + redis_conn.setex( + f"LLM:random_user-{query}", + timedelta(days=10), json.dumps(response)) return jsonify({ **response, "prev_queries": get_user_queries("random_user", redis_conn) }) except Exception as error: - return jsonify({"query": query, "error": f"Request failed-{str(error)}"}), 500 + return jsonify({"query": query, + "error": f"Request failed-{str(error)}"}), 500 @GnQNA.route("/rating/", methods=["POST"]) @require_oauth("profile") def rating(task_id): + """Endpoint for rating qnqa query and answer""" try: llm_db_path = current_app.config["LLM_DB_PATH"] with (require_oauth.acquire("profile") as token, @@ -81,14 +87,16 @@ def rating(task_id): task_id TEXT NOT NULL UNIQUE )""" cursor.execute(create_table) - cursor.execute("""INSERT INTO Rating(user_id,query,answer,weight,task_id) + cursor.execute("""INSERT INTO Rating(user_id,query, + answer,weight,task_id) VALUES(?,?,?,?,?) ON CONFLICT(task_id) DO UPDATE SET weight=excluded.weight """, (str(user_id), query, answer, weight, task_id)) return { - "message": "You have successfully rated this query:Thank you!!" - }, 200 + "message": + "You have successfully rated this query:Thank you!!" 
+ }, 200 except sqlite3.Error as error: return jsonify({"error": str(error)}), 500 except Exception as error: @@ -99,9 +107,10 @@ def rating(task_id): @require_oauth("profile user") @handle_errors def fetch_user_hist(query): - - with (require_oauth.acquire("profile user") as the_token, Redis.from_url(current_app.config["REDIS_URI"], - decode_responses=True) as redis_conn): + """"Endpoint to fetch previos searches for User""" + with (require_oauth.acquire("profile user") as the_token, + Redis.from_url(current_app.config["REDIS_URI"], + decode_responses=True) as redis_conn): return jsonify({ **fetch_query_results(query, the_token.user.id, redis_conn), "prev_queries": get_user_queries("random_user", redis_conn) @@ -111,9 +120,12 @@ def fetch_user_hist(query): @GnQNA.route("/historys/", methods=["GET"]) @handle_errors def fetch_users_hist_records(query): - """method to fetch all users hist:note this is a test functionality to be replaced by fetch_user_hist""" + """method to fetch all users hist:note this is a test functionality + to be replaced by fetch_user_hist + """ - with Redis.from_url(current_app.config["REDIS_URI"], decode_responses=True) as redis_conn: + with Redis.from_url(current_app.config["REDIS_URI"], + decode_responses=True) as redis_conn: return jsonify({ **fetch_query_results(query, "random_user", redis_conn), "prev_queries": get_user_queries("random_user", redis_conn) @@ -123,6 +135,8 @@ def fetch_users_hist_records(query): @GnQNA.route("/get_hist_names", methods=["GET"]) @handle_errors def fetch_prev_hist_ids(): - - with (Redis.from_url(current_app.config["REDIS_URI"], decode_responses=True)) as redis_conn: - return jsonify({"prev_queries": get_user_queries("random_user", redis_conn)}) + """Test method for fetching history for Anony Users""" + with (Redis.from_url(current_app.config["REDIS_URI"], + decode_responses=True)) as redis_conn: + return jsonify({"prev_queries": get_user_queries("random_user", + redis_conn)}) -- cgit v1.2.3 From 
d2b5a1c11f6e09ee13b72669068e326b8131f65c Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Wed, 15 May 2024 19:01:56 +0300 Subject: Remove broad error handling. --- gn3/api/llm.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 91779a5..08783db 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -1,6 +1,5 @@ """Api endpoints for gnqa""" from datetime import timedelta -from functools import wraps import json import sqlite3 from redis import Redis @@ -19,17 +18,6 @@ from gn3.auth import db GnQNA = Blueprint("GnQNA", __name__) -def handle_errors(func): - """general error handling decorator function""" - @wraps(func) - def decorated_function(*args, **kwargs): - try: - return func(*args, **kwargs) - except Exception as error: - return jsonify({"error": str(error)}), 500 - return decorated_function - - @GnQNA.route("/gnqna", methods=["POST"]) def gnqa(): """Main gnqa endpoint""" @@ -105,7 +93,6 @@ def rating(task_id): @GnQNA.route("/history/", methods=["GET"]) @require_oauth("profile user") -@handle_errors def fetch_user_hist(query): """"Endpoint to fetch previos searches for User""" with (require_oauth.acquire("profile user") as the_token, @@ -118,7 +105,6 @@ def fetch_user_hist(query): @GnQNA.route("/historys/", methods=["GET"]) -@handle_errors def fetch_users_hist_records(query): """method to fetch all users hist:note this is a test functionality to be replaced by fetch_user_hist @@ -133,7 +119,6 @@ def fetch_users_hist_records(query): @GnQNA.route("/get_hist_names", methods=["GET"]) -@handle_errors def fetch_prev_hist_ids(): """Test method for fetching history for Anony Users""" with (Redis.from_url(current_app.config["REDIS_URI"], -- cgit v1.2.3 From 82c89c302a87082c47d7d773264dae4872ba6d1c Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Wed, 15 May 2024 19:08:53 +0300 Subject: Rename GnQNA blueprint to gnqa. 
* register gnqa api endpoint --- gn3/api/llm.py | 20 +++++++++----------- gn3/app.py | 4 ++-- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 08783db..442252f 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -12,14 +12,15 @@ from flask import request from gn3.llms.process import get_gnqa from gn3.llms.process import get_user_queries from gn3.llms.process import fetch_query_results +from gn3.llms.errors import LLMError from gn3.auth.authorisation.oauth2.resource_server import require_oauth from gn3.auth import db -GnQNA = Blueprint("GnQNA", __name__) +gnqa = Blueprint("gnqa", __name__) -@GnQNA.route("/gnqna", methods=["POST"]) -def gnqa(): +@gnqa.route("/gnqna", methods=["POST"]) +def gnqna(): """Main gnqa endpoint""" query = request.json.get("querygnqa", "") if not query: @@ -47,12 +48,12 @@ def gnqa(): **response, "prev_queries": get_user_queries("random_user", redis_conn) }) - except Exception as error: + except LLMError as error: return jsonify({"query": query, "error": f"Request failed-{str(error)}"}), 500 -@GnQNA.route("/rating/", methods=["POST"]) +@gnqa.route("/rating/", methods=["POST"]) @require_oauth("profile") def rating(task_id): """Endpoint for rating qnqa query and answer""" @@ -87,11 +88,9 @@ def rating(task_id): }, 200 except sqlite3.Error as error: return jsonify({"error": str(error)}), 500 - except Exception as error: - raise error -@GnQNA.route("/history/", methods=["GET"]) +@gnqa.route("/history/", methods=["GET"]) @require_oauth("profile user") def fetch_user_hist(query): """"Endpoint to fetch previos searches for User""" @@ -104,12 +103,11 @@ def fetch_user_hist(query): }) -@GnQNA.route("/historys/", methods=["GET"]) +@gnqa.route("/historys/", methods=["GET"]) def fetch_users_hist_records(query): """method to fetch all users hist:note this is a test functionality to be replaced by fetch_user_hist """ - with Redis.from_url(current_app.config["REDIS_URI"], decode_responses=True) as 
redis_conn: return jsonify({ @@ -118,7 +116,7 @@ def fetch_users_hist_records(query): }) -@GnQNA.route("/get_hist_names", methods=["GET"]) +@gnqa.route("/get_hist_names", methods=["GET"]) def fetch_prev_hist_ids(): """Test method for fetching history for Anony Users""" with (Redis.from_url(current_app.config["REDIS_URI"], diff --git a/gn3/app.py b/gn3/app.py index 3f1e6ee..c8f0c5a 100644 --- a/gn3/app.py +++ b/gn3/app.py @@ -25,7 +25,7 @@ from gn3.api.menu import menu from gn3.api.search import search from gn3.api.metadata import metadata from gn3.api.sampledata import sampledata -from gn3.api.llm import GnQNA +from gn3.api.llm import gnqa from gn3.auth import oauth2 from gn3.case_attributes import caseattr @@ -78,7 +78,7 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask: app.register_blueprint(sampledata, url_prefix="/api/sampledata") app.register_blueprint(oauth2, url_prefix="/api/oauth2") app.register_blueprint(caseattr, url_prefix="/api/case-attribute") - app.register_blueprint(GnQNA, url_prefix="/api/llm") + app.register_blueprint(gnqa, url_prefix="/api/llm") register_error_handlers(app) return app -- cgit v1.2.3 From 7aa31cf63e17efe194e501bc37068a2207ab8f38 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 25 Apr 2024 19:45:00 +0300 Subject: Pep8 formatting for response file. 
--- gn3/llms/response.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/gn3/llms/response.py b/gn3/llms/response.py index 11cbd94..93320e9 100644 --- a/gn3/llms/response.py +++ b/gn3/llms/response.py @@ -1,11 +1,10 @@ # pylint: skip-file -import string import json import os -basedir = os.path.abspath(os.path.dirname(__file__)) +basedir = os.path.abspath(os.path.dirname(__file__)) class DocIDs(): @@ -26,21 +25,20 @@ class DocIDs(): return result else: raise Exception("\n{0} -- File does not exist\n".format(file_path)) - + def formatDocIDs(self, values): for _key, _val in values.items(): if isinstance(_val, list): for theObject in _val: docName = self.formatDocumentName(theObject['filename']) - docID = theObject['id'] + docID = theObject['id'] self.doc_ids.update({docID: docName}) - - def formatDocumentName(self, val): - result = val.removesuffix('.pdf') - result = result.removesuffix('.txt') - result = result.replace('_', ' ') - return result + def formatDocumentName(self, val): + result = val.removesuffix('.pdf') + result = result.removesuffix('.txt') + result = result.replace('_', ' ') + return result def getInfo(self, doc_id): if doc_id in self.doc_ids.keys(): @@ -48,6 +46,7 @@ class DocIDs(): else: return doc_id + class RespContext(): def __init__(self, context): self.cntxt = context @@ -66,10 +65,9 @@ class RespContext(): def createAccordionFromJson(theContext): result = '' # loop thru json array - ndx = 0 for docID, summaryLst in theContext.items(): # item is a key with a list comboTxt = '' for entry in summaryLst: comboTxt += '\t' + entry['text'] - return result \ No newline at end of file + return result -- cgit v1.2.3 From f6acfd3d6024ad36ef82a8e27918b03f6538cccc Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 25 Apr 2024 19:14:12 +0300 Subject: Code refactoring * this commit removes unused imports and also refactors the GeneNetworkQAClient class --- gn3/llms/client.py | 67 
+++++++++++++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 042becd..b843907 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -2,19 +2,13 @@ import json import string import os -import datetime import time import requests from requests import Session -from urllib.parse import urljoin from requests.packages.urllib3.util.retry import Retry -from requests import HTTPError -from requests import Session from requests.adapters import HTTPAdapter -from urllib.request import urlretrieve from urllib.parse import quote -from gn3.llms.errors import UnprocessableEntity from gn3.llms.errors import LLMError basedir = os.path.join(os.path.dirname(__file__)) @@ -24,7 +18,8 @@ class TimeoutHTTPAdapter(HTTPAdapter): def __init__(self, timeout, *args, **kwargs): """TimeoutHTTPAdapter constructor. Args: - timeout (int): How many seconds to wait for the server to send data before + timeout (int): How many seconds to wait for the server to + send data before giving up. """ self.timeout = timeout @@ -43,7 +38,8 @@ class GeneNetworkQAClient(Session): """GeneNetworkQA Client This class provides a client object interface to the GeneNetworkQA API. - It extends the `requests.Session` class and includes authorization, base URL, + It extends the `requests.Session` class and includes authorization, + base URL, request timeouts, and request retries. Args: @@ -52,16 +48,19 @@ class GeneNetworkQAClient(Session): version (str, optional): API version, defaults to "v3". timeout (int, optional): Timeout value, defaults to 5. total_retries (int, optional): Total retries value, defaults to 5. - backoff_factor (int, optional): Retry backoff factor value, defaults to 30. + backoff_factor (int, optional): Retry backoff factor value, + defaults to 30. 
Usage: from genenetworkqa import GeneNetworkQAClient - gnqa = GeneNetworkQAClient(account="account-name", api_key="XXXXXXXXXXXXXXXXXXX...") + gnqa = GeneNetworkQAClient(account="account-name", + api_key="XXXXXXXXXXXXXXXXXXX...") """ BASE_URL = 'https://genenetwork.fahamuai.com/api/tasks' - def __init__(self, account, api_key, version="v3", timeout=30, total_retries=5, backoff_factor=30): + def __init__(self, account, api_key, version="v3", timeout=30, + total_retries=5, backoff_factor=30): super().__init__() self.headers.update( {"Authorization": "Bearer " + api_key}) @@ -95,31 +94,31 @@ class GeneNetworkQAClient(Session): return bib_info @staticmethod - def ask_the_documents(extend_url, my_auth): + def ask_the_documents(self, extend_url, my_auth): try: response = requests.post( - base_url + extend_url, data={}, headers=my_auth) + self.base_url + extend_url, data={}, headers=my_auth) response.raise_for_status() except requests.exceptions.RequestException as e: # Handle the exception appropriately, e.g., log the error raise RuntimeError(f"Error making the request: {e}") if response.status_code != 200: - return negative_status_msg(response), 0 + return GeneNetworkQAClient.negative_status_msg(response), 0 - task_id = get_task_id_from_result(response) - response = get_answer_using_task_id(task_id, my_auth) + task_id = GeneNetworkQAClient.get_task_id_from_result(response) + response = GeneNetworkQAClient.get_answer_using_task_id(task_id, + my_auth) if response.status_code != 200: - return negative_status_msg(response), 0 + return GeneNetworkQAClient.negative_status_msg(response), 0 return response, 1 @staticmethod def negative_status_msg(response): return f"Error: Status code -{response.status_code}- Reason::{response.reason}" - # return f"Problems\n\tStatus code => {response.status_code}\n\tReason => {response.reason}" def ask(self, exUrl, *args, **kwargs): askUrl = self.BASE_URL + exUrl @@ -147,18 +146,13 @@ class GeneNetworkQAClient(Session): 
response.raise_for_status() except requests.exceptions.HTTPError as error: - if error.response.status_code ==500: - raise LLMError(error.request, error.response, f"Response Error,status_code:{error.response.status_code},Reason: Use of Invalid Token") - elif error.response.status_code ==404: - raise LLMError(error.request,error.response,f"404 Client Error: Not Found for url: {self.BASE_URL}") + if error.response.status_code == 500: + raise LLMError(error.request, error.response, f"Response Error with:status_code:{error.response.status_code},Reason for error: Use of Invalid Fahamu Token") + elif error.response.status_code == 404: + raise LLMError(error.request, error.response, f"404 Client Error: Not Found for url: {self.BASE_URL}") raise error - except requests.exceptions.RequestException as error: - raise error - - - - + raise error if response.ok: if method.lower() == "get" and response.json().get("data") is None: time.sleep(retry_delay) @@ -175,11 +169,10 @@ class GeneNetworkQAClient(Session): result = f"?task_id={task_id.get('task_id', '')}" return result - @staticmethod - def get_answer_using_task_id(extend_url, my_auth): + def get_answer_using_task_id(self, extend_url, my_auth): try: response = requests.get( - answer_url + extend_url, data={}, headers=my_auth) + self.answer_url + extend_url, data={}, headers=my_auth) response.raise_for_status() return response except requests.exceptions.RequestException as error: @@ -189,7 +182,8 @@ class GeneNetworkQAClient(Session): @staticmethod def filter_response_text(val): """ - Filters out non-printable characters from the input string and parses it as JSON. + Filters out non-printable characters from + the input string and parses it as JSON. Args: val (str): Input string to be filtered and parsed. @@ -198,7 +192,8 @@ class GeneNetworkQAClient(Session): dict: Parsed JSON object. 
# remove this """ - return json.loads(''.join([str(char) for char in val if char in string.printable])) + return json.loads(''.join([str(char) for char in val if char + in string.printable])) def getTaskIDFromResult(self, res): return json.loads(res.text) @@ -208,11 +203,11 @@ class GeneNetworkQAClient(Session): def get_gnqa(self, query): qstr = quote(query) - res, task_id = api_client.ask('?ask=' + qstr) - res, success = api_client.get_answer(task_id) + res, task_id = GeneNetworkQAClient.ask('?ask=' + qstr) + res, success = GeneNetworkQAClient.get_answer(task_id) if success == 1: - resp_text = filter_response_text(res.text) + resp_text = GeneNetworkQAClient.filter_response_text(res.text) answer = resp_text.get('data', {}).get('answer', '') context = resp_text.get('data', {}).get('context', '') return answer, context -- cgit v1.2.3 From 852f5c65cdf78c92012afcf9790d272b4e3f4419 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 13:05:46 +0300 Subject: Pep8 Cleanup for llms/errors file --- gn3/llms/errors.py | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/gn3/llms/errors.py b/gn3/llms/errors.py index e9f7c02..af3d7b0 100644 --- a/gn3/llms/errors.py +++ b/gn3/llms/errors.py @@ -1,32 +1,11 @@ - -# pylint: skip-file +""" Error handlers for Fahamu Api""" import json - from requests import HTTPError class UnprocessableEntity(HTTPError): - """An HTTP 422 Unprocessable Entity error occurred. - + """Error for HTTP 422 Unprocessable Entity https://help.helpjuice.com/en_US/api-v3/api-v3#errors - - The request could not be processed, usually due to a missing or invalid parameter. - - The response will also include an error object with an explanation of fields that - are missing or invalid. Here is an example: - - .. code-block:: - - HTTP/1.1 422 Unprocessable Entity - - - { - "errors": [ - { - "email": "is not valid." 
- } - ] - } """ def __init__(self, request, response): @@ -57,6 +36,7 @@ class UnprocessableEntity(HTTPError): class LLMError(HTTPError): + """Custom error from making Fahamu APi request """ def __init__(self, request, response, msg): super(HTTPError, self).__init__( msg, request=request, response=response) -- cgit v1.2.3 From 167706bb417627b1d9fcacb7cdcebeafe886c1ba Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 13:17:18 +0300 Subject: Remove unused imports for llm.response file --- gn3/llms/response.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/gn3/llms/response.py b/gn3/llms/response.py index 93320e9..7fce75b 100644 --- a/gn3/llms/response.py +++ b/gn3/llms/response.py @@ -45,29 +45,3 @@ class DocIDs(): return self.doc_ids[doc_id] else: return doc_id - - -class RespContext(): - def __init__(self, context): - self.cntxt = context - self.theObj = {} - - def parseIntoObject(self, info): - # check for obj, arr, or val - for key, val in info.items(): - if isinstance(val, list): - self.parseIntoObject(val) - elif isinstance(val, str) or isinstance(val, int): - self.theObj[key] = val - self.theObj[key] = self.val - - -def createAccordionFromJson(theContext): - result = '' - # loop thru json array - for docID, summaryLst in theContext.items(): - # item is a key with a list - comboTxt = '' - for entry in summaryLst: - comboTxt += '\t' + entry['text'] - return result -- cgit v1.2.3 From 92ab53e9f0b10ebf7423626e37b3fe73eb8c0b65 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 14:05:02 +0300 Subject: Refactor code for llm:response --- gn3/llms/response.py | 60 ++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/gn3/llms/response.py b/gn3/llms/response.py index 7fce75b..2f00312 100644 --- a/gn3/llms/response.py +++ b/gn3/llms/response.py @@ -1,46 +1,42 @@ - -# pylint: skip-file +""" Module contains code for parsing references doc_ids 
""" +# pylint: disable=C0301 import json import os - basedir = os.path.abspath(os.path.dirname(__file__)) class DocIDs(): + """ Class Method to Parse document id and names""" def __init__(self): - # open doc ids for GN refs - self.doc_ids = self.loadFile("doc_ids.json") - # open doc ids for Diabetes references - self.sugar_doc_ids = self.loadFile("all_files.json") - # format is not what I prefer, it needs to be rebuilt - self.formatDocIDs(self.sugar_doc_ids) - - def loadFile(self, file_name): + """ + init method for Docids + * doc_ids.json: opens doc)ids for gn references + * sugar_doc_ids: open doci_ids for diabetes references + """ + self.doc_ids = self.load_file("doc_ids.json") + self.sugar_doc_ids = self.load_file("all_files.json") + self.format_doc_ids(self.sugar_doc_ids) + + def load_file(self, file_name): + """Method to load and read doc_id files""" file_path = os.path.join(basedir, file_name) if os.path.isfile(file_path): - f = open(file_path, "rb") - result = json.load(f) - f.close() - return result + with open(file_path, "rb") as file_handler: + return json.load(file_handler) else: - raise Exception("\n{0} -- File does not exist\n".format(file_path)) - - def formatDocIDs(self, values): - for _key, _val in values.items(): - if isinstance(_val, list): - for theObject in _val: - docName = self.formatDocumentName(theObject['filename']) - docID = theObject['id'] - self.doc_ids.update({docID: docName}) - - def formatDocumentName(self, val): - result = val.removesuffix('.pdf') - result = result.removesuffix('.txt') - result = result.replace('_', ' ') - return result - - def getInfo(self, doc_id): + raise FileNotFoundError(f"{file_path}-- FIle does not exist\n") + + def format_doc_ids(self, docs): + """method to format doc_ids for list items""" + for _key, val in docs.items(): + if isinstance(val, list): + for doc_obj in val: + doc_name = doc_obj["filename"].removesuffix(".pdf").removesuffix(".txt").replace("_", "") + self.doc_ids.update({doc_obj["id"]: 
doc_name}) + + def get_info(self, doc_id): + """ interface to make read from doc_ids""" if doc_id in self.doc_ids.keys(): return self.doc_ids[doc_id] else: -- cgit v1.2.3 From 69013d298c869a42059af13bc63bef1bbdc7393d Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 14:05:30 +0300 Subject: Update file to use correct import from response file --- gn3/llms/process.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index e38b73e..4edc238 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -49,8 +49,9 @@ def parse_context(context, get_info_func, format_bib_func): def rate_document(task_id, doc_id, rating, auth_token): - """This method is used to provide feedback for a document by making a rating.""" - # todo move this to clients + """This method is used to provide + feedback for a document by making a rating + """ try: url = urljoin(BASE_URL, f"""/feedback?task_id={task_id}&document_id={doc_id}&feedback={rating}""") @@ -107,7 +108,7 @@ def get_gnqa(query, auth_token, tmp_dir=""): answer = resp_text['data']['answer'] context = resp_text['data']['context'] references = parse_context( - context, DocIDs().getInfo, format_bibliography_info) + context, DocIDs().get_info, format_bibliography_info) references = fetch_pubmed(references, "pubmed.json", tmp_dir) return task_id, answer, references -- cgit v1.2.3 From 75365bd88a720261a1b454f0ea11a840fb3be83e Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 14:20:00 +0300 Subject: Move Parsing Doc_Ids to process file * Context: groups related items --- gn3/llms/process.py | 51 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 4edc238..1881e92 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -1,21 +1,56 @@ """this module contains code for processing response from fahamu client.py""" +# pylint: 
disable=C0301 import os import string import json +import logging +import requests from urllib.parse import urljoin from urllib.parse import quote -import logging -import requests from gn3.llms.client import GeneNetworkQAClient -from gn3.llms.response import DocIDs BASE_URL = 'https://genenetwork.fahamuai.com/api/tasks' - - -# pylint: disable=C0301 +BASEDIR = os.path.abspath(os.path.dirname(__file__)) + + +class DocIDs(): + """ Class Method to Parse document id and names from files""" + def __init__(self): + """ + init method for Docids + * doc_ids.json: opens doc)ids for gn references + * sugar_doc_ids: open doci_ids for diabetes references + """ + self.doc_ids = self.load_file("doc_ids.json") + self.sugar_doc_ids = self.load_file("all_files.json") + self.format_doc_ids(self.sugar_doc_ids) + + def load_file(self, file_name): + """Method to load and read doc_id files""" + file_path = os.path.join(BASEDIR, file_name) + if os.path.isfile(file_path): + with open(file_path, "rb") as file_handler: + return json.load(file_handler) + else: + raise FileNotFoundError(f"{file_path}-- FIle does not exist\n") + + def format_doc_ids(self, docs): + """method to format doc_ids for list items""" + for _key, val in docs.items(): + if isinstance(val, list): + for doc_obj in val: + doc_name = doc_obj["filename"].removesuffix(".pdf").removesuffix(".txt").replace("_", "") + self.doc_ids.update({doc_obj["id"]: doc_name}) + + def get_info(self, doc_id): + """ interface to make read from doc_ids""" + if doc_id in self.doc_ids.keys(): + return self.doc_ids[doc_id] + else: + return doc_id def format_bibliography_info(bib_info): @@ -131,6 +166,6 @@ def fetch_query_results(query, user_id, redis_conn): def get_user_queries(user_id, redis_conn): """methos to fetch all queries for a specific user""" - results = redis_conn.keys(f"LLM:{user_id}*") - return [query for query in [result.partition("-")[2] for result in results] if query != ""] + return [query for query in + [result.partition("-")[2] 
 for result in results] if query != ""] -- cgit v1.2.3 From f911db6b69b16ac5df57b27d213fa88a4c848f50 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 14:21:47 +0300 Subject: Delete response file * File is obsolete: its functionality has moved to the process.py file --- gn3/llms/response.py | 43 ------------------------------------------- 1 file changed, 43 deletions(-) delete mode 100644 gn3/llms/response.py diff --git a/gn3/llms/response.py b/gn3/llms/response.py deleted file mode 100644 index 2f00312..0000000 --- a/gn3/llms/response.py +++ /dev/null @@ -1,43 +0,0 @@ -""" Module contains code for parsing references doc_ids """ -# pylint: disable=C0301 -import json -import os - -basedir = os.path.abspath(os.path.dirname(__file__)) - - -class DocIDs(): - """ Class Method to Parse document id and names""" - def __init__(self): - """ - init method for Docids - * doc_ids.json: opens doc)ids for gn references - * sugar_doc_ids: open doci_ids for diabetes references - """ - self.doc_ids = self.load_file("doc_ids.json") - self.sugar_doc_ids = self.load_file("all_files.json") - self.format_doc_ids(self.sugar_doc_ids) - - def load_file(self, file_name): - """Method to load and read doc_id files""" - file_path = os.path.join(basedir, file_name) - if os.path.isfile(file_path): - with open(file_path, "rb") as file_handler: - return json.load(file_handler) - else: - raise FileNotFoundError(f"{file_path}-- FIle does not exist\n") - - def format_doc_ids(self, docs): - """method to format doc_ids for list items""" - for _key, val in docs.items(): - if isinstance(val, list): - for doc_obj in val: - doc_name = doc_obj["filename"].removesuffix(".pdf").removesuffix(".txt").replace("_", "") - self.doc_ids.update({doc_obj["id"]: doc_name}) - - def get_info(self, doc_id): - """ interface to make read from doc_ids""" - if doc_id in self.doc_ids.keys(): - return self.doc_ids[doc_id] - else: - return doc_id -- cgit v1.2.3 From a5a6e319e85c28ff3ab9d6f2d8a869bc2ac77ac8 Mon Sep 17 
00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 14:25:44 +0300 Subject: Delete function: only useful when training own llm model. --- gn3/llms/process.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 1881e92..e47a997 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -83,26 +83,6 @@ def parse_context(context, get_info_func, format_bib_func): return results -def rate_document(task_id, doc_id, rating, auth_token): - """This method is used to provide - feedback for a document by making a rating - """ - try: - url = urljoin(BASE_URL, - f"""/feedback?task_id={task_id}&document_id={doc_id}&feedback={rating}""") - headers = {"Authorization": f"Bearer {auth_token}"} - - resp = requests.post(url, headers=headers) - resp.raise_for_status() - - return {"status": "success", **resp.json()} - except requests.exceptions.HTTPError as http_error: - raise RuntimeError(f"HTTP Error Occurred:\ - {http_error.response.text} -with status code- {http_error.response.status_code}") from http_error - except Exception as error: - raise RuntimeError(f"An error occurred: {str(error)}") from error - - def load_file(filename, dir_path): """function to open and load json file""" file_path = os.path.join(dir_path, f"{filename}") -- cgit v1.2.3 From 3913374700521647e93bf9afabb9943746ac5d5b Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 14:27:20 +0300 Subject: Pep8 formatting gn3:llm:process. 
--- gn3/llms/process.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index e47a997..d080acb 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -6,9 +6,7 @@ import json import logging import requests -from urllib.parse import urljoin from urllib.parse import quote - from gn3.llms.client import GeneNetworkQAClient -- cgit v1.2.3 From f30300a82f605fa96130fbcbdcd17c53296d2372 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 16:34:34 +0300 Subject: Minor code refactoring related --- gn3/llms/process.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index d080acb..11961eb 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -4,9 +4,9 @@ import os import string import json import logging +from urllib.parse import quote import requests -from urllib.parse import quote from gn3.llms.client import GeneNetworkQAClient @@ -106,15 +106,24 @@ def fetch_pubmed(references, file_name, data_dir=""): return references -def get_gnqa(query, auth_token, tmp_dir=""): - """entry function for the gn3 api endpoint()""" +def get_gnqa(query, auth_token, data_dir=""): + """entry function for the gn3 api endpoint() + ARGS: + query: what is a gene + auth_token: token to connect to api_client + data_dir: base datirectory for gn3 data + Returns: + task_id: fahamu unique identifier for task + answer + references: contains doc_name,reference,pub_med_info + """ - api_client = GeneNetworkQAClient(requests.Session(), api_key=auth_token) + api_client = GeneNetworkQAClient(requests.Session(), auth_token) res, task_id = api_client.ask('?ask=' + quote(query), auth_token) if task_id == 0: raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}") - res, success = api_client.get_answer(task_id) - if success == 1: + res, status = api_client.get_answer(task_id) + if status == 1: resp_text = filter_response_text(res.text) if 
resp_text.get("data") is None: return task_id, "Please try to rephrase your question to receive feedback", [] @@ -122,7 +131,7 @@ def get_gnqa(query, auth_token, tmp_dir=""): context = resp_text['data']['context'] references = parse_context( context, DocIDs().get_info, format_bibliography_info) - references = fetch_pubmed(references, "pubmed.json", tmp_dir) + references = fetch_pubmed(references, "pubmed.json", data_dir) return task_id, answer, references else: -- cgit v1.2.3 From fe23477126b482472f6193797f7d88f59421900c Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 16:35:03 +0300 Subject: Init code refactoring for clients fahamu interface --- gn3/llms/client.py | 66 +++++++++--------------------------------------------- 1 file changed, 11 insertions(+), 55 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index b843907..14dcef3 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -1,20 +1,19 @@ +"""Module Contains code for making request to fahamu Api""" # pylint: skip-file import json import string -import os import time import requests - from requests import Session from requests.packages.urllib3.util.retry import Retry from requests.adapters import HTTPAdapter from urllib.parse import quote from gn3.llms.errors import LLMError -basedir = os.path.join(os.path.dirname(__file__)) - class TimeoutHTTPAdapter(HTTPAdapter): + """HTTP TimeoutAdapter """ + # todo rework on this def __init__(self, timeout, *args, **kwargs): """TimeoutHTTPAdapter constructor. 
Args: @@ -79,48 +78,29 @@ class GeneNetworkQAClient(Session): self.mount("https://", adapter) self.mount("http://", adapter) - @staticmethod - def format_bibliography_info(bib_info): - - if isinstance(bib_info, str): - # Remove '.txt' - bib_info = bib_info.removesuffix('.txt') - elif isinstance(bib_info, dict): - # Format string bibliography information - bib_info = "{0}.{1}.{2}.{3} ".format(bib_info.get('author', ''), - bib_info.get('title', ''), - bib_info.get('year', ''), - bib_info.get('doi', '')) - return bib_info - - @staticmethod def ask_the_documents(self, extend_url, my_auth): try: response = requests.post( self.base_url + extend_url, data={}, headers=my_auth) response.raise_for_status() except requests.exceptions.RequestException as e: - # Handle the exception appropriately, e.g., log the error raise RuntimeError(f"Error making the request: {e}") - if response.status_code != 200: return GeneNetworkQAClient.negative_status_msg(response), 0 - task_id = GeneNetworkQAClient.get_task_id_from_result(response) response = GeneNetworkQAClient.get_answer_using_task_id(task_id, my_auth) - if response.status_code != 200: - return GeneNetworkQAClient.negative_status_msg(response), 0 - return response, 1 @staticmethod def negative_status_msg(response): + """ handler for non 200 response from fahamu api""" return f"Error: Status code -{response.status_code}- Reason::{response.reason}" def ask(self, exUrl, *args, **kwargs): + """fahamu ask api interface""" askUrl = self.BASE_URL + exUrl res = self.custom_request('POST', askUrl, *args, **kwargs) if (res.status_code != 200): @@ -129,6 +109,7 @@ class GeneNetworkQAClient(Session): return res, task_id def get_answer(self, taskid, *args, **kwargs): + """Fahamu get answer interface""" query = self.answer_url + self.extendTaskID(taskid) res = self.custom_request('GET', query, *args, **kwargs) if (res.status_code != 200): @@ -136,15 +117,13 @@ class GeneNetworkQAClient(Session): return res, 1 def custom_request(self, method, 
url, *args, **kwargs): - + """ make custom request to fahamu api ask and get response""" max_retries = 50 retry_delay = 3 - for i in range(max_retries): try: response = super().request(method, url, *args, **kwargs) response.raise_for_status() - except requests.exceptions.HTTPError as error: if error.response.status_code == 500: raise LLMError(error.request, error.response, f"Response Error with:status_code:{error.response.status_code},Reason for error: Use of Invalid Fahamu Token") @@ -165,33 +144,23 @@ class GeneNetworkQAClient(Session): @staticmethod def get_task_id_from_result(response): + """method to get task_id from response""" task_id = json.loads(response.text) - result = f"?task_id={task_id.get('task_id', '')}" - return result + return f"?task_id={task_id.get('task_id', '')}" def get_answer_using_task_id(self, extend_url, my_auth): + """call this method with task id to fetch response""" try: response = requests.get( self.answer_url + extend_url, data={}, headers=my_auth) response.raise_for_status() return response except requests.exceptions.RequestException as error: - # Handle the exception appropriately, e.g., log the error raise error @staticmethod def filter_response_text(val): - """ - Filters out non-printable characters from - the input string and parses it as JSON. - - Args: - val (str): Input string to be filtered and parsed. - - Returns: - dict: Parsed JSON object. 
- # remove this - """ + """method to filter out non-printable chacracters""" return json.loads(''.join([str(char) for char in val if char in string.printable])) @@ -200,16 +169,3 @@ class GeneNetworkQAClient(Session): def extendTaskID(self, task_id): return '?task_id=' + str(task_id['task_id']) - - def get_gnqa(self, query): - qstr = quote(query) - res, task_id = GeneNetworkQAClient.ask('?ask=' + qstr) - res, success = GeneNetworkQAClient.get_answer(task_id) - - if success == 1: - resp_text = GeneNetworkQAClient.filter_response_text(res.text) - answer = resp_text.get('data', {}).get('answer', '') - context = resp_text.get('data', {}).get('context', '') - return answer, context - else: - return res, "Unfortunately, I have nothing." -- cgit v1.2.3 From e0aadacbfc23f240c9dad1d0cd430ffcfa99d547 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 17:50:57 +0300 Subject: Pylint Fixes. --- gn3/llms/client.py | 83 ++++++++++++++++++++++-------------------------------- 1 file changed, 34 insertions(+), 49 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 14dcef3..2e9898f 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -1,13 +1,13 @@ """Module Contains code for making request to fahamu Api""" -# pylint: skip-file +# pylint: disable=C0301 import json -import string import time + import requests from requests import Session -from requests.packages.urllib3.util.retry import Retry from requests.adapters import HTTPAdapter -from urllib.parse import quote + +from urllib3.util import Retry from gn3.llms.errors import LLMError @@ -56,15 +56,14 @@ class GeneNetworkQAClient(Session): api_key="XXXXXXXXXXXXXXXXXXX...") """ - BASE_URL = 'https://genenetwork.fahamuai.com/api/tasks' - def __init__(self, account, api_key, version="v3", timeout=30, total_retries=5, backoff_factor=30): super().__init__() self.headers.update( {"Authorization": "Bearer " + api_key}) - self.answer_url = f"{self.BASE_URL}/answers" - self.feedback_url = 
f"{self.BASE_URL}/feedback" + self.base_url = "https://genenetwork.fahamuai.com/api/tasks" + self.answer_url = f"{self.base_url}/answers" + self.feedback_url = f"{self.base_url}/feedback" adapter = TimeoutHTTPAdapter( timeout=timeout, @@ -83,8 +82,8 @@ class GeneNetworkQAClient(Session): response = requests.post( self.base_url + extend_url, data={}, headers=my_auth) response.raise_for_status() - except requests.exceptions.RequestException as e: - raise RuntimeError(f"Error making the request: {e}") + except requests.exceptions.RequestException as error: + raise RuntimeError(f"Error making the request: {error}") from error if response.status_code != 200: return GeneNetworkQAClient.negative_status_msg(response), 0 task_id = GeneNetworkQAClient.get_task_id_from_result(response) @@ -99,36 +98,50 @@ class GeneNetworkQAClient(Session): """ handler for non 200 response from fahamu api""" return f"Error: Status code -{response.status_code}- Reason::{response.reason}" - def ask(self, exUrl, *args, **kwargs): + def ask(self, ex_url, *args, **kwargs): """fahamu ask api interface""" - askUrl = self.BASE_URL + exUrl - res = self.custom_request('POST', askUrl, *args, **kwargs) - if (res.status_code != 200): + res = self.custom_request('POST', f"{self.base_url}{ex_url}", *args, **kwargs) + if res.status_code != 200: return self.negative_status_msg(res), 0 - task_id = self.getTaskIDFromResult(res) - return res, task_id + return res, json.loads(res.text) def get_answer(self, taskid, *args, **kwargs): """Fahamu get answer interface""" - query = self.answer_url + self.extendTaskID(taskid) + query = f"{self.answer_url}?task_id={taskid['task_id']}" res = self.custom_request('GET', query, *args, **kwargs) - if (res.status_code != 200): + if res.status_code != 200: return self.negative_status_msg(res), 0 return res, 1 + @staticmethod + def get_task_id_from_result(response): + """method to get task_id from response""" + task_id = json.loads(response.text) + return 
f"?task_id={task_id.get('task_id', '')}" + + def get_answer_using_task_id(self, extend_url, my_auth): + """call this method with task id to fetch response""" + try: + response = requests.get( + self.answer_url + extend_url, data={}, headers=my_auth) + response.raise_for_status() + return response + except requests.exceptions.RequestException as error: + raise error + def custom_request(self, method, url, *args, **kwargs): """ make custom request to fahamu api ask and get response""" max_retries = 50 retry_delay = 3 - for i in range(max_retries): + for _i in range(max_retries): try: response = super().request(method, url, *args, **kwargs) response.raise_for_status() except requests.exceptions.HTTPError as error: if error.response.status_code == 500: - raise LLMError(error.request, error.response, f"Response Error with:status_code:{error.response.status_code},Reason for error: Use of Invalid Fahamu Token") + raise LLMError(error.request, error.response, f"Response Error with:status_code:{error.response.status_code},Reason for error: Use of Invalid Fahamu Token") from error elif error.response.status_code == 404: - raise LLMError(error.request, error.response, f"404 Client Error: Not Found for url: {self.BASE_URL}") + raise LLMError(error.request, error.response, f"404 Client Error: Not Found for url: {self.base_url}") from error raise error except requests.exceptions.RequestException as error: raise error @@ -141,31 +154,3 @@ class GeneNetworkQAClient(Session): else: time.sleep(retry_delay) return response - - @staticmethod - def get_task_id_from_result(response): - """method to get task_id from response""" - task_id = json.loads(response.text) - return f"?task_id={task_id.get('task_id', '')}" - - def get_answer_using_task_id(self, extend_url, my_auth): - """call this method with task id to fetch response""" - try: - response = requests.get( - self.answer_url + extend_url, data={}, headers=my_auth) - response.raise_for_status() - return response - except 
requests.exceptions.RequestException as error: - raise error - - @staticmethod - def filter_response_text(val): - """method to filter out non-printable chacracters""" - return json.loads(''.join([str(char) for char in val if char - in string.printable])) - - def getTaskIDFromResult(self, res): - return json.loads(res.text) - - def extendTaskID(self, task_id): - return '?task_id=' + str(task_id['task_id']) -- cgit v1.2.3 From 50c8500105912a6380ea8f971ccfb17ef0994279 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 18:56:41 +0300 Subject: Refactor code for http request adapters. --- gn3/llms/client.py | 43 +++++++++++-------------------------------- gn3/llms/process.py | 2 +- 2 files changed, 12 insertions(+), 33 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 2e9898f..810227f 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -1,5 +1,6 @@ """Module Contains code for making request to fahamu Api""" # pylint: disable=C0301 +# pylint: disable=R0913 import json import time @@ -24,13 +25,13 @@ class TimeoutHTTPAdapter(HTTPAdapter): self.timeout = timeout super().__init__(*args, **kwargs) - def send(self, request, **kwargs): + def send(self, *args, **kwargs): """Override :obj:`HTTPAdapter` send method to add a default timeout.""" timeout = kwargs.get("timeout") if timeout is None: kwargs["timeout"] = self.timeout - return super().send(request, **kwargs) + return super().send(*args, **kwargs) class GeneNetworkQAClient(Session): @@ -77,21 +78,15 @@ class GeneNetworkQAClient(Session): self.mount("https://", adapter) self.mount("http://", adapter) - def ask_the_documents(self, extend_url, my_auth): + def get_answer_using_task_id(self, extend_url, my_auth): + """call this method with task id to fetch response""" try: - response = requests.post( - self.base_url + extend_url, data={}, headers=my_auth) + response = requests.get( + self.answer_url + extend_url, data={}, headers=my_auth) response.raise_for_status() + return 
response except requests.exceptions.RequestException as error: - raise RuntimeError(f"Error making the request: {error}") from error - if response.status_code != 200: - return GeneNetworkQAClient.negative_status_msg(response), 0 - task_id = GeneNetworkQAClient.get_task_id_from_result(response) - response = GeneNetworkQAClient.get_answer_using_task_id(task_id, - my_auth) - if response.status_code != 200: - return GeneNetworkQAClient.negative_status_msg(response), 0 - return response, 1 + raise error @staticmethod def negative_status_msg(response): @@ -102,7 +97,7 @@ class GeneNetworkQAClient(Session): """fahamu ask api interface""" res = self.custom_request('POST', f"{self.base_url}{ex_url}", *args, **kwargs) if res.status_code != 200: - return self.negative_status_msg(res), 0 + return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0 return res, json.loads(res.text) def get_answer(self, taskid, *args, **kwargs): @@ -110,25 +105,9 @@ class GeneNetworkQAClient(Session): query = f"{self.answer_url}?task_id={taskid['task_id']}" res = self.custom_request('GET', query, *args, **kwargs) if res.status_code != 200: - return self.negative_status_msg(res), 0 + return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0 return res, 1 - @staticmethod - def get_task_id_from_result(response): - """method to get task_id from response""" - task_id = json.loads(response.text) - return f"?task_id={task_id.get('task_id', '')}" - - def get_answer_using_task_id(self, extend_url, my_auth): - """call this method with task id to fetch response""" - try: - response = requests.get( - self.answer_url + extend_url, data={}, headers=my_auth) - response.raise_for_status() - return response - except requests.exceptions.RequestException as error: - raise error - def custom_request(self, method, url, *args, **kwargs): """ make custom request to fahamu api ask and get response""" max_retries = 50 diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 
11961eb..9cb09a1 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -118,7 +118,7 @@ def get_gnqa(query, auth_token, data_dir=""): references: contains doc_name,reference,pub_med_info """ - api_client = GeneNetworkQAClient(requests.Session(), auth_token) + api_client = GeneNetworkQAClient(requests.Session(), api_key=auth_token) res, task_id = api_client.ask('?ask=' + quote(query), auth_token) if task_id == 0: raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}") -- cgit v1.2.3 From 4d39c26b65aad3fa48d35fc11007f5f3afe1c112 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 17 May 2024 12:05:16 +0300 Subject: Timeout code refactoring. --- gn3/llms/client.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 810227f..05e3500 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -7,30 +7,23 @@ import time import requests from requests import Session from requests.adapters import HTTPAdapter +from requests.adapters import Retry -from urllib3.util import Retry from gn3.llms.errors import LLMError class TimeoutHTTPAdapter(HTTPAdapter): - """HTTP TimeoutAdapter """ - # todo rework on this + """Set a default timeout for HTTP calls """ def __init__(self, timeout, *args, **kwargs): - """TimeoutHTTPAdapter constructor. - Args: - timeout (int): How many seconds to wait for the server to - send data before - giving up. 
- """ + """TimeoutHTTPAdapter constructor.""" self.timeout = timeout super().__init__(*args, **kwargs) def send(self, *args, **kwargs): """Override :obj:`HTTPAdapter` send method to add a default timeout.""" - timeout = kwargs.get("timeout") - if timeout is None: - kwargs["timeout"] = self.timeout - + kwargs["timeout"] = ( + kwargs["timeout"] if kwargs.get("timeout") else self.timeout + ) return super().send(*args, **kwargs) -- cgit v1.2.3 From d3bc323fe3a965ee5b6917987c4fe7662056e560 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 17 May 2024 13:34:37 +0300 Subject: Refactor custom request codebase. --- gn3/llms/client.py | 40 +++++++++++++++++++--------------------- gn3/llms/process.py | 5 ++--- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 05e3500..d57bca2 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -1,6 +1,5 @@ """Module Contains code for making request to fahamu Api""" # pylint: disable=C0301 -# pylint: disable=R0913 import json import time @@ -36,9 +35,7 @@ class GeneNetworkQAClient(Session): request timeouts, and request retries. Args: - account (str): Base address subdomain. api_key (str): API key. - version (str, optional): API version, defaults to "v3". timeout (int, optional): Timeout value, defaults to 5. total_retries (int, optional): Total retries value, defaults to 5. 
backoff_factor (int, optional): Retry backoff factor value, @@ -50,7 +47,7 @@ class GeneNetworkQAClient(Session): api_key="XXXXXXXXXXXXXXXXXXX...") """ - def __init__(self, account, api_key, version="v3", timeout=30, + def __init__(self, api_key, timeout=30, total_retries=5, backoff_factor=30): super().__init__() self.headers.update( @@ -95,11 +92,14 @@ class GeneNetworkQAClient(Session): def get_answer(self, taskid, *args, **kwargs): """Fahamu get answer interface""" - query = f"{self.answer_url}?task_id={taskid['task_id']}" - res = self.custom_request('GET', query, *args, **kwargs) - if res.status_code != 200: - return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0 - return res, 1 + try: + query = f"{self.answer_url}?task_id={taskid['task_id']}" + res = self.custom_request('GET', query, *args, **kwargs) + if res.status_code != 200: + return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0 + return res, 1 + except TimeoutError: + return "Timeout error occured:try to rephrase your query", 0 def custom_request(self, method, url, *args, **kwargs): """ make custom request to fahamu api ask and get response""" @@ -109,20 +109,18 @@ class GeneNetworkQAClient(Session): try: response = super().request(method, url, *args, **kwargs) response.raise_for_status() + if response.ok: + if method.lower() == "get" and response.json().get("data") is None: + time.sleep(retry_delay) + continue + return response + else: + time.sleep(retry_delay) except requests.exceptions.HTTPError as error: if error.response.status_code == 500: raise LLMError(error.request, error.response, f"Response Error with:status_code:{error.response.status_code},Reason for error: Use of Invalid Fahamu Token") from error - elif error.response.status_code == 404: - raise LLMError(error.request, error.response, f"404 Client Error: Not Found for url: {self.base_url}") from error - raise error + raise LLMError(error.request, error.response, + f"HTTP error occurred with error 
status:{error.response.status_code}") from error except requests.exceptions.RequestException as error: raise error - if response.ok: - if method.lower() == "get" and response.json().get("data") is None: - time.sleep(retry_delay) - continue - else: - return response - else: - time.sleep(retry_delay) - return response + raise TimeoutError diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 9cb09a1..4725bcb 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -5,7 +5,6 @@ import string import json import logging from urllib.parse import quote -import requests from gn3.llms.client import GeneNetworkQAClient @@ -118,7 +117,7 @@ def get_gnqa(query, auth_token, data_dir=""): references: contains doc_name,reference,pub_med_info """ - api_client = GeneNetworkQAClient(requests.Session(), api_key=auth_token) + api_client = GeneNetworkQAClient(api_key=auth_token) res, task_id = api_client.ask('?ask=' + quote(query), auth_token) if task_id == 0: raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}") @@ -135,7 +134,7 @@ def get_gnqa(query, auth_token, data_dir=""): return task_id, answer, references else: - return task_id, "Please try to rephrase your question to receive feedback", [] + return task_id, "We couldn't provide a response,Please try to rephrase your question to receive feedback", [] def fetch_query_results(query, user_id, redis_conn): -- cgit v1.2.3 From 0832e9579d6fe6c9b46bfa499f1c7726301ac10c Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 17 May 2024 17:07:32 +0300 Subject: Add endpoint for getting prev user searches --- gn3/api/llm.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 442252f..41cc376 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -90,6 +90,18 @@ def rating(task_id): return jsonify({"error": str(error)}), 500 + +@gnqa.route("/searches/", methods=["GET"]) +@require_oauth("profile user") +def fetch_prev_searches(): + with (require_oauth.acquire("profile 
user") as __the_token, + Redis.from_url(current_app.config["REDIS_URI"], + decode_responses=True) as redis_conn): + return jsonify({ + "prev_queries": get_user_queries("random_user", redis_conn) + }) + + @gnqa.route("/history/", methods=["GET"]) @require_oauth("profile user") def fetch_user_hist(query): @@ -103,6 +115,7 @@ def fetch_user_hist(query): }) + @gnqa.route("/historys/", methods=["GET"]) def fetch_users_hist_records(query): """method to fetch all users hist:note this is a test functionality -- cgit v1.2.3 From 8201936afc4a8330a3dfa25a26b3786f44e8e378 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 21 May 2024 16:35:16 +0300 Subject: Add search gnqa history functionality. --- gn3/api/llm.py | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 41cc376..b2c9c3e 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -1,5 +1,4 @@ """Api endpoints for gnqa""" -from datetime import timedelta import json import sqlite3 from redis import Redis @@ -14,6 +13,7 @@ from gn3.llms.process import get_user_queries from gn3.llms.process import fetch_query_results from gn3.llms.errors import LLMError from gn3.auth.authorisation.oauth2.resource_server import require_oauth + from gn3.auth import db gnqa = Blueprint("gnqa", __name__) @@ -39,15 +39,17 @@ def gnqna(): "answer": answer, "references": refs } - with (Redis.from_url(current_app.config["REDIS_URI"], - decode_responses=True) as redis_conn): - redis_conn.setex( - f"LLM:random_user-{query}", - timedelta(days=10), json.dumps(response)) - return jsonify({ - **response, - "prev_queries": get_user_queries("random_user", redis_conn) - }) + try: + with (Redis.from_url(current_app.config["REDIS_URI"], + decode_responses=True) as redis_conn, + require_oauth.acquire("profile user") as token): + redis_conn.set( + f"LLM:{str(token.user.user_id)}-{str(task_id['task_id'])}", + json.dumps(response) + ) + return response + except 
Exception: # handle specific error + return response except LLMError as error: return jsonify({"query": query, "error": f"Request failed-{str(error)}"}), 500 @@ -90,16 +92,19 @@ def rating(task_id): return jsonify({"error": str(error)}), 500 - -@gnqa.route("/searches/", methods=["GET"]) +@gnqa.route("/searches", methods=["GET"]) @require_oauth("profile user") def fetch_prev_searches(): - with (require_oauth.acquire("profile user") as __the_token, + """ api method to fetch search query records""" + with (require_oauth.acquire("profile user") as the_token, Redis.from_url(current_app.config["REDIS_URI"], decode_responses=True) as redis_conn): - return jsonify({ - "prev_queries": get_user_queries("random_user", redis_conn) - }) + if request.args.get("search_term"): + return jsonify(json.loads(redis_conn.get(request.args.get("search_term")))) + query_result = {} + for key in redis_conn.scan_iter(f"LLM:{str(the_token.user.user_id)}*"): + query_result[key] = json.loads(redis_conn.get(key)) + return jsonify(query_result) @gnqa.route("/history/", methods=["GET"]) @@ -110,12 +115,11 @@ def fetch_user_hist(query): Redis.from_url(current_app.config["REDIS_URI"], decode_responses=True) as redis_conn): return jsonify({ - **fetch_query_results(query, the_token.user.id, redis_conn), + **fetch_query_results(query, the_token.user.user_id, redis_conn), "prev_queries": get_user_queries("random_user", redis_conn) }) - @gnqa.route("/historys/", methods=["GET"]) def fetch_users_hist_records(query): """method to fetch all users hist:note this is a test functionality -- cgit v1.2.3 From 2a99da9f46233a28e9ea0b6a297d8a6b93f61923 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 21 May 2024 16:38:53 +0300 Subject: Remove obsolete functions. 
--- gn3/api/llm.py | 39 +-------------------------------------- gn3/llms/process.py | 20 -------------------- 2 files changed, 1 insertion(+), 58 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index b2c9c3e..02b37f9 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -9,8 +9,6 @@ from flask import jsonify from flask import request from gn3.llms.process import get_gnqa -from gn3.llms.process import get_user_queries -from gn3.llms.process import fetch_query_results from gn3.llms.errors import LLMError from gn3.auth.authorisation.oauth2.resource_server import require_oauth @@ -46,7 +44,7 @@ def gnqna(): redis_conn.set( f"LLM:{str(token.user.user_id)}-{str(task_id['task_id'])}", json.dumps(response) - ) + ) return response except Exception: # handle specific error return response @@ -105,38 +103,3 @@ def fetch_prev_searches(): for key in redis_conn.scan_iter(f"LLM:{str(the_token.user.user_id)}*"): query_result[key] = json.loads(redis_conn.get(key)) return jsonify(query_result) - - -@gnqa.route("/history/", methods=["GET"]) -@require_oauth("profile user") -def fetch_user_hist(query): - """"Endpoint to fetch previos searches for User""" - with (require_oauth.acquire("profile user") as the_token, - Redis.from_url(current_app.config["REDIS_URI"], - decode_responses=True) as redis_conn): - return jsonify({ - **fetch_query_results(query, the_token.user.user_id, redis_conn), - "prev_queries": get_user_queries("random_user", redis_conn) - }) - - -@gnqa.route("/historys/", methods=["GET"]) -def fetch_users_hist_records(query): - """method to fetch all users hist:note this is a test functionality - to be replaced by fetch_user_hist - """ - with Redis.from_url(current_app.config["REDIS_URI"], - decode_responses=True) as redis_conn: - return jsonify({ - **fetch_query_results(query, "random_user", redis_conn), - "prev_queries": get_user_queries("random_user", redis_conn) - }) - - -@gnqa.route("/get_hist_names", methods=["GET"]) -def fetch_prev_hist_ids(): - """Test 
method for fetching history for Anony Users""" - with (Redis.from_url(current_app.config["REDIS_URI"], - decode_responses=True)) as redis_conn: - return jsonify({"prev_queries": get_user_queries("random_user", - redis_conn)}) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 4725bcb..eba7e4b 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -135,23 +135,3 @@ def get_gnqa(query, auth_token, data_dir=""): return task_id, answer, references else: return task_id, "We couldn't provide a response,Please try to rephrase your question to receive feedback", [] - - -def fetch_query_results(query, user_id, redis_conn): - """this method fetches prev user query searches""" - result = redis_conn.get(f"LLM:{user_id}-{query}") - if result: - return json.loads(result) - return { - "query": query, - "answer": "Sorry No answer for you", - "references": [], - "task_id": None - } - - -def get_user_queries(user_id, redis_conn): - """methos to fetch all queries for a specific user""" - results = redis_conn.keys(f"LLM:{user_id}*") - return [query for query in - [result.partition("-")[2] for result in results] if query != ""] -- cgit v1.2.3 From 12100489a73094016602926183e0ee51002fb9c6 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Wed, 22 May 2024 13:37:32 +0300 Subject: Register LLM error in app. 
* do refactoring for gn3:llm:errors --- gn3/errors.py | 17 ++++++++++++++++- gn3/llms/errors.py | 14 +++++++++----- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/gn3/errors.py b/gn3/errors.py index 1833bf6..ac9e070 100644 --- a/gn3/errors.py +++ b/gn3/errors.py @@ -16,7 +16,7 @@ from authlib.oauth2.rfc6749.errors import OAuth2Error from flask import Flask, jsonify, Response, current_app from gn3.auth.authorisation.errors import AuthorisationError - +from gn3.llms.errors import LLMError def add_trace(exc: Exception, jsonmsg: dict) -> dict: """Add the traceback to the error handling object.""" @@ -106,6 +106,20 @@ def handle_generic(exc: Exception) -> Response: return resp +def handle_llm_error(exc: Exception) -> Response: + """ Handle llm erros if not handled anywhere else. """ + resp = jsonify({ + "query": exc.query if exc.query else "", + "error_type": type(exc).__name__, + "error": ( + exc.args if bool(exc.args) else "Fahamu gnqa error occurred" + ), + "trace": traceback.format_exc() + }) + resp.status_code = 500 + return resp + + def register_error_handlers(app: Flask): """Register application-level error handlers.""" app.register_error_handler(NotFound, page_not_found) @@ -115,6 +129,7 @@ def register_error_handlers(app: Flask): app.register_error_handler(AuthorisationError, handle_authorisation_error) app.register_error_handler(RemoteDisconnected, internal_server_error) app.register_error_handler(URLError, url_server_error) + app.register_error_handler(LLMError, handle_llm_error) for exc in ( EndPointInternalError, EndPointNotFound, diff --git a/gn3/llms/errors.py b/gn3/llms/errors.py index af3d7b0..3512f4d 100644 --- a/gn3/llms/errors.py +++ b/gn3/llms/errors.py @@ -35,8 +35,12 @@ class UnprocessableEntity(HTTPError): msg, request=request, response=response) -class LLMError(HTTPError): - """Custom error from making Fahamu APi request """ - def __init__(self, request, response, msg): - super(HTTPError, self).__init__( - msg, request=request, 
response=response) +class LLMErrorMIxins(Exception): + """base class for llm errors""" + + +class LLMError(LLMErrorMIxins): + """custom exception for LLMErrorMIxins""" + def __init__(self, *args, **kwargs): + super().__init__(*args) + self.query = kwargs.get("query") -- cgit v1.2.3 From a304037ce012516b07c17fd0dcb9e816c33a4d58 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Wed, 22 May 2024 13:38:06 +0300 Subject: Refactor error handling for llm api's. --- gn3/api/llm.py | 49 ++++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 02b37f9..8e8a50f 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -1,17 +1,20 @@ """Api endpoints for gnqa""" import json import sqlite3 +import redis from redis import Redis +from authlib.integrations.flask_oauth2.errors import _HTTPException from flask import Blueprint from flask import current_app from flask import jsonify from flask import request + from gn3.llms.process import get_gnqa from gn3.llms.errors import LLMError from gn3.auth.authorisation.oauth2.resource_server import require_oauth - +from gn3.auth.authorisation.errors import AuthorisationError from gn3.auth import db gnqa = Blueprint("gnqa", __name__) @@ -23,12 +26,10 @@ def gnqna(): query = request.json.get("querygnqa", "") if not query: return jsonify({"error": "querygnqa is missing in the request"}), 400 - try: fahamu_token = current_app.config.get("FAHAMU_AUTH_TOKEN") if fahamu_token is None: - return jsonify({"query": query, - "error": "Use of invalid fahamu auth token"}), 500 + raise LLMError("Request failed:an LLM authorisation token is required ", query=query) task_id, answer, refs = get_gnqa( query, fahamu_token, current_app.config.get("DATA_DIR")) response = { @@ -46,16 +47,16 @@ def gnqna(): json.dumps(response) ) return response - except Exception: # handle specific error - return response + except _HTTPException as httpe: + raise 
AuthorisationError("Authentication is required.") from httpe except LLMError as error: - return jsonify({"query": query, - "error": f"Request failed-{str(error)}"}), 500 + raise LLMError(f"request failed for query {str(error.args[-1])}", + query=query) from error @gnqa.route("/rating/", methods=["POST"]) @require_oauth("profile") -def rating(task_id): +def rate_queries(task_id): """Endpoint for rating qnqa query and answer""" try: llm_db_path = current_app.config["LLM_DB_PATH"] @@ -87,19 +88,25 @@ def rating(task_id): "You have successfully rated this query:Thank you!!" }, 200 except sqlite3.Error as error: - return jsonify({"error": str(error)}), 500 + raise sqlite3.OperationalError from error + except _HTTPException as httpe: + raise AuthorisationError("Authentication is required") from httpe -@gnqa.route("/searches", methods=["GET"]) +@gnqa.route("/history", methods=["GET"]) @require_oauth("profile user") -def fetch_prev_searches(): +def fetch_prev_history(): """ api method to fetch search query records""" - with (require_oauth.acquire("profile user") as the_token, - Redis.from_url(current_app.config["REDIS_URI"], - decode_responses=True) as redis_conn): - if request.args.get("search_term"): - return jsonify(json.loads(redis_conn.get(request.args.get("search_term")))) - query_result = {} - for key in redis_conn.scan_iter(f"LLM:{str(the_token.user.user_id)}*"): - query_result[key] = json.loads(redis_conn.get(key)) - return jsonify(query_result) + try: + + with (require_oauth.acquire("profile user") as the_token, + Redis.from_url(current_app.config["REDIS_URI"], + decode_responses=True) as redis_conn): + if request.args.get("search_term"): + return jsonify(json.loads(redis_conn.get(request.args.get("search_term")))) + query_result = {} + for key in redis_conn.scan_iter(f"LLM:{str(the_token.user.user_id)}*"): + query_result[key] = json.loads(redis_conn.get(key)) + return jsonify(query_result) + except redis.exceptions.RedisError as error: + raise error -- cgit 
v1.2.3 From c280bae2bd97d17189763d3ce76cfdfc35588fd2 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Wed, 22 May 2024 18:11:58 +0300 Subject: This commit does the following: * Adds a new table to store the history records. * Remove the redis dependency. --- gn3/api/llm.py | 52 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 8e8a50f..2ed52eb 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -1,8 +1,6 @@ """Api endpoints for gnqa""" import json import sqlite3 -import redis -from redis import Redis from authlib.integrations.flask_oauth2.errors import _HTTPException from flask import Blueprint @@ -39,16 +37,26 @@ def gnqna(): "references": refs } try: - with (Redis.from_url(current_app.config["REDIS_URI"], - decode_responses=True) as redis_conn, + with (db.connection(current_app.config["LLM_DB_PATH"]) as conn, require_oauth.acquire("profile user") as token): - redis_conn.set( - f"LLM:{str(token.user.user_id)}-{str(task_id['task_id'])}", - json.dumps(response) + schema = """CREATE TABLE IF NOT EXISTS + history(user_id TEXT NOT NULL, + task_id TEXT NOT NULL, + query TEXT NOT NULL, + results TEXT, + PRIMARY KEY(task_id)) WITHOUT ROWID""" + cursor = conn.cursor() + cursor.execute(schema) + cursor.execute("""INSERT INTO history(user_id,task_id,query,results) + VALUES(?,?,?,?) 
+ """,(str(token.user.user_id),str(task_id["task_id"]),query, + json.dumps(response)) ) - return response + return response except _HTTPException as httpe: raise AuthorisationError("Authentication is required.") from httpe + except sqlite3.Error as error: + raise error except LLMError as error: raise LLMError(f"request failed for query {str(error.args[-1])}", query=query) from error @@ -92,21 +100,25 @@ def rate_queries(task_id): except _HTTPException as httpe: raise AuthorisationError("Authentication is required") from httpe - @gnqa.route("/history", methods=["GET"]) @require_oauth("profile user") def fetch_prev_history(): """ api method to fetch search query records""" try: - - with (require_oauth.acquire("profile user") as the_token, - Redis.from_url(current_app.config["REDIS_URI"], - decode_responses=True) as redis_conn): + llm_db_path = current_app.config["LLM_DB_PATH"] + with (require_oauth.acquire("profile user") as token, + db.connection(llm_db_path) as conn): + cursor = conn.cursor() if request.args.get("search_term"): - return jsonify(json.loads(redis_conn.get(request.args.get("search_term")))) - query_result = {} - for key in redis_conn.scan_iter(f"LLM:{str(the_token.user.user_id)}*"): - query_result[key] = json.loads(redis_conn.get(key)) - return jsonify(query_result) - except redis.exceptions.RedisError as error: - raise error + query = """SELECT results from history Where task_id=? 
and user_id=?""" + cursor.execute(query, (request.args.get("search_term") + ,str(token.user.user_id),)) + return dict(cursor.fetchone()) + query = """SELECT task_id,query from history WHERE user_id=?""" + cursor.execute(query, (str(token.user.user_id),)) + return [dict(item) for item in cursor.fetchall()] + + except sqlite3.Error as error: #please handle me corrrectly + return jsonify({"error":error}), 500 + except _HTTPException as httpe: + raise AuthorisationError("Authorization is required") from httpe -- cgit v1.2.3 From 7d3b2a29f5497d88b7c1391a7f5631591889ab36 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 23 May 2024 12:10:52 +0300 Subject: Refactor error handling code. --- gn3/api/llm.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 2ed52eb..172e49b 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -8,7 +8,6 @@ from flask import current_app from flask import jsonify from flask import request - from gn3.llms.process import get_gnqa from gn3.llms.errors import LLMError from gn3.auth.authorisation.oauth2.resource_server import require_oauth @@ -27,7 +26,8 @@ def gnqna(): try: fahamu_token = current_app.config.get("FAHAMU_AUTH_TOKEN") if fahamu_token is None: - raise LLMError("Request failed:an LLM authorisation token is required ", query=query) + raise LLMError( + "Request failed:an LLM authorisation token is required ", query=query) task_id, answer, refs = get_gnqa( query, fahamu_token, current_app.config.get("DATA_DIR")) response = { @@ -49,14 +49,14 @@ def gnqna(): cursor.execute(schema) cursor.execute("""INSERT INTO history(user_id,task_id,query,results) VALUES(?,?,?,?) 
- """,(str(token.user.user_id),str(task_id["task_id"]),query, - json.dumps(response)) + """, (str(token.user.user_id), str(task_id["task_id"]), query, + json.dumps(response)) ) return response except _HTTPException as httpe: raise AuthorisationError("Authentication is required.") from httpe except sqlite3.Error as error: - raise error + raise sqlite3.OperationalError(*error.args) from error except LLMError as error: raise LLMError(f"request failed for query {str(error.args[-1])}", query=query) from error @@ -96,14 +96,15 @@ def rate_queries(task_id): "You have successfully rated this query:Thank you!!" }, 200 except sqlite3.Error as error: - raise sqlite3.OperationalError from error + raise sqlite3.OperationalError(*error.args) from error except _HTTPException as httpe: raise AuthorisationError("Authentication is required") from httpe + @gnqa.route("/history", methods=["GET"]) @require_oauth("profile user") def fetch_prev_history(): - """ api method to fetch search query records""" + """ api method to fetch search query records""" # ro only try: llm_db_path = current_app.config["LLM_DB_PATH"] with (require_oauth.acquire("profile user") as token, @@ -111,14 +112,13 @@ def fetch_prev_history(): cursor = conn.cursor() if request.args.get("search_term"): query = """SELECT results from history Where task_id=? 
and user_id=?""" - cursor.execute(query, (request.args.get("search_term") - ,str(token.user.user_id),)) + cursor.execute(query, (request.args.get( + "search_term"), str(token.user.user_id),)) return dict(cursor.fetchone()) query = """SELECT task_id,query from history WHERE user_id=?""" cursor.execute(query, (str(token.user.user_id),)) - return [dict(item) for item in cursor.fetchall()] - - except sqlite3.Error as error: #please handle me corrrectly - return jsonify({"error":error}), 500 + return jsonify([dict(item) for item in cursor.fetchall()]) + except sqlite3.Error as error: + raise sqlite3.OperationalError(*error.args) from error except _HTTPException as httpe: raise AuthorisationError("Authorization is required") from httpe -- cgit v1.2.3 From d4b0ae19e55a45eed7b6bca43abb5340f58ccfbe Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 23 May 2024 12:17:40 +0300 Subject: rename gnqna route to search. --- gn3/api/llm.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 172e49b..e8ffa1a 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -17,17 +17,18 @@ from gn3.auth import db gnqa = Blueprint("gnqa", __name__) -@gnqa.route("/gnqna", methods=["POST"]) -def gnqna(): +@gnqa.route("/search", methods=["POST"]) +def search(): """Main gnqa endpoint""" query = request.json.get("querygnqa", "") if not query: return jsonify({"error": "querygnqa is missing in the request"}), 400 try: fahamu_token = current_app.config.get("FAHAMU_AUTH_TOKEN") - if fahamu_token is None: + if not fahamu_token: raise LLMError( - "Request failed:an LLM authorisation token is required ", query=query) + "Request failed:an LLM authorisation token is required ", + query=query) task_id, answer, refs = get_gnqa( query, fahamu_token, current_app.config.get("DATA_DIR")) response = { @@ -39,17 +40,18 @@ def gnqna(): try: with (db.connection(current_app.config["LLM_DB_PATH"]) as conn, require_oauth.acquire("profile 
user") as token): - schema = """CREATE TABLE IF NOT EXISTS + cursor = conn.cursor() + cursor.execute("""CREATE TABLE IF NOT EXISTS history(user_id TEXT NOT NULL, task_id TEXT NOT NULL, query TEXT NOT NULL, results TEXT, - PRIMARY KEY(task_id)) WITHOUT ROWID""" - cursor = conn.cursor() - cursor.execute(schema) - cursor.execute("""INSERT INTO history(user_id,task_id,query,results) + PRIMARY KEY(task_id)) WITHOUT ROWID""") + cursor.execute( + """INSERT INTO history(user_id,task_id,query,results) VALUES(?,?,?,?) - """, (str(token.user.user_id), str(task_id["task_id"]), query, + """, (str(token.user.user_id), str(task_id["task_id"]), + query, json.dumps(response)) ) return response @@ -104,7 +106,7 @@ def rate_queries(task_id): @gnqa.route("/history", methods=["GET"]) @require_oauth("profile user") def fetch_prev_history(): - """ api method to fetch search query records""" # ro only + """ api method to fetch search query records""" try: llm_db_path = current_app.config["LLM_DB_PATH"] with (require_oauth.acquire("profile user") as token, -- cgit v1.2.3 From ef955f9b456a591f64faa428b8ef83252923bb63 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 23 May 2024 12:29:04 +0300 Subject: Remove irrelevant variable assignments. 
--- gn3/api/llm.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index e8ffa1a..5ad58cb 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -69,9 +69,8 @@ def search(): def rate_queries(task_id): """Endpoint for rating qnqa query and answer""" try: - llm_db_path = current_app.config["LLM_DB_PATH"] with (require_oauth.acquire("profile") as token, - db.connection(llm_db_path) as conn): + db.connection(current_app.config["LLM_DB_PATH"]) as conn): results = request.json user_id, query, answer, weight = (token.user.user_id, @@ -108,17 +107,18 @@ def rate_queries(task_id): def fetch_prev_history(): """ api method to fetch search query records""" try: - llm_db_path = current_app.config["LLM_DB_PATH"] with (require_oauth.acquire("profile user") as token, - db.connection(llm_db_path) as conn): + db.connection(current_app.config["LLM_DB_PATH"]) as conn): cursor = conn.cursor() if request.args.get("search_term"): - query = """SELECT results from history Where task_id=? and user_id=?""" - cursor.execute(query, (request.args.get( - "search_term"), str(token.user.user_id),)) + cursor.execute( + """SELECT results from history Where task_id=? and user_id=?""", + (request.args.get("search_term"), + str(token.user.user_id),)) return dict(cursor.fetchone()) - query = """SELECT task_id,query from history WHERE user_id=?""" - cursor.execute(query, (str(token.user.user_id),)) + cursor.execute( + """SELECT task_id,query from history WHERE user_id=?""", + (str(token.user.user_id),)) return jsonify([dict(item) for item in cursor.fetchall()]) except sqlite3.Error as error: raise sqlite3.OperationalError(*error.args) from error -- cgit v1.2.3 From d9233e0a4811203e59161b635c33a7c1b753b3d8 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 14:20:02 +0300 Subject: Remove redundant llm base class exception. 
--- gn3/llms/errors.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/gn3/llms/errors.py b/gn3/llms/errors.py index 3512f4d..c5439d6 100644 --- a/gn3/llms/errors.py +++ b/gn3/llms/errors.py @@ -35,11 +35,7 @@ class UnprocessableEntity(HTTPError): msg, request=request, response=response) -class LLMErrorMIxins(Exception): - """base class for llm errors""" - - -class LLMError(LLMErrorMIxins): +class LLMError(Exception): """custom exception for LLMErrorMIxins""" def __init__(self, *args, **kwargs): super().__init__(*args) -- cgit v1.2.3 From 2518b60722e5248db363405dee68d9edb0e79601 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 15:14:31 +0300 Subject: Update custom_request to raise LLMError for Exceptions. --- gn3/llms/client.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index d57bca2..99df36a 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -105,22 +105,25 @@ class GeneNetworkQAClient(Session): """ make custom request to fahamu api ask and get response""" max_retries = 50 retry_delay = 3 + response_msg = { + 404: "Api endpoint Does not exist", + 500: "Use of Invalid Token/or the Fahamu Api is currently down", + 400: "You sent a bad Fahamu request", + 401: "You do not have authorization to perform the request", + } for _i in range(max_retries): - try: - response = super().request(method, url, *args, **kwargs) - response.raise_for_status() - if response.ok: - if method.lower() == "get" and response.json().get("data") is None: - time.sleep(retry_delay) - continue - return response - else: + response = super().request(method, url, *args, **kwargs) + if response.ok: + if method.lower() == "get" and response.json().get("data") is None: + # note this is a dirty trick to check if fahamu has returned the results + # the issue is that the api only returns 500 or 200 satus code + # TODO: fix this on their end 
time.sleep(retry_delay) - except requests.exceptions.HTTPError as error: - if error.response.status_code == 500: - raise LLMError(error.request, error.response, f"Response Error with:status_code:{error.response.status_code},Reason for error: Use of Invalid Fahamu Token") from error - raise LLMError(error.request, error.response, - f"HTTP error occurred with error status:{error.response.status_code}") from error - except requests.exceptions.RequestException as error: - raise error - raise TimeoutError + continue + return response + else: + raise LLMError( f"Request error with code:\ + {response.status_code} occurred with reason:\ + {response_msg.get(response.status_code,response.reason)}") + #time.sleep(retry_delay) + raise LLMError("Time error occurred when querying the fahamu Api,Please a try the search again") -- cgit v1.2.3 From e75d484bf786176edcac4edfff65e3035fd493eb Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 15:22:08 +0300 Subject: Remove try/block for get_answer/ask methods:Exception already raised --- gn3/llms/client.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 99df36a..07e9506 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -86,20 +86,13 @@ class GeneNetworkQAClient(Session): def ask(self, ex_url, *args, **kwargs): """fahamu ask api interface""" res = self.custom_request('POST', f"{self.base_url}{ex_url}", *args, **kwargs) - if res.status_code != 200: - return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0 return res, json.loads(res.text) def get_answer(self, taskid, *args, **kwargs): """Fahamu get answer interface""" - try: - query = f"{self.answer_url}?task_id={taskid['task_id']}" - res = self.custom_request('GET', query, *args, **kwargs) - if res.status_code != 200: - return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0 - return res, 1 - except TimeoutError: - return "Timeout error 
occured:try to rephrase your query", 0 + query = f"{self.answer_url}?task_id={taskid['task_id']}" + res = self.custom_request('GET', query, *args, **kwargs) + return res, 1 def custom_request(self, method, url, *args, **kwargs): """ make custom request to fahamu api ask and get response""" @@ -126,4 +119,4 @@ class GeneNetworkQAClient(Session): {response.status_code} occurred with reason:\ {response_msg.get(response.status_code,response.reason)}") #time.sleep(retry_delay) - raise LLMError("Time error occurred when querying the fahamu Api,Please a try the search again") + raise LLMError("Time error: Please try to rephrase of query to get an answer") -- cgit v1.2.3 From 9806f3cf708f8eb4e1248eb5059deee53a3887c6 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 15:33:51 +0300 Subject: Initialize new class attribute self.query for to pass to LLMError. --- gn3/llms/client.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 07e9506..fa1e36e 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -55,6 +55,7 @@ class GeneNetworkQAClient(Session): self.base_url = "https://genenetwork.fahamuai.com/api/tasks" self.answer_url = f"{self.base_url}/answers" self.feedback_url = f"{self.base_url}/feedback" + self.query = "" adapter = TimeoutHTTPAdapter( timeout=timeout, @@ -83,8 +84,9 @@ class GeneNetworkQAClient(Session): """ handler for non 200 response from fahamu api""" return f"Error: Status code -{response.status_code}- Reason::{response.reason}" - def ask(self, ex_url, *args, **kwargs): + def ask(self, ex_url, query, *args, **kwargs): """fahamu ask api interface""" + self.query = query res = self.custom_request('POST', f"{self.base_url}{ex_url}", *args, **kwargs) return res, json.loads(res.text) @@ -117,6 +119,8 @@ class GeneNetworkQAClient(Session): else: raise LLMError( f"Request error with code:\ {response.status_code} occurred with reason:\ - 
{response_msg.get(response.status_code,response.reason)}") + {response_msg.get(response.status_code,response.reason)}", + query=self.query) #time.sleep(retry_delay) - raise LLMError("Time error: Please try to rephrase of query to get an answer") + raise LLMError("Time error: Please try to rephrase of query to get an answer", + query=self.query) -- cgit v1.2.3 From 13bb57cbd191ffe6e40e830ca08b9191b2dc5700 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 15:34:53 +0300 Subject: Pass query as an argument to api_client ask method. --- gn3/llms/process.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index eba7e4b..d53a7fd 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -118,7 +118,7 @@ def get_gnqa(query, auth_token, data_dir=""): """ api_client = GeneNetworkQAClient(api_key=auth_token) - res, task_id = api_client.ask('?ask=' + quote(query), auth_token) + res, task_id = api_client.ask('?ask=' + quote(query), query=query) if task_id == 0: raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}") res, status = api_client.get_answer(task_id) -- cgit v1.2.3 From ee1345535817a6b3e6943b268057f53840d78b8f Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 15:48:45 +0300 Subject: Check for null and empty data results and update timeout message --- gn3/llms/client.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index fa1e36e..5c4fa0e 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -109,7 +109,7 @@ class GeneNetworkQAClient(Session): for _i in range(max_retries): response = super().request(method, url, *args, **kwargs) if response.ok: - if method.lower() == "get" and response.json().get("data") is None: + if method.lower() == "get" and not (response.json().get("data")): # note this is a dirty trick to check if fahamu has returned the results # the issue is that the api only 
returns 500 or 200 satus code # TODO: fix this on their end @@ -122,5 +122,6 @@ class GeneNetworkQAClient(Session): {response_msg.get(response.status_code,response.reason)}", query=self.query) #time.sleep(retry_delay) - raise LLMError("Time error: Please try to rephrase of query to get an answer", + raise LLMError("Time error: We couldn't provide a response,Please try\ + to rephrase your question to receive feedback", query=self.query) -- cgit v1.2.3 From 651f307a4b8e60aaea0c8a7649a5b02aafce7a98 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 15:59:30 +0300 Subject: Removed status check on get_gnqa function. --- gn3/llms/process.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index d53a7fd..ab2a80e 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -116,22 +116,11 @@ def get_gnqa(query, auth_token, data_dir=""): answer references: contains doc_name,reference,pub_med_info """ - api_client = GeneNetworkQAClient(api_key=auth_token) res, task_id = api_client.ask('?ask=' + quote(query), query=query) - if task_id == 0: - raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}") - res, status = api_client.get_answer(task_id) - if status == 1: - resp_text = filter_response_text(res.text) - if resp_text.get("data") is None: - return task_id, "Please try to rephrase your question to receive feedback", [] - answer = resp_text['data']['answer'] - context = resp_text['data']['context'] - references = parse_context( - context, DocIDs().get_info, format_bibliography_info) - references = fetch_pubmed(references, "pubmed.json", data_dir) - - return task_id, answer, references - else: - return task_id, "We couldn't provide a response,Please try to rephrase your question to receive feedback", [] + res, _status = api_client.get_answer(task_id) + resp_text = filter_response_text(res.text) + answer = resp_text['data']['answer'] + context = 
resp_text['data']['context'] + return task_id, answer, fetch_pubmed(parse_context( + context, DocIDs().get_info, format_bibliography_info), "pubmed.json", data_dir) -- cgit v1.2.3 From 6bbe9763a024558f2a4a942d71c799e4583448a2 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 16:06:22 +0300 Subject: Remove Try/Excepts from llm api endponts. --- gn3/api/llm.py | 152 +++++++++++++++++++++++++-------------------------------- 1 file changed, 66 insertions(+), 86 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 5ad58cb..03ce815 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -2,7 +2,6 @@ import json import sqlite3 from authlib.integrations.flask_oauth2.errors import _HTTPException - from flask import Blueprint from flask import current_app from flask import jsonify @@ -14,6 +13,7 @@ from gn3.auth.authorisation.oauth2.resource_server import require_oauth from gn3.auth.authorisation.errors import AuthorisationError from gn3.auth import db + gnqa = Blueprint("gnqa", __name__) @@ -23,104 +23,84 @@ def search(): query = request.json.get("querygnqa", "") if not query: return jsonify({"error": "querygnqa is missing in the request"}), 400 - try: - fahamu_token = current_app.config.get("FAHAMU_AUTH_TOKEN") - if not fahamu_token: - raise LLMError( - "Request failed:an LLM authorisation token is required ", - query=query) - task_id, answer, refs = get_gnqa( - query, fahamu_token, current_app.config.get("DATA_DIR")) - response = { - "task_id": task_id, - "query": query, - "answer": answer, - "references": refs - } - try: - with (db.connection(current_app.config["LLM_DB_PATH"]) as conn, - require_oauth.acquire("profile user") as token): - cursor = conn.cursor() - cursor.execute("""CREATE TABLE IF NOT EXISTS - history(user_id TEXT NOT NULL, - task_id TEXT NOT NULL, - query TEXT NOT NULL, - results TEXT, - PRIMARY KEY(task_id)) WITHOUT ROWID""") - cursor.execute( - """INSERT INTO history(user_id,task_id,query,results) - VALUES(?,?,?,?) 
- """, (str(token.user.user_id), str(task_id["task_id"]), - query, - json.dumps(response)) - ) - return response - except _HTTPException as httpe: - raise AuthorisationError("Authentication is required.") from httpe - except sqlite3.Error as error: - raise sqlite3.OperationalError(*error.args) from error - except LLMError as error: - raise LLMError(f"request failed for query {str(error.args[-1])}", - query=query) from error + fahamu_token = current_app.config.get("FAHAMU_AUTH_TOKEN") + if not fahamu_token: + raise LLMError( + "Request failed:an LLM authorisation token is required ", + query=query) + task_id, answer, refs = get_gnqa( + query, fahamu_token, current_app.config.get("DATA_DIR")) + response = { + "task_id": task_id, + "query": query, + "answer": answer, + "references": refs + } + with (db.connection(current_app.config["LLM_DB_PATH"]) as conn, + require_oauth.acquire("profile user") as token): + cursor = conn.cursor() + cursor.execute("""CREATE TABLE IF NOT EXISTS + history(user_id TEXT NOT NULL, + task_id TEXT NOT NULL, + query TEXT NOT NULL, + results TEXT, + PRIMARY KEY(task_id)) WITHOUT ROWID""") + cursor.execute( + """INSERT INTO history(user_id, task_id, query, results) + VALUES(?, ?, ?, ?) 
+ """, (str(token.user.user_id), str(task_id["task_id"]), + query, + json.dumps(response)) + ) + return response @gnqa.route("/rating/", methods=["POST"]) @require_oauth("profile") def rate_queries(task_id): """Endpoint for rating qnqa query and answer""" - try: - with (require_oauth.acquire("profile") as token, - db.connection(current_app.config["LLM_DB_PATH"]) as conn): + with (require_oauth.acquire("profile") as token, + db.connection(current_app.config["LLM_DB_PATH"]) as conn): - results = request.json - user_id, query, answer, weight = (token.user.user_id, - results.get("query"), - results.get("answer"), - results.get("weight", 0)) - cursor = conn.cursor() - create_table = """CREATE TABLE IF NOT EXISTS Rating( - user_id TEXT NOT NULL, - query TEXT NOT NULL, - answer TEXT NOT NULL, - weight INTEGER NOT NULL DEFAULT 0, - task_id TEXT NOT NULL UNIQUE - )""" - cursor.execute(create_table) - cursor.execute("""INSERT INTO Rating(user_id,query, - answer,weight,task_id) - VALUES(?,?,?,?,?) - ON CONFLICT(task_id) DO UPDATE SET - weight=excluded.weight - """, (str(user_id), query, answer, weight, task_id)) + results = request.json + user_id, query, answer, weight = (token.user.user_id, + results.get("query"), + results.get("answer"), + results.get("weight", 0)) + cursor = conn.cursor() + create_table = """CREATE TABLE IF NOT EXISTS Rating( + user_id TEXT NOT NULL, + query TEXT NOT NULL, + answer TEXT NOT NULL, + weight INTEGER NOT NULL DEFAULT 0, + task_id TEXT NOT NULL UNIQUE + )""" + cursor.execute(create_table) + cursor.execute("""INSERT INTO Rating(user_id, query, + answer, weight, task_id) + VALUES(?,?,?,?,?) + ON CONFLICT(task_id) DO UPDATE SET + weight=excluded.weight + """, (str(user_id), query, answer, weight, task_id)) return { - "message": - "You have successfully rated this query:Thank you!!" + "message": "You have successfully rated this query.Thank you!" 
}, 200 - except sqlite3.Error as error: - raise sqlite3.OperationalError(*error.args) from error - except _HTTPException as httpe: - raise AuthorisationError("Authentication is required") from httpe @gnqa.route("/history", methods=["GET"]) @require_oauth("profile user") def fetch_prev_history(): """ api method to fetch search query records""" - try: - with (require_oauth.acquire("profile user") as token, - db.connection(current_app.config["LLM_DB_PATH"]) as conn): - cursor = conn.cursor() - if request.args.get("search_term"): - cursor.execute( - """SELECT results from history Where task_id=? and user_id=?""", - (request.args.get("search_term"), - str(token.user.user_id),)) - return dict(cursor.fetchone()) + with (require_oauth.acquire("profile user") as token, + db.connection(current_app.config["LLM_DB_PATH"]) as conn): + cursor = conn.cursor() + if request.args.get("search_term"): cursor.execute( - """SELECT task_id,query from history WHERE user_id=?""", - (str(token.user.user_id),)) - return jsonify([dict(item) for item in cursor.fetchall()]) - except sqlite3.Error as error: - raise sqlite3.OperationalError(*error.args) from error - except _HTTPException as httpe: - raise AuthorisationError("Authorization is required") from httpe + """SELECT results from history Where task_id=? and user_id=?""", + (request.args.get("search_term"), + str(token.user.user_id),)) + return dict(cursor.fetchone())["results"] + cursor.execute( + """SELECT task_id,query from history WHERE user_id=?""", + (str(token.user.user_id),)) + return jsonify([dict(item) for item in cursor.fetchall()]) -- cgit v1.2.3 From 67c71507c84d474ac13681f16d994e7967321ddb Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 16:21:50 +0300 Subject: Return first argument as error message. 
--- gn3/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/errors.py b/gn3/errors.py index ac9e070..ad08ae5 100644 --- a/gn3/errors.py +++ b/gn3/errors.py @@ -112,7 +112,7 @@ def handle_llm_error(exc: Exception) -> Response: "query": exc.query if exc.query else "", "error_type": type(exc).__name__, "error": ( - exc.args if bool(exc.args) else "Fahamu gnqa error occurred" + exc.args[0] if bool(exc.args) else "Fahamu gnqa error occurred" ), "trace": traceback.format_exc() }) -- cgit v1.2.3 From 8512d9a606fbfff864345d82c210e281a6d943bf Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 16:27:51 +0300 Subject: Initialize second args to LLMError as query parameter. --- gn3/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/errors.py b/gn3/errors.py index ad08ae5..8331028 100644 --- a/gn3/errors.py +++ b/gn3/errors.py @@ -109,7 +109,7 @@ def handle_generic(exc: Exception) -> Response: def handle_llm_error(exc: Exception) -> Response: """ Handle llm erros if not handled anywhere else. """ resp = jsonify({ - "query": exc.query if exc.query else "", + "query": exc.args[1], "error_type": type(exc).__name__, "error": ( exc.args[0] if bool(exc.args) else "Fahamu gnqa error occurred" -- cgit v1.2.3 From 188f84ef3895de613e998c63da7ec2338e25a55c Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 16:30:59 +0300 Subject: Remove kwargs from LLMError Exceptions and update relevant code. 
--- gn3/api/llm.py | 7 +------ gn3/llms/client.py | 6 +++--- gn3/llms/errors.py | 1 - 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 03ce815..c1f6304 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -1,7 +1,5 @@ """Api endpoints for gnqa""" import json -import sqlite3 -from authlib.integrations.flask_oauth2.errors import _HTTPException from flask import Blueprint from flask import current_app from flask import jsonify @@ -10,7 +8,6 @@ from flask import request from gn3.llms.process import get_gnqa from gn3.llms.errors import LLMError from gn3.auth.authorisation.oauth2.resource_server import require_oauth -from gn3.auth.authorisation.errors import AuthorisationError from gn3.auth import db @@ -26,8 +23,7 @@ def search(): fahamu_token = current_app.config.get("FAHAMU_AUTH_TOKEN") if not fahamu_token: raise LLMError( - "Request failed:an LLM authorisation token is required ", - query=query) + "Request failed:an LLM authorisation token is required ", query) task_id, answer, refs = get_gnqa( query, fahamu_token, current_app.config.get("DATA_DIR")) response = { @@ -61,7 +57,6 @@ def rate_queries(task_id): """Endpoint for rating qnqa query and answer""" with (require_oauth.acquire("profile") as token, db.connection(current_app.config["LLM_DB_PATH"]) as conn): - results = request.json user_id, query, answer, weight = (token.user.user_id, results.get("query"), diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 5c4fa0e..d29d2a1 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -117,11 +117,11 @@ class GeneNetworkQAClient(Session): continue return response else: - raise LLMError( f"Request error with code:\ + raise LLMError(f"Request error with code:\ {response.status_code} occurred with reason:\ {response_msg.get(response.status_code,response.reason)}", - query=self.query) + self.query) #time.sleep(retry_delay) raise LLMError("Time error: We couldn't provide a response,Please try\ to rephrase your 
question to receive feedback", - query=self.query) + self.query) diff --git a/gn3/llms/errors.py b/gn3/llms/errors.py index c5439d6..77e0f9a 100644 --- a/gn3/llms/errors.py +++ b/gn3/llms/errors.py @@ -39,4 +39,3 @@ class LLMError(Exception): """custom exception for LLMErrorMIxins""" def __init__(self, *args, **kwargs): super().__init__(*args) - self.query = kwargs.get("query") -- cgit v1.2.3 From 3ea881f3bc28e8087be08f1d507991ac9b2a4230 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 16:36:57 +0300 Subject: Pylint fixes. --- gn3/llms/client.py | 4 ++-- gn3/llms/errors.py | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index d29d2a1..ad6c400 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -109,7 +109,7 @@ class GeneNetworkQAClient(Session): for _i in range(max_retries): response = super().request(method, url, *args, **kwargs) if response.ok: - if method.lower() == "get" and not (response.json().get("data")): + if method.lower() == "get" and not response.json().get("data"): # note this is a dirty trick to check if fahamu has returned the results # the issue is that the api only returns 500 or 200 satus code # TODO: fix this on their end @@ -122,6 +122,6 @@ class GeneNetworkQAClient(Session): {response_msg.get(response.status_code,response.reason)}", self.query) #time.sleep(retry_delay) - raise LLMError("Time error: We couldn't provide a response,Please try\ + raise LLMError("Timeout error: We couldn't provide a response,Please try\ to rephrase your question to receive feedback", self.query) diff --git a/gn3/llms/errors.py b/gn3/llms/errors.py index 77e0f9a..a3a47a3 100644 --- a/gn3/llms/errors.py +++ b/gn3/llms/errors.py @@ -37,5 +37,3 @@ class UnprocessableEntity(HTTPError): class LLMError(Exception): """custom exception for LLMErrorMIxins""" - def __init__(self, *args, **kwargs): - super().__init__(*args) -- cgit v1.2.3 From a3284926aaa73ce27565e4a1131d4447a3a7face Mon 
Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 17:18:55 +0300 Subject: Add created_at timestamp to History table. --- gn3/api/llm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index c1f6304..5924f12 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -40,6 +40,7 @@ def search(): task_id TEXT NOT NULL, query TEXT NOT NULL, results TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(task_id)) WITHOUT ROWID""") cursor.execute( """INSERT INTO history(user_id, task_id, query, results) -- cgit v1.2.3 From 22a1517e71ff6058090596498b9e829fb4e19664 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 17:34:59 +0300 Subject: Add created_at timestamp for Rating table. --- gn3/api/llm.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 5924f12..ab33c7a 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -1,5 +1,6 @@ """Api endpoints for gnqa""" import json +from datetime import datetime, timezone from flask import Blueprint from flask import current_app from flask import jsonify @@ -69,15 +70,17 @@ def rate_queries(task_id): query TEXT NOT NULL, answer TEXT NOT NULL, weight INTEGER NOT NULL DEFAULT 0, - task_id TEXT NOT NULL UNIQUE + task_id TEXT NOT NULL UNIQUE, + created_at TIMESTAMP )""" cursor.execute(create_table) cursor.execute("""INSERT INTO Rating(user_id, query, - answer, weight, task_id) - VALUES(?,?,?,?,?) + answer, weight, task_id, created_at) + VALUES(?, ?, ?, ?, ?, ?) ON CONFLICT(task_id) DO UPDATE SET weight=excluded.weight - """, (str(user_id), query, answer, weight, task_id)) + """, (str(user_id), query, answer, weight, task_id, + datetime.now(timezone.utc))) return { "message": "You have successfully rated this query.Thank you!" 
}, 200 -- cgit v1.2.3 From f6dfb1a3d06b75ef74cfdc295bd7f30390d2a4d8 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 18:38:18 +0300 Subject: sql: update: llm_db_update.sql: New file. --- sql/update/llm_db_update.sql | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 sql/update/llm_db_update.sql diff --git a/sql/update/llm_db_update.sql b/sql/update/llm_db_update.sql new file mode 100644 index 0000000..71f7491 --- /dev/null +++ b/sql/update/llm_db_update.sql @@ -0,0 +1,37 @@ +-- llm_db_update.sql --- + +-- Copyright (C) 2022 Alexander kabui + +-- Author: Alexander Kabui + +-- This program is free software; you can redistribute it and/or +-- modify it under the terms of the GNU General Public License +-- as published by the Free Software Foundation; either version 3 +-- of the License, or (at your option) any later version. + +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. + +-- You should have received a copy of the GNU General Public License +-- along with this program. If not, see . + +-- Sql file to create the history table, adding indexing for the history table +-- and adding timestamp column the Rating table + + +CREATE TABLE IF NOT EXISTS history ( + user_id TEXT NOT NULL, + task_id TEXT NOT NULL, + query TEXT NOT NULL, + results TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (task_id) +) WITHOUT ROWID; + + +CREATE INDEX IF NOT EXISTS idx_tbl_history_cols_task_id_user_id +ON history (task_id, user_id); + +ALTER TABLE Rating ADD COLUMN created_at TIMESTAMP; -- cgit v1.2.3 From 666461bcf6afc811e4c21dd23dbef2711a07049a Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 18:55:49 +0300 Subject: Update copyright year and email. 
--- sql/update/llm_db_update.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/update/llm_db_update.sql b/sql/update/llm_db_update.sql index 71f7491..6608a90 100644 --- a/sql/update/llm_db_update.sql +++ b/sql/update/llm_db_update.sql @@ -1,6 +1,6 @@ -- llm_db_update.sql --- --- Copyright (C) 2022 Alexander kabui +-- Copyright (C) 2024 Alexander kabui -- Author: Alexander Kabui -- cgit v1.2.3 From 105f2b36eb62b9b097e1cbf6fa815f98da77bc16 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 27 May 2024 14:09:02 +0300 Subject: Update Docstrings for Api endpoints and functions. --- gn3/api/llm.py | 6 +++--- gn3/llms/process.py | 48 +++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index ab33c7a..4b8ec52 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -17,7 +17,7 @@ gnqa = Blueprint("gnqa", __name__) @gnqa.route("/search", methods=["POST"]) def search(): - """Main gnqa endpoint""" + """Api endpoint for searching queries in fahamu Api""" query = request.json.get("querygnqa", "") if not query: return jsonify({"error": "querygnqa is missing in the request"}), 400 @@ -56,7 +56,7 @@ def search(): @gnqa.route("/rating/", methods=["POST"]) @require_oauth("profile") def rate_queries(task_id): - """Endpoint for rating qnqa query and answer""" + """Api endpoint for rating GNQA query and answer""" with (require_oauth.acquire("profile") as token, db.connection(current_app.config["LLM_DB_PATH"]) as conn): results = request.json @@ -89,7 +89,7 @@ def rate_queries(task_id): @gnqa.route("/history", methods=["GET"]) @require_oauth("profile user") def fetch_prev_history(): - """ api method to fetch search query records""" + """Api endpoint to fetch GNQA previous search.""" with (require_oauth.acquire("profile user") as token, db.connection(current_app.config["LLM_DB_PATH"]) as conn): cursor = conn.cursor() diff --git a/gn3/llms/process.py b/gn3/llms/process.py 
index ab2a80e..ade4104 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -35,7 +35,7 @@ class DocIDs(): raise FileNotFoundError(f"{file_path}-- FIle does not exist\n") def format_doc_ids(self, docs): - """method to format doc_ids for list items""" + """method to format doc_ids for list items doc_id and doc_name""" for _key, val in docs.items(): if isinstance(val, list): for doc_obj in val: @@ -43,7 +43,14 @@ class DocIDs(): self.doc_ids.update({doc_obj["id"]: doc_name}) def get_info(self, doc_id): - """ interface to make read from doc_ids""" + """ interface to make read from doc_ids + and extract info data else returns + doc_id + Args: + doc_id: str: a search key for doc_ids + Returns: + an object with doc_info if doc_id in doc_ids + """ if doc_id in self.doc_ids.keys(): return self.doc_ids[doc_id] else: @@ -51,7 +58,8 @@ class DocIDs(): def format_bibliography_info(bib_info): - """Function for formatting bibliography info""" + """Utility function for formatting bibliography info + """ if isinstance(bib_info, str): return bib_info.removesuffix('.txt') elif isinstance(bib_info, dict): @@ -66,7 +74,15 @@ def filter_response_text(val): def parse_context(context, get_info_func, format_bib_func): - """function to parse doc_ids content""" + """Function to parse doc_ids content + Args: + context: raw references from fahamu api + get_info_func: function to get doc_ids info + format_bib_func: function to foramt bibliography info + Returns: + an list with each item having (doc_id,bib_info, + combined reference text) + """ results = [] for doc_ids, summary in context.items(): combo_txt = "" @@ -81,7 +97,12 @@ def parse_context(context, get_info_func, format_bib_func): def load_file(filename, dir_path): - """function to open and load json file""" + """Utility function to read json file + Args: + filename: file name to read + dir_path: base directory for the file + Returns: json data read to a dict + """ file_path = os.path.join(dir_path, f"{filename}") if not 
os.path.isfile(file_path): raise FileNotFoundError(f"{filename} was not found or is a directory") @@ -90,8 +111,19 @@ def load_file(filename, dir_path): def fetch_pubmed(references, file_name, data_dir=""): - """method to fetch and populate references with pubmed""" + """ + Fetches PubMed data from a JSON file and populates the\ + references dictionary. + + Args: + references (dict): Dictionary with document IDs as keys\ + and reference data as values. + filename (str): Name of the JSON file containing PubMed data. + data_dir (str): Base directory where the data files are located. + Returns: + dict: Updated references dictionary populated with the PubMed data. + """ try: pubmed = load_file(file_name, os.path.join(data_dir, "gn-meta/lit")) for reference in references: @@ -123,4 +155,6 @@ def get_gnqa(query, auth_token, data_dir=""): answer = resp_text['data']['answer'] context = resp_text['data']['context'] return task_id, answer, fetch_pubmed(parse_context( - context, DocIDs().get_info, format_bibliography_info), "pubmed.json", data_dir) + context, DocIDs().get_info, + format_bibliography_info), + "pubmed.json", data_dir) -- cgit v1.2.3 From d0801cea229d00d5d4ce19fa1cb36242e56070d1 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 27 May 2024 14:18:48 +0300 Subject: Delete filter response text method and update relevant code. 
--- gn3/llms/process.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index ade4104..2ce6b2b 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -67,12 +67,6 @@ def format_bibliography_info(bib_info): return bib_info -def filter_response_text(val): - """helper function for filtering non-printable chars""" - return json.loads(''.join([str(char) - for char in val if char in string.printable])) - - def parse_context(context, get_info_func, format_bib_func): """Function to parse doc_ids content Args: @@ -151,7 +145,8 @@ def get_gnqa(query, auth_token, data_dir=""): api_client = GeneNetworkQAClient(api_key=auth_token) res, task_id = api_client.ask('?ask=' + quote(query), query=query) res, _status = api_client.get_answer(task_id) - resp_text = filter_response_text(res.text) + resp_text = json.loads(''.join([str(char) + for char in res.text if char in string.printable])) answer = resp_text['data']['answer'] context = resp_text['data']['context'] return task_id, answer, fetch_pubmed(parse_context( -- cgit v1.2.3 From 8aeb4e00af2651cf7ec55f2ace23c600f537ff77 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 27 May 2024 14:33:44 +0300 Subject: Delete llm obsolete unittests --- tests/unit/test_llm.py | 65 -------------------------------------------------- 1 file changed, 65 deletions(-) diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index 7b8a970..6e0f2af 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ -35,68 +35,3 @@ def test_parse_context(): ] assert parsed_result == expected_result - - -@dataclass(frozen=True) -class MockResponse: - """mock a response object""" - text: str - - def __getattr__(self, name: str): - return self.__dict__[f"_{name}"] - - -class MockGeneNetworkQAClient: - """mock the GeneNetworkQAClient class""" - - def __init__(self, session, api_key): - pass - - def ask(self, query, auth_token): - """mock method for ask query""" - # 
Simulate the ask method - return MockResponse("Mock response"), "F400995EAFE104EA72A5927CE10C73B7" - - def get_answer(self, task_id): - """mock get_answer method""" - return MockResponse("Mock answer"), 1 - - -def mock_filter_response_text(text): - """ method to simulate the filterResponseText method""" - return {"data": {"answer": "Mock answer for what is a gene", "context": {}}} - - -def mock_parse_context(context, get_info_func, format_bib_func): - """method to simulate the parse context method""" - return [] - - -@pytest.mark.unit_test -def test_get_gnqa(monkeypatch): - """test for process.get_gnqa functoin""" - monkeypatch.setattr( - "gn3.llms.process.GeneNetworkQAClient", - MockGeneNetworkQAClient - ) - - monkeypatch.setattr( - 'gn3.llms.process.filter_response_text', - mock_filter_response_text - ) - monkeypatch.setattr( - 'gn3.llms.process.parse_context', - mock_parse_context - ) - - query = "What is a gene" - auth_token = "test_token" - result = get_gnqa(query, auth_token) - - expected_result = ( - "F400995EAFE104EA72A5927CE10C73B7", - 'Mock answer for what is a gene', - [] - ) - - assert result == expected_result -- cgit v1.2.3 From 58fbc6527537cb229ded87eea57949c3cf02621f Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 27 May 2024 14:39:38 +0300 Subject: Remove duplicate code for loading files. 
--- gn3/llms/process.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 2ce6b2b..40e53c5 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -21,19 +21,10 @@ class DocIDs(): * doc_ids.json: opens doc)ids for gn references * sugar_doc_ids: open doci_ids for diabetes references """ - self.doc_ids = self.load_file("doc_ids.json") - self.sugar_doc_ids = self.load_file("all_files.json") + self.doc_ids = load_file("doc_ids.json", BASEDIR) + self.sugar_doc_ids = load_file("all_files.json", BASEDIR) self.format_doc_ids(self.sugar_doc_ids) - def load_file(self, file_name): - """Method to load and read doc_id files""" - file_path = os.path.join(BASEDIR, file_name) - if os.path.isfile(file_path): - with open(file_path, "rb") as file_handler: - return json.load(file_handler) - else: - raise FileNotFoundError(f"{file_path}-- FIle does not exist\n") - def format_doc_ids(self, docs): """method to format doc_ids for list items doc_id and doc_name""" for _key, val in docs.items(): -- cgit v1.2.3 From 59a27f884b2821ab9142f5285cd713ec374ea820 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 27 May 2024 14:47:19 +0300 Subject: Pylint fixes. --- tests/unit/test_llm.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index 6e0f2af..c32e888 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ -1,8 +1,5 @@ -# pylint: disable=unused-argument -"""Test cases for procedures defined in llms module""" -from dataclasses import dataclass +"""Test cases for procedures defined in llms """ import pytest -from gn3.llms.process import get_gnqa from gn3.llms.process import parse_context -- cgit v1.2.3 From d3f87b9a02bfec223d23c16eb1374d53065fea92 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 27 May 2024 17:37:13 +0300 Subject: Add regular expressions for parsing links in texts. 
--- gn3/llms/process.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 40e53c5..55c27a0 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -1,6 +1,7 @@ """this module contains code for processing response from fahamu client.py""" # pylint: disable=C0301 import os +import re import string import json import logging @@ -76,8 +77,13 @@ def parse_context(context, get_info_func, format_bib_func): doc_info = get_info_func(doc_ids) bib_info = doc_ids if doc_ids == doc_info else format_bib_func( doc_info) + pattern = r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*' + combo_text = re.sub(pattern, + lambda x: f" {x[0]} ", + combo_txt) results.append( - {"doc_id": doc_ids, "bibInfo": bib_info, "comboTxt": combo_txt}) + {"doc_id": doc_ids, "bibInfo": bib_info, + "comboTxt": combo_text}) return results @@ -137,8 +143,10 @@ def get_gnqa(query, auth_token, data_dir=""): res, task_id = api_client.ask('?ask=' + quote(query), query=query) res, _status = api_client.get_answer(task_id) resp_text = json.loads(''.join([str(char) - for char in res.text if char in string.printable])) - answer = resp_text['data']['answer'] + for char in res.text if char in string.printable])) + answer = re.sub(r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*', + lambda x: f" {x[0]} ", + resp_text["data"]["answer"]) context = resp_text['data']['context'] return task_id, answer, fetch_pubmed(parse_context( context, DocIDs().get_info, -- cgit v1.2.3 From 7d79812db623d12474422a9613c81f35e25aef55 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Wed, 29 May 2024 14:05:00 +0300 Subject: Add delete functionality for gnqa history. 
--- gn3/api/llm.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 4b8ec52..228d3fa 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -86,16 +86,24 @@ def rate_queries(task_id): }, 200 -@gnqa.route("/history", methods=["GET"]) +@gnqa.route("/history", methods=["GET", "POST"]) @require_oauth("profile user") def fetch_prev_history(): """Api endpoint to fetch GNQA previous search.""" with (require_oauth.acquire("profile user") as token, db.connection(current_app.config["LLM_DB_PATH"]) as conn): cursor = conn.cursor() + if request.method == "POST": + task_ids = list(request.json.values()) + query = """DELETE FROM history + WHERE task_id IN ({}) + and user_id=?""".format(",".join("?" * len(task_ids))) + cursor.execute(query, (*task_ids, str(token.user.user_id),)) + return jsonify({}) if request.args.get("search_term"): cursor.execute( - """SELECT results from history Where task_id=? and user_id=?""", + """SELECT results from history + Where task_id=? and user_id=?""", (request.args.get("search_term"), str(token.user.user_id),)) return dict(cursor.fetchone())["results"] -- cgit v1.2.3 From 231cbd69ed5292451b976091e117d819e5466b30 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 29 Aug 2024 09:51:38 +0300 Subject: Update gnqa search endpoint from POST to PUT. 
--- gn3/api/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 228d3fa..d56a3d2 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -15,7 +15,7 @@ from gn3.auth import db gnqa = Blueprint("gnqa", __name__) -@gnqa.route("/search", methods=["POST"]) +@gnqa.route("/search", methods=["PUT"]) def search(): """Api endpoint for searching queries in fahamu Api""" query = request.json.get("querygnqa", "") -- cgit v1.2.3 From c32378420d58d6770045cf8d2025dabccb4d1492 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 29 Aug 2024 10:09:24 +0300 Subject: Use correct http method `Delete` for search history. --- gn3/api/llm.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index d56a3d2..a94badd 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -86,21 +86,22 @@ def rate_queries(task_id): }, 200 -@gnqa.route("/history", methods=["GET", "POST"]) +@gnqa.route("/history", methods=["GET", "DELETE"]) @require_oauth("profile user") def fetch_prev_history(): """Api endpoint to fetch GNQA previous search.""" with (require_oauth.acquire("profile user") as token, db.connection(current_app.config["LLM_DB_PATH"]) as conn): cursor = conn.cursor() - if request.method == "POST": + if request.method == "DELETE": task_ids = list(request.json.values()) query = """DELETE FROM history WHERE task_id IN ({}) and user_id=?""".format(",".join("?" * len(task_ids))) cursor.execute(query, (*task_ids, str(token.user.user_id),)) return jsonify({}) - if request.args.get("search_term"): + elif (request.method == "GET" and + request.args.get("search_term")): cursor.execute( """SELECT results from history Where task_id=? and user_id=?""", -- cgit v1.2.3 From ade1b4cb03de8a1c670a3e876b8a18692dfc7694 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 29 Aug 2024 10:26:05 +0300 Subject: Check for empty values when fetching search history. 
--- gn3/api/llm.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index a94badd..5901ef5 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -107,7 +107,10 @@ def fetch_prev_history(): Where task_id=? and user_id=?""", (request.args.get("search_term"), str(token.user.user_id),)) - return dict(cursor.fetchone())["results"] + record = cursor.fetchone() + if record: + return dict(record).get("results") + return {} cursor.execute( """SELECT task_id,query from history WHERE user_id=?""", (str(token.user.user_id),)) -- cgit v1.2.3 From 7b17ab9d98f6a12f2a5b9b0eec8ff4c2c3ef2a5e Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 29 Aug 2024 10:28:34 +0300 Subject: Add logging for gn llm errors. --- gn3/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gn3/errors.py b/gn3/errors.py index 8331028..c53604f 100644 --- a/gn3/errors.py +++ b/gn3/errors.py @@ -108,6 +108,7 @@ def handle_generic(exc: Exception) -> Response: def handle_llm_error(exc: Exception) -> Response: """ Handle llm erros if not handled anywhere else. """ + current_app.logger.error(exc) resp = jsonify({ "query": exc.args[1], "error_type": type(exc).__name__, -- cgit v1.2.3 From a45ab3e62df6e1dfcc6fff03916369fbdaf68ab8 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 29 Aug 2024 11:05:39 +0300 Subject: Add default timestamp and and primary key for Rating table. 
--- gn3/api/llm.py | 4 ++-- sql/update/llm_db_update.sql | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 5901ef5..952a5b9 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -71,8 +71,8 @@ def rate_queries(task_id): answer TEXT NOT NULL, weight INTEGER NOT NULL DEFAULT 0, task_id TEXT NOT NULL UNIQUE, - created_at TIMESTAMP - )""" + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(task_id))""" cursor.execute(create_table) cursor.execute("""INSERT INTO Rating(user_id, query, answer, weight, task_id, created_at) diff --git a/sql/update/llm_db_update.sql b/sql/update/llm_db_update.sql index 6608a90..a4eb848 100644 --- a/sql/update/llm_db_update.sql +++ b/sql/update/llm_db_update.sql @@ -34,4 +34,4 @@ CREATE TABLE IF NOT EXISTS history ( CREATE INDEX IF NOT EXISTS idx_tbl_history_cols_task_id_user_id ON history (task_id, user_id); -ALTER TABLE Rating ADD COLUMN created_at TIMESTAMP; +ALTER TABLE Rating ADD COLUMN created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP; -- cgit v1.2.3 From 57b4a4fd5bcb8a2b7f9af856d8f1212c0fbbe0da Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 29 Aug 2024 11:06:07 +0300 Subject: Add sql file for creating llm db tables. --- sql/update/llm_db_tables.sql | 47 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 sql/update/llm_db_tables.sql diff --git a/sql/update/llm_db_tables.sql b/sql/update/llm_db_tables.sql new file mode 100644 index 0000000..a6c0479 --- /dev/null +++ b/sql/update/llm_db_tables.sql @@ -0,0 +1,47 @@ +-- llm_db_update.sql --- + +-- Copyright (C) 2024 Alexander kabui + +-- Author: Alexander Kabui + +-- This program is free software; you can redistribute it and/or +-- modify it under the terms of the GNU General Public License +-- as published by the Free Software Foundation; either version 3 +-- of the License, or (at your option) any later version. 
+ +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. + +-- You should have received a copy of the GNU General Public License +-- along with this program. If not, see . + +-- Sql file to create the tables for history rating and adding indexing for the history table +-- this targets setting up a new db +-- and adding timestamp column the Rating table + + +CREATE TABLE IF NOT EXISTS history ( + user_id TEXT NOT NULL, + task_id TEXT NOT NULL, + query TEXT NOT NULL, + results TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (task_id) +) WITHOUT ROWID; + + +CREATE INDEX IF NOT EXISTS idx_tbl_history_cols_task_id_user_id +ON history (task_id, user_id); + + + +CREATE TABLE IF NOT EXISTS Rating( + user_id TEXT NOT NULL, + query TEXT NOT NULL, + answer TEXT NOT NULL, + weight INTEGER NOT NULL DEFAULT 0, + task_id TEXT NOT NULL UNIQUE, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (task_id)); -- cgit v1.2.3 From 9c6f74d0a3f16a940b333d2a56803ed2e7d7c462 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 2 Sep 2024 15:05:25 +0300 Subject: Add spacing after punctuation. 
--- gn3/api/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 952a5b9..836a880 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -24,7 +24,7 @@ def search(): fahamu_token = current_app.config.get("FAHAMU_AUTH_TOKEN") if not fahamu_token: raise LLMError( - "Request failed:an LLM authorisation token is required ", query) + "Request failed: an LLM authorisation token is required ", query) task_id, answer, refs = get_gnqa( query, fahamu_token, current_app.config.get("DATA_DIR")) response = { -- cgit v1.2.3 From 748ebc5baff3412df163e17ee18a8fc688329b1d Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 2 Sep 2024 15:08:59 +0300 Subject: Use default datetime for table. --- gn3/api/llm.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 836a880..a617faf 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -75,12 +75,11 @@ def rate_queries(task_id): PRIMARY KEY(task_id))""" cursor.execute(create_table) cursor.execute("""INSERT INTO Rating(user_id, query, - answer, weight, task_id, created_at) - VALUES(?, ?, ?, ?, ?, ?) + answer, weight, task_id) + VALUES(?, ?, ?, ?, ?) ON CONFLICT(task_id) DO UPDATE SET weight=excluded.weight - """, (str(user_id), query, answer, weight, task_id, - datetime.now(timezone.utc))) + """, (str(user_id), query, answer, weight, task_id)) return { "message": "You have successfully rated this query.Thank you!" }, 200 -- cgit v1.2.3 From 884f57de47dfb7e80fd8ff25760ce5353267964e Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 2 Sep 2024 15:09:56 +0300 Subject: Fix spacing after punctuation. 
--- gn3/api/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index a617faf..8d17bc2 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -81,7 +81,7 @@ def rate_queries(task_id): weight=excluded.weight """, (str(user_id), query, answer, weight, task_id)) return { - "message": "You have successfully rated this query.Thank you!" + "message": "You have successfully rated this query. Thank you!" }, 200 -- cgit v1.2.3 From 2e81e48695e9b5618746c8cd1c6c83b452836442 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 2 Sep 2024 15:12:12 +0300 Subject: Fix minor syntax issue. --- gn3/llms/process.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 55c27a0..c3e6eda 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -19,8 +19,8 @@ class DocIDs(): def __init__(self): """ init method for Docids - * doc_ids.json: opens doc)ids for gn references - * sugar_doc_ids: open doci_ids for diabetes references + * doc_ids.json: open doc_ids for gn references + * sugar_doc_ids: open doc_ids for diabetes references """ self.doc_ids = load_file("doc_ids.json", BASEDIR) self.sugar_doc_ids = load_file("all_files.json", BASEDIR) -- cgit v1.2.3 From 086c80510ff418bca77f544d3dd4b174d2dc9c8e Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 2 Sep 2024 15:15:01 +0300 Subject: Remove unecessary check for open file. 
--- gn3/llms/process.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index c3e6eda..ef925c4 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -94,10 +94,8 @@ def load_file(filename, dir_path): dir_path: base directory for the file Returns: json data read to a dict """ - file_path = os.path.join(dir_path, f"{filename}") - if not os.path.isfile(file_path): - raise FileNotFoundError(f"{filename} was not found or is a directory") - with open(file_path, "rb") as file_handler: + with open(os.path.join(dir_path, f"{filename}"), + "rb") as file_handler: return json.load(file_handler) -- cgit v1.2.3 From 742beb6ee663bc9ae5409461d2be4b2144b8893e Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 2 Sep 2024 15:17:12 +0300 Subject: Refactor doc_id object. --- gn3/llms/process.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index ef925c4..bfce9a5 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -23,8 +23,8 @@ class DocIDs(): * sugar_doc_ids: open doc_ids for diabetes references """ self.doc_ids = load_file("doc_ids.json", BASEDIR) - self.sugar_doc_ids = load_file("all_files.json", BASEDIR) - self.format_doc_ids(self.sugar_doc_ids) + sugar_doc_ids = load_file("all_files.json", BASEDIR) + self.format_doc_ids(sugar_doc_ids) def format_doc_ids(self, docs): """method to format doc_ids for list items doc_id and doc_name""" -- cgit v1.2.3 From 303fa9ed617ff843d663719dd76d3a08c00cf724 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 2 Sep 2024 16:12:46 +0300 Subject: Rename task_id to task_object. 
--- gn3/llms/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index ad6c400..c225acc 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -90,9 +90,9 @@ class GeneNetworkQAClient(Session): res = self.custom_request('POST', f"{self.base_url}{ex_url}", *args, **kwargs) return res, json.loads(res.text) - def get_answer(self, taskid, *args, **kwargs): + def get_answer(self, task_obj, *args, **kwargs): """Fahamu get answer interface""" - query = f"{self.answer_url}?task_id={taskid['task_id']}" + query = f"{self.answer_url}?task_id={task_obj['task_id']}" res = self.custom_request('GET', query, *args, **kwargs) return res, 1 -- cgit v1.2.3 From 3de74bf43245088ff4d07af4fd796eb510ff73bb Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 2 Sep 2024 16:39:09 +0300 Subject: Add docs for custom request method. --- gn3/llms/client.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index c225acc..4858ceb 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -97,7 +97,13 @@ class GeneNetworkQAClient(Session): return res, 1 def custom_request(self, method, url, *args, **kwargs): - """ make custom request to fahamu api ask and get response""" + """ + Make a custom request to the Fahamu API to ask and get a response. + This is a custom method, which is the current default for fetching items, + as it overrides the adapter provided above. + This function was created to debug the slow response rate of Fahamu and + provide custom a response. + """ max_retries = 50 retry_delay = 3 response_msg = { -- cgit v1.2.3 From 38d867bdead1ebd1af0846d54474d9c962cd7ceb Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 2 Sep 2024 16:43:12 +0300 Subject: Modify default backoff_factor for adapter. 
--- gn3/llms/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 4858ceb..401355a 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -48,7 +48,7 @@ class GeneNetworkQAClient(Session): """ def __init__(self, api_key, timeout=30, - total_retries=5, backoff_factor=30): + total_retries=5, backoff_factor=2): super().__init__() self.headers.update( {"Authorization": "Bearer " + api_key}) -- cgit v1.2.3 From 41c352926ca178e65d6c948fdf93b0f987e2878a Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 2 Sep 2024 17:02:03 +0300 Subject: Remove redundant function for fetching data with task_id. --- gn3/llms/client.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 401355a..9bcb2e3 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -69,16 +69,6 @@ class GeneNetworkQAClient(Session): self.mount("https://", adapter) self.mount("http://", adapter) - def get_answer_using_task_id(self, extend_url, my_auth): - """call this method with task id to fetch response""" - try: - response = requests.get( - self.answer_url + extend_url, data={}, headers=my_auth) - response.raise_for_status() - return response - except requests.exceptions.RequestException as error: - raise error - @staticmethod def negative_status_msg(response): """ handler for non 200 response from fahamu api""" -- cgit v1.2.3 From c16c54759cfd493250424ee3f565862e5d6009b3 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 3 Sep 2024 10:41:33 +0300 Subject: Raise KeyError for doc_id not found in doc_ids. 
--- gn3/llms/process.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index bfce9a5..b8e47e7 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -41,13 +41,10 @@ class DocIDs(): Args: doc_id: str: a search key for doc_ids Returns: - an object with doc_info if doc_id in doc_ids + an object if doc id exists else + raises a KeyError """ - if doc_id in self.doc_ids.keys(): - return self.doc_ids[doc_id] - else: - return doc_id - + return self.doc_ids[doc_id] def format_bibliography_info(bib_info): """Utility function for formatting bibliography info @@ -74,9 +71,11 @@ def parse_context(context, get_info_func, format_bib_func): combo_txt = "" for entry in summary: combo_txt += "\t" + entry["text"] - doc_info = get_info_func(doc_ids) - bib_info = doc_ids if doc_ids == doc_info else format_bib_func( - doc_info) + try: + doc_info = get_info_func(doc_ids) + bib_info = format_bib_func(doc_info) + except KeyError: + bib_info = doc_ids pattern = r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*' combo_text = re.sub(pattern, lambda x: f" {x[0]} ", -- cgit v1.2.3 From c34d75a29b7d144030542b9de4fd0e0d614303a9 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 3 Sep 2024 11:08:31 +0300 Subject: Use Jsonb for storing results. 
--- gn3/api/llm.py | 2 +- sql/update/llm_db_tables.sql | 2 +- sql/update/llm_db_update.sql | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 8d17bc2..9ee4a79 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -40,7 +40,7 @@ def search(): history(user_id TEXT NOT NULL, task_id TEXT NOT NULL, query TEXT NOT NULL, - results TEXT, + results JSONB, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(task_id)) WITHOUT ROWID""") cursor.execute( diff --git a/sql/update/llm_db_tables.sql b/sql/update/llm_db_tables.sql index a6c0479..b501832 100644 --- a/sql/update/llm_db_tables.sql +++ b/sql/update/llm_db_tables.sql @@ -26,7 +26,7 @@ CREATE TABLE IF NOT EXISTS history ( user_id TEXT NOT NULL, task_id TEXT NOT NULL, query TEXT NOT NULL, - results TEXT, + results JSONB, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (task_id) ) WITHOUT ROWID; diff --git a/sql/update/llm_db_update.sql b/sql/update/llm_db_update.sql index a4eb848..7f1a9f9 100644 --- a/sql/update/llm_db_update.sql +++ b/sql/update/llm_db_update.sql @@ -25,7 +25,7 @@ CREATE TABLE IF NOT EXISTS history ( user_id TEXT NOT NULL, task_id TEXT NOT NULL, query TEXT NOT NULL, - results TEXT, + results JSONB, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (task_id) ) WITHOUT ROWID; -- cgit v1.2.3 From dbe79d8a4ca31e23d18b9fdb352dc783c7e0db64 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 3 Sep 2024 11:46:21 +0300 Subject: Remove unused imports. 
--- gn3/api/llm.py | 1 - gn3/llms/client.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 9ee4a79..20831e5 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -1,6 +1,5 @@ """Api endpoints for gnqa""" import json -from datetime import datetime, timezone from flask import Blueprint from flask import current_app from flask import jsonify diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 9bcb2e3..54a7a17 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -2,8 +2,6 @@ # pylint: disable=C0301 import json import time - -import requests from requests import Session from requests.adapters import HTTPAdapter from requests.adapters import Retry -- cgit v1.2.3 From e80b08c42550aaf6ae014d88d5eaad6c9659f212 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 3 Sep 2024 11:46:39 +0300 Subject: Add unittests for formatting bib info. --- tests/unit/test_llm.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index c32e888..97365f4 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ -1,7 +1,7 @@ """Test cases for procedures defined in llms """ import pytest from gn3.llms.process import parse_context - +from gn3.llms.process import format_bibliography_info @pytest.mark.unit_test @@ -32,3 +32,25 @@ def test_parse_context(): ] assert parsed_result == expected_result + +@pytest.mark.unit_test +def test_format_bib_info(): + mock_fahamu_bib_info = [ + { + "author": "J.m", + "firstName": "john", + "title": "Genes and aging", + "year": 2013, + "doi": "https://Articles.com/12231" + }, + "2019-Roy-Evaluation of Sirtuin-3 probe quality and co-expressed genes", + "2015 - Differential regional and cellular distribution of TFF3 peptide in the human brain.txt"] + expected_result = [ + "J.m.Genes and aging.2013.https://Articles.com/12231 ", + "2019-Roy-Evaluation of Sirtuin-3 probe quality and co-expressed genes", + "2015 - 
Differential regional and cellular distribution of TFF3 peptide in the human brain" + ] + + assert all([format_bibliography_info(data) == expected + for data, expected + in zip(mock_fahamu_bib_info, expected_result)]) -- cgit v1.2.3 From 3c3f91e6cfd0f4557101dfb0729d0b4f6bad3604 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 3 Sep 2024 12:15:54 +0300 Subject: Add tests for fetching and populating references. --- tests/unit/test_llm.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index 97365f4..51b4921 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ -1,5 +1,6 @@ """Test cases for procedures defined in llms """ import pytest +from gn3.llms.process import fetch_pubmed from gn3.llms.process import parse_context from gn3.llms.process import format_bibliography_info @@ -54,3 +55,50 @@ def test_format_bib_info(): assert all([format_bibliography_info(data) == expected for data, expected in zip(mock_fahamu_bib_info, expected_result)]) + + +@pytest.mark.unit_test +def test_fetching_pubmed_info(monkeypatch): + + def mock_load_file(filename, dir_path): + return { + "12121" : { + "Abstract": "items1", + "Author": "A1" + } + } + # patch the module with the mocked function + + monkeypatch.setattr("gn3.llms.process.load_file", mock_load_file) + expected_results = [ + { + "title": "Genes", + "year": "2014", + "doi": "https/article/genes/12121", + "doc_id": "12121", + "pubmed": { + "Abstract": "items1", + "Author": "A1" + } + }, + { + "title": "Aging", + "year" : "2014", + "doc_id": "12122" + } + ] + + data = [ { + "title": "Genes", + "year": "2014", + "doi": "https/article/genes/12121", + "doc_id": "12121", + }, + { + "title": "Aging", + "year" : "2014", + "doc_id": "12122" + }] + + assert (fetch_pubmed(data, "/pubmed.json", "data/") + == expected_results) -- cgit v1.2.3 From a190f6aa0ddd745e46f6ca059d06a3fca5ffbf2e Mon Sep 17 00:00:00 2001 From: 
Alexander_Kabui Date: Tue, 3 Sep 2024 12:31:04 +0300 Subject: Make pylint fixes. --- tests/unit/test_llm.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index 51b4921..8fbaba6 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ -1,4 +1,5 @@ """Test cases for procedures defined in llms """ +# pylint: disable=C0301 import pytest from gn3.llms.process import fetch_pubmed from gn3.llms.process import parse_context @@ -36,6 +37,7 @@ def test_parse_context(): @pytest.mark.unit_test def test_format_bib_info(): + """Test for formatting bibliography info """ mock_fahamu_bib_info = [ { "author": "J.m", @@ -52,17 +54,17 @@ def test_format_bib_info(): "2015 - Differential regional and cellular distribution of TFF3 peptide in the human brain" ] - assert all([format_bibliography_info(data) == expected + assert all((format_bibliography_info(data) == expected for data, expected - in zip(mock_fahamu_bib_info, expected_result)]) + in zip(mock_fahamu_bib_info, expected_result))) @pytest.mark.unit_test def test_fetching_pubmed_info(monkeypatch): - - def mock_load_file(filename, dir_path): + """Test for fetching and populating pubmed data with pubmed info""" + def mock_load_file(_filename, _dir_path): return { - "12121" : { + "12121": { "Abstract": "items1", "Author": "A1" } @@ -83,22 +85,22 @@ def test_fetching_pubmed_info(monkeypatch): }, { "title": "Aging", - "year" : "2014", + "year": "2014", "doc_id": "12122" } ] - data = [ { + data = [{ "title": "Genes", "year": "2014", "doi": "https/article/genes/12121", "doc_id": "12121", - }, - { + }, + { "title": "Aging", - "year" : "2014", + "year": "2014", "doc_id": "12122" - }] + }] assert (fetch_pubmed(data, "/pubmed.json", "data/") == expected_results) -- cgit v1.2.3 From 4dc60a7de36134b416d3414bea0c89f49f9420f4 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 5 Sep 2024 13:58:27 +0300 Subject: Create new endpoints for 
fetching user previous records. --- gn3/api/llm.py | 72 +++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 21 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 20831e5..192af23 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -84,32 +84,62 @@ def rate_queries(task_id): }, 200 -@gnqa.route("/history", methods=["GET", "DELETE"]) +@gnqa.route("/search/records", methods=["GET"]) @require_oauth("profile user") -def fetch_prev_history(): - """Api endpoint to fetch GNQA previous search.""" +def get_user_search_records(): + """get all history records for a given user using their + user id + """ with (require_oauth.acquire("profile user") as token, db.connection(current_app.config["LLM_DB_PATH"]) as conn): cursor = conn.cursor() - if request.method == "DELETE": - task_ids = list(request.json.values()) - query = """DELETE FROM history - WHERE task_id IN ({}) - and user_id=?""".format(",".join("?" * len(task_ids))) - cursor.execute(query, (*task_ids, str(token.user.user_id),)) - return jsonify({}) - elif (request.method == "GET" and - request.args.get("search_term")): - cursor.execute( - """SELECT results from history - Where task_id=? and user_id=?""", - (request.args.get("search_term"), - str(token.user.user_id),)) - record = cursor.fetchone() - if record: - return dict(record).get("results") - return {} cursor.execute( """SELECT task_id,query from history WHERE user_id=?""", (str(token.user.user_id),)) return jsonify([dict(item) for item in cursor.fetchall()]) + + +@gnqa.route("/search/record/", methods=["GET"]) +@require_oauth("profile user") +def get_user_record_by_task(task_id): + """Get user record by task id """ + with (require_oauth.acquire("profile user") as token, + db.connection(current_app.config["LLM_DB_PATH"]) as conn): + cursor = conn.cursor() + cursor.execute( + """SELECT results from history + Where task_id=? 
and user_id=?""", + (task_id, + str(token.user.user_id),)) + record = cursor.fetchone() + if record: + return dict(record).get("results") + return {} + + +@gnqa.route("/search/record/", methods=["DELETE"]) +@require_oauth("profile user") +def delete_record(task_id): + """Delete user record by task-id""" + with (require_oauth.acquire("profile user") as token, + db.connection(current_app.config["LLM_DB_PATH"]) as conn): + cursor = conn.cursor() + query = """DELETE FROM history + WHERE task_id=? and user_id=?""" + cursor.execute(query, (task_id, token.user.user_id,)) + return {"msg": f"Successfully Deleted the task {task_id}"} + + +@gnqa.route("/search/records", methods=["DELETE"]) +@require_oauth("profile user") +def delete_records(): + """ Delete a users records using for all given task ids""" + with (require_oauth.acquire("profile user") as token, + db.connection(current_app.config["LLM_DB_PATH"]) as conn): + task_ids = list(request.json.values()) + cursor = conn.cursor() + query = """DELETE FROM history + WHERE task_id IN ({}) + and user_id=?""".format(",".join("?" * len(task_ids))) + cursor.execute(query, (*task_ids, str(token.user.user_id),)) + return jsonify({}) -- cgit v1.2.3 From 57986c976c702c590cc814fa9863c4fd9be42c6f Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 5 Sep 2024 14:00:07 +0300 Subject: Apply pep8 formatting. --- gn3/api/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 192af23..cdf427e 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -80,7 +80,7 @@ def rate_queries(task_id): weight=excluded.weight """, (str(user_id), query, answer, weight, task_id)) return { - "message": "You have successfully rated this query. Thank you!" + "message": "You have successfully rated this query. Thank you!" 
}, 200 -- cgit v1.2.3 From 8cb85c8f8c12180702cfc3a257bf9a513ac4da3d Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 5 Sep 2024 15:27:00 +0300 Subject: Sort previous records by datetime. --- gn3/api/llm.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/gn3/api/llm.py b/gn3/api/llm.py index cdf427e..7e60271 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -1,5 +1,7 @@ """Api endpoints for gnqa""" import json +from datetime import datetime + from flask import Blueprint from flask import current_app from flask import jsonify @@ -94,15 +96,18 @@ def get_user_search_records(): db.connection(current_app.config["LLM_DB_PATH"]) as conn): cursor = conn.cursor() cursor.execute( - """SELECT task_id,query from history WHERE user_id=?""", + """SELECT task_id, query, created_at from history WHERE user_id=?""", (str(token.user.user_id),)) - return jsonify([dict(item) for item in cursor.fetchall()]) + results = [dict(item) for item in cursor.fetchall()] + return jsonify(sorted(results, reverse=True, + key=lambda x: datetime.strptime(x.get("created_at"), + '%Y-%m-%d %H:%M:%S'))) @gnqa.route("/search/record/", methods=["GET"]) @require_oauth("profile user") def get_user_record_by_task(task_id): - """Get user record by task id """ + """Get user previous search record by task id """ with (require_oauth.acquire("profile user") as token, db.connection(current_app.config["LLM_DB_PATH"]) as conn): cursor = conn.cursor() @@ -120,7 +125,7 @@ def get_user_record_by_task(task_id): @gnqa.route("/search/record/", methods=["DELETE"]) @require_oauth("profile user") def delete_record(task_id): - """Delete user record by task-id""" + """Delete user previous seach record by task-id""" with (require_oauth.acquire("profile user") as token, db.connection(current_app.config["LLM_DB_PATH"]) as conn): cursor = conn.cursor() -- cgit v1.2.3