From 69013d298c869a42059af13bc63bef1bbdc7393d Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 14:05:30 +0300 Subject: Update file to use correct import from response file --- gn3/llms/process.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/llms/process.py b/gn3/llms/process.py index e38b73e..4edc238 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -49,8 +49,9 @@ def parse_context(context, get_info_func, format_bib_func): def rate_document(task_id, doc_id, rating, auth_token): - """This method is used to provide feedback for a document by making a rating.""" - # todo move this to clients + """This method is used to provide + feedback for a document by making a rating + """ try: url = urljoin(BASE_URL, f"""/feedback?task_id={task_id}&document_id={doc_id}&feedback={rating}""") @@ -107,7 +108,7 @@ def get_gnqa(query, auth_token, tmp_dir=""): answer = resp_text['data']['answer'] context = resp_text['data']['context'] references = parse_context( - context, DocIDs().getInfo, format_bibliography_info) + context, DocIDs().get_info, format_bibliography_info) references = fetch_pubmed(references, "pubmed.json", tmp_dir) return task_id, answer, references -- cgit v1.2.3 From 75365bd88a720261a1b454f0ea11a840fb3be83e Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 14:20:00 +0300 Subject: Move Parsing Doc_Ids to process file * Context: groups related items --- gn3/llms/process.py | 51 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 8 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 4edc238..1881e92 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -1,21 +1,56 @@ """this module contains code for processing response from fahamu client.py""" +# pylint: disable=C0301 import os import string import json +import logging +import requests from urllib.parse import urljoin from urllib.parse import quote -import logging -import requests from gn3.llms.client import GeneNetworkQAClient -from gn3.llms.response import DocIDs BASE_URL = 'https://genenetwork.fahamuai.com/api/tasks' - - -# pylint: disable=C0301 +BASEDIR = os.path.abspath(os.path.dirname(__file__)) + + +class DocIDs(): + """ Class Method to Parse document id and names from files""" + def __init__(self): + """ + init method for Docids + * doc_ids.json: opens doc)ids for gn references + * sugar_doc_ids: open doci_ids for diabetes references + """ + self.doc_ids = self.load_file("doc_ids.json") + self.sugar_doc_ids = self.load_file("all_files.json") + self.format_doc_ids(self.sugar_doc_ids) + + def load_file(self, file_name): + """Method to load and read doc_id files""" + file_path = os.path.join(BASEDIR, file_name) + if os.path.isfile(file_path): + with open(file_path, "rb") as file_handler: + return json.load(file_handler) + else: + raise FileNotFoundError(f"{file_path}-- FIle does not exist\n") + + def format_doc_ids(self, docs): + """method to format doc_ids for list items""" + for _key, val in docs.items(): + if isinstance(val, list): + for doc_obj in val: + doc_name = doc_obj["filename"].removesuffix(".pdf").removesuffix(".txt").replace("_", "") + self.doc_ids.update({doc_obj["id"]: doc_name}) + + def get_info(self, doc_id): + """ interface to make read from doc_ids""" + if doc_id in self.doc_ids.keys(): + return self.doc_ids[doc_id] + else: + return doc_id def format_bibliography_info(bib_info): @@ -131,6 +166,6 @@ def fetch_query_results(query, user_id, redis_conn): def get_user_queries(user_id, redis_conn): """methos to fetch all queries for a specific user""" - results = redis_conn.keys(f"LLM:{user_id}*") - return [query for query in [result.partition("-")[2] for result in results] if query != ""] + return [query for query in + [result.partition("-")[2] for result in results] if query != ""] -- cgit v1.2.3 From a5a6e319e85c28ff3ab9d6f2d8a869bc2ac77ac8 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 14:25:44 +0300 Subject: Delete function: only useful when training own llm model. --- gn3/llms/process.py | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 1881e92..e47a997 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -83,26 +83,6 @@ def parse_context(context, get_info_func, format_bib_func): return results -def rate_document(task_id, doc_id, rating, auth_token): - """This method is used to provide - feedback for a document by making a rating - """ - try: - url = urljoin(BASE_URL, - f"""/feedback?task_id={task_id}&document_id={doc_id}&feedback={rating}""") - headers = {"Authorization": f"Bearer {auth_token}"} - - resp = requests.post(url, headers=headers) - resp.raise_for_status() - - return {"status": "success", **resp.json()} - except requests.exceptions.HTTPError as http_error: - raise RuntimeError(f"HTTP Error Occurred:\ - {http_error.response.text} -with status code- {http_error.response.status_code}") from http_error - except Exception as error: - raise RuntimeError(f"An error occurred: {str(error)}") from error - - def load_file(filename, dir_path): """function to open and load json file""" file_path = os.path.join(dir_path, f"{filename}") -- cgit v1.2.3 From 3913374700521647e93bf9afabb9943746ac5d5b Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 14:27:20 +0300 Subject: Pep8 formatting gn3:llm:process. --- gn3/llms/process.py | 2 -- 1 file changed, 2 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/llms/process.py b/gn3/llms/process.py index e47a997..d080acb 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -6,9 +6,7 @@ import json import logging import requests -from urllib.parse import urljoin from urllib.parse import quote - from gn3.llms.client import GeneNetworkQAClient -- cgit v1.2.3 From f30300a82f605fa96130fbcbdcd17c53296d2372 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 16:34:34 +0300 Subject: Minor code refactoring related --- gn3/llms/process.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/llms/process.py b/gn3/llms/process.py index d080acb..11961eb 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -4,9 +4,9 @@ import os import string import json import logging +from urllib.parse import quote import requests -from urllib.parse import quote from gn3.llms.client import GeneNetworkQAClient @@ -106,15 +106,24 @@ def fetch_pubmed(references, file_name, data_dir=""): return references -def get_gnqa(query, auth_token, tmp_dir=""): - """entry function for the gn3 api endpoint()""" +def get_gnqa(query, auth_token, data_dir=""): + """entry function for the gn3 api endpoint() + ARGS: + query: what is a gene + auth_token: token to connect to api_client + data_dir: base datirectory for gn3 data + Returns: + task_id: fahamu unique identifier for task + answer + references: contains doc_name,reference,pub_med_info + """ - api_client = GeneNetworkQAClient(requests.Session(), api_key=auth_token) + api_client = GeneNetworkQAClient(requests.Session(), auth_token) res, task_id = api_client.ask('?ask=' + quote(query), auth_token) if task_id == 0: raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}") - res, success = api_client.get_answer(task_id) - if success == 1: + res, status = api_client.get_answer(task_id) + if status == 1: resp_text = filter_response_text(res.text) if resp_text.get("data") is None: return task_id, "Please try to rephrase your question to receive feedback", [] @@ -122,7 +131,7 @@ def get_gnqa(query, auth_token, tmp_dir=""): context = resp_text['data']['context'] references = parse_context( context, DocIDs().get_info, format_bibliography_info) - references = fetch_pubmed(references, "pubmed.json", tmp_dir) + references = fetch_pubmed(references, "pubmed.json", data_dir) return task_id, answer, references else: -- cgit v1.2.3 From 50c8500105912a6380ea8f971ccfb17ef0994279 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 18:56:41 +0300 Subject: Refactor code for http request adapters. --- gn3/llms/client.py | 43 +++++++++++-------------------------------- gn3/llms/process.py | 2 +- 2 files changed, 12 insertions(+), 33 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 2e9898f..810227f 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -1,5 +1,6 @@ """Module Contains code for making request to fahamu Api""" # pylint: disable=C0301 +# pylint: disable=R0913 import json import time @@ -24,13 +25,13 @@ class TimeoutHTTPAdapter(HTTPAdapter): self.timeout = timeout super().__init__(*args, **kwargs) - def send(self, request, **kwargs): + def send(self, *args, **kwargs): """Override :obj:`HTTPAdapter` send method to add a default timeout.""" timeout = kwargs.get("timeout") if timeout is None: kwargs["timeout"] = self.timeout - return super().send(request, **kwargs) + return super().send(*args, **kwargs) class GeneNetworkQAClient(Session): @@ -77,21 +78,15 @@ class GeneNetworkQAClient(Session): self.mount("https://", adapter) self.mount("http://", adapter) - def ask_the_documents(self, extend_url, my_auth): + def get_answer_using_task_id(self, extend_url, my_auth): + """call this method with task id to fetch response""" try: - response = requests.post( - self.base_url + extend_url, data={}, headers=my_auth) + response = requests.get( + self.answer_url + extend_url, data={}, headers=my_auth) response.raise_for_status() + return response except requests.exceptions.RequestException as error: - raise RuntimeError(f"Error making the request: {error}") from error - if response.status_code != 200: - return GeneNetworkQAClient.negative_status_msg(response), 0 - task_id = GeneNetworkQAClient.get_task_id_from_result(response) - response = GeneNetworkQAClient.get_answer_using_task_id(task_id, - my_auth) - if response.status_code != 200: - return GeneNetworkQAClient.negative_status_msg(response), 0 - return response, 1 + raise error @staticmethod def negative_status_msg(response): @@ -102,7 +97,7 @@ class GeneNetworkQAClient(Session): """fahamu ask api interface""" res = self.custom_request('POST', f"{self.base_url}{ex_url}", *args, **kwargs) if res.status_code != 200: - return self.negative_status_msg(res), 0 + return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0 return res, json.loads(res.text) def get_answer(self, taskid, *args, **kwargs): @@ -110,25 +105,9 @@ class GeneNetworkQAClient(Session): query = f"{self.answer_url}?task_id={taskid['task_id']}" res = self.custom_request('GET', query, *args, **kwargs) if res.status_code != 200: - return self.negative_status_msg(res), 0 + return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0 return res, 1 - @staticmethod - def get_task_id_from_result(response): - """method to get task_id from response""" - task_id = json.loads(response.text) - return f"?task_id={task_id.get('task_id', '')}" - - def get_answer_using_task_id(self, extend_url, my_auth): - """call this method with task id to fetch response""" - try: - response = requests.get( - self.answer_url + extend_url, data={}, headers=my_auth) - response.raise_for_status() - return response - except requests.exceptions.RequestException as error: - raise error - def custom_request(self, method, url, *args, **kwargs): """ make custom request to fahamu api ask and get response""" max_retries = 50 diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 11961eb..9cb09a1 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -118,7 +118,7 @@ def get_gnqa(query, auth_token, data_dir=""): references: contains doc_name,reference,pub_med_info """ - api_client = GeneNetworkQAClient(requests.Session(), auth_token) + api_client = GeneNetworkQAClient(requests.Session(), api_key=auth_token) res, task_id = api_client.ask('?ask=' + quote(query), auth_token) if task_id == 0: raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}") -- cgit v1.2.3 From d3bc323fe3a965ee5b6917987c4fe7662056e560 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 17 May 2024 13:34:37 +0300 Subject: Refactor custom request codebase. --- gn3/llms/client.py | 40 +++++++++++++++++++--------------------- gn3/llms/process.py | 5 ++--- 2 files changed, 21 insertions(+), 24 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 05e3500..d57bca2 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -1,6 +1,5 @@ """Module Contains code for making request to fahamu Api""" # pylint: disable=C0301 -# pylint: disable=R0913 import json import time @@ -36,9 +35,7 @@ class GeneNetworkQAClient(Session): request timeouts, and request retries. Args: - account (str): Base address subdomain. api_key (str): API key. - version (str, optional): API version, defaults to "v3". timeout (int, optional): Timeout value, defaults to 5. total_retries (int, optional): Total retries value, defaults to 5. backoff_factor (int, optional): Retry backoff factor value, @@ -50,7 +47,7 @@ class GeneNetworkQAClient(Session): api_key="XXXXXXXXXXXXXXXXXXX...") """ - def __init__(self, account, api_key, version="v3", timeout=30, + def __init__(self, api_key, timeout=30, total_retries=5, backoff_factor=30): super().__init__() self.headers.update( @@ -95,11 +92,14 @@ class GeneNetworkQAClient(Session): def get_answer(self, taskid, *args, **kwargs): """Fahamu get answer interface""" - query = f"{self.answer_url}?task_id={taskid['task_id']}" - res = self.custom_request('GET', query, *args, **kwargs) - if res.status_code != 200: - return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0 - return res, 1 + try: + query = f"{self.answer_url}?task_id={taskid['task_id']}" + res = self.custom_request('GET', query, *args, **kwargs) + if res.status_code != 200: + return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0 + return res, 1 + except TimeoutError: + return "Timeout error occured:try to rephrase your query", 0 def custom_request(self, method, url, *args, **kwargs): """ make custom request to fahamu api ask and get response""" @@ -109,20 +109,18 @@ class GeneNetworkQAClient(Session): try: response = super().request(method, url, *args, **kwargs) response.raise_for_status() + if response.ok: + if method.lower() == "get" and response.json().get("data") is None: + time.sleep(retry_delay) + continue + return response + else: + time.sleep(retry_delay) except requests.exceptions.HTTPError as error: if error.response.status_code == 500: raise LLMError(error.request, error.response, f"Response Error with:status_code:{error.response.status_code},Reason for error: Use of Invalid Fahamu Token") from error - elif error.response.status_code == 404: - raise LLMError(error.request, error.response, f"404 Client Error: Not Found for url: {self.base_url}") from error - raise error + raise LLMError(error.request, error.response, + f"HTTP error occurred with error status:{error.response.status_code}") from error except requests.exceptions.RequestException as error: raise error - if response.ok: - if method.lower() == "get" and response.json().get("data") is None: - time.sleep(retry_delay) - continue - else: - return response - else: - time.sleep(retry_delay) - return response + raise TimeoutError diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 9cb09a1..4725bcb 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -5,7 +5,6 @@ import string import json import logging from urllib.parse import quote -import requests from gn3.llms.client import GeneNetworkQAClient @@ -118,7 +117,7 @@ def get_gnqa(query, auth_token, data_dir=""): references: contains doc_name,reference,pub_med_info """ - api_client = GeneNetworkQAClient(requests.Session(), api_key=auth_token) + api_client = GeneNetworkQAClient(api_key=auth_token) res, task_id = api_client.ask('?ask=' + quote(query), auth_token) if task_id == 0: raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}") @@ -135,7 +134,7 @@ def get_gnqa(query, auth_token, data_dir=""): return task_id, answer, references else: - return task_id, "Please try to rephrase your question to receive feedback", [] + return task_id, "We couldn't provide a response,Please try to rephrase your question to receive feedback", [] def fetch_query_results(query, user_id, redis_conn): -- cgit v1.2.3 From 2a99da9f46233a28e9ea0b6a297d8a6b93f61923 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 21 May 2024 16:38:53 +0300 Subject: Remove obsolete functions. --- gn3/api/llm.py | 39 +-------------------------------------- gn3/llms/process.py | 20 -------------------- 2 files changed, 1 insertion(+), 58 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/api/llm.py b/gn3/api/llm.py index b2c9c3e..02b37f9 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -9,8 +9,6 @@ from flask import jsonify from flask import request from gn3.llms.process import get_gnqa -from gn3.llms.process import get_user_queries -from gn3.llms.process import fetch_query_results from gn3.llms.errors import LLMError from gn3.auth.authorisation.oauth2.resource_server import require_oauth @@ -46,7 +44,7 @@ def gnqna(): redis_conn.set( f"LLM:{str(token.user.user_id)}-{str(task_id['task_id'])}", json.dumps(response) - ) + ) return response except Exception: # handle specific error return response @@ -105,38 +103,3 @@ def fetch_prev_searches(): for key in redis_conn.scan_iter(f"LLM:{str(the_token.user.user_id)}*"): query_result[key] = json.loads(redis_conn.get(key)) return jsonify(query_result) - - -@gnqa.route("/history/", methods=["GET"]) -@require_oauth("profile user") -def fetch_user_hist(query): - """"Endpoint to fetch previos searches for User""" - with (require_oauth.acquire("profile user") as the_token, - Redis.from_url(current_app.config["REDIS_URI"], - decode_responses=True) as redis_conn): - return jsonify({ - **fetch_query_results(query, the_token.user.user_id, redis_conn), - "prev_queries": get_user_queries("random_user", redis_conn) - }) - - -@gnqa.route("/historys/", methods=["GET"]) -def fetch_users_hist_records(query): - """method to fetch all users hist:note this is a test functionality - to be replaced by fetch_user_hist - """ - with Redis.from_url(current_app.config["REDIS_URI"], - decode_responses=True) as redis_conn: - return jsonify({ - **fetch_query_results(query, "random_user", redis_conn), - "prev_queries": get_user_queries("random_user", redis_conn) - }) - - -@gnqa.route("/get_hist_names", methods=["GET"]) -def fetch_prev_hist_ids(): - """Test method for fetching history for Anony Users""" - with (Redis.from_url(current_app.config["REDIS_URI"], - decode_responses=True)) as redis_conn: - return jsonify({"prev_queries": get_user_queries("random_user", - redis_conn)}) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 4725bcb..eba7e4b 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -135,23 +135,3 @@ def get_gnqa(query, auth_token, data_dir=""): return task_id, answer, references else: return task_id, "We couldn't provide a response,Please try to rephrase your question to receive feedback", [] - - -def fetch_query_results(query, user_id, redis_conn): - """this method fetches prev user query searches""" - result = redis_conn.get(f"LLM:{user_id}-{query}") - if result: - return json.loads(result) - return { - "query": query, - "answer": "Sorry No answer for you", - "references": [], - "task_id": None - } - - -def get_user_queries(user_id, redis_conn): - """methos to fetch all queries for a specific user""" - results = redis_conn.keys(f"LLM:{user_id}*") - return [query for query in - [result.partition("-")[2] for result in results] if query != ""] -- cgit v1.2.3 From 13bb57cbd191ffe6e40e830ca08b9191b2dc5700 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 15:34:53 +0300 Subject: Pass query as an argument to api_client ask method. --- gn3/llms/process.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/llms/process.py b/gn3/llms/process.py index eba7e4b..d53a7fd 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -118,7 +118,7 @@ def get_gnqa(query, auth_token, data_dir=""): """ api_client = GeneNetworkQAClient(api_key=auth_token) - res, task_id = api_client.ask('?ask=' + quote(query), auth_token) + res, task_id = api_client.ask('?ask=' + quote(query), query=query) if task_id == 0: raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}") res, status = api_client.get_answer(task_id) -- cgit v1.2.3 From 651f307a4b8e60aaea0c8a7649a5b02aafce7a98 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 24 May 2024 15:59:30 +0300 Subject: Removed status check on get_gnqa function. --- gn3/llms/process.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/llms/process.py b/gn3/llms/process.py index d53a7fd..ab2a80e 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -116,22 +116,11 @@ def get_gnqa(query, auth_token, data_dir=""): answer references: contains doc_name,reference,pub_med_info """ - api_client = GeneNetworkQAClient(api_key=auth_token) res, task_id = api_client.ask('?ask=' + quote(query), query=query) - if task_id == 0: - raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}") - res, status = api_client.get_answer(task_id) - if status == 1: - resp_text = filter_response_text(res.text) - if resp_text.get("data") is None: - return task_id, "Please try to rephrase your question to receive feedback", [] - answer = resp_text['data']['answer'] - context = resp_text['data']['context'] - references = parse_context( - context, DocIDs().get_info, format_bibliography_info) - references = fetch_pubmed(references, "pubmed.json", data_dir) - - return task_id, answer, references - else: - return task_id, "We couldn't provide a response,Please try to rephrase your question to receive feedback", [] + res, _status = api_client.get_answer(task_id) + resp_text = filter_response_text(res.text) + answer = resp_text['data']['answer'] + context = resp_text['data']['context'] + return task_id, answer, fetch_pubmed(parse_context( + context, DocIDs().get_info, format_bibliography_info), "pubmed.json", data_dir) -- cgit v1.2.3 From 105f2b36eb62b9b097e1cbf6fa815f98da77bc16 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 27 May 2024 14:09:02 +0300 Subject: Update Docstrings for Api endpoints and functions. --- gn3/api/llm.py | 6 +++--- gn3/llms/process.py | 48 +++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 44 insertions(+), 10 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/api/llm.py b/gn3/api/llm.py index ab33c7a..4b8ec52 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -17,7 +17,7 @@ gnqa = Blueprint("gnqa", __name__) @gnqa.route("/search", methods=["POST"]) def search(): - """Main gnqa endpoint""" + """Api endpoint for searching queries in fahamu Api""" query = request.json.get("querygnqa", "") if not query: return jsonify({"error": "querygnqa is missing in the request"}), 400 @@ -56,7 +56,7 @@ def search(): @gnqa.route("/rating/", methods=["POST"]) @require_oauth("profile") def rate_queries(task_id): - """Endpoint for rating qnqa query and answer""" + """Api endpoint for rating GNQA query and answer""" with (require_oauth.acquire("profile") as token, db.connection(current_app.config["LLM_DB_PATH"]) as conn): results = request.json @@ -89,7 +89,7 @@ def rate_queries(task_id): @gnqa.route("/history", methods=["GET"]) @require_oauth("profile user") def fetch_prev_history(): - """ api method to fetch search query records""" + """Api endpoint to fetch GNQA previous search.""" with (require_oauth.acquire("profile user") as token, db.connection(current_app.config["LLM_DB_PATH"]) as conn): cursor = conn.cursor() diff --git a/gn3/llms/process.py b/gn3/llms/process.py index ab2a80e..ade4104 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -35,7 +35,7 @@ class DocIDs(): raise FileNotFoundError(f"{file_path}-- FIle does not exist\n") def format_doc_ids(self, docs): - """method to format doc_ids for list items""" + """method to format doc_ids for list items doc_id and doc_name""" for _key, val in docs.items(): if isinstance(val, list): for doc_obj in val: @@ -43,7 +43,14 @@ class DocIDs(): self.doc_ids.update({doc_obj["id"]: doc_name}) def get_info(self, doc_id): - """ interface to make read from doc_ids""" + """ interface to make read from doc_ids + and extract info data else returns + doc_id + Args: + doc_id: str: a search key for doc_ids + Returns: + an object with doc_info if doc_id in doc_ids + """ if doc_id in self.doc_ids.keys(): return self.doc_ids[doc_id] else: @@ -51,7 +58,8 @@ class DocIDs(): def format_bibliography_info(bib_info): - """Function for formatting bibliography info""" + """Utility function for formatting bibliography info + """ if isinstance(bib_info, str): return bib_info.removesuffix('.txt') elif isinstance(bib_info, dict): @@ -66,7 +74,15 @@ def filter_response_text(val): def parse_context(context, get_info_func, format_bib_func): - """function to parse doc_ids content""" + """Function to parse doc_ids content + Args: + context: raw references from fahamu api + get_info_func: function to get doc_ids info + format_bib_func: function to foramt bibliography info + Returns: + an list with each item having (doc_id,bib_info, + combined reference text) + """ results = [] for doc_ids, summary in context.items(): combo_txt = "" @@ -81,7 +97,12 @@ def parse_context(context, get_info_func, format_bib_func): def load_file(filename, dir_path): - """function to open and load json file""" + """Utility function to read json file + Args: + filename: file name to read + dir_path: base directory for the file + Returns: json data read to a dict + """ file_path = os.path.join(dir_path, f"{filename}") if not os.path.isfile(file_path): raise FileNotFoundError(f"{filename} was not found or is a directory") @@ -90,8 +111,19 @@ def load_file(filename, dir_path): def fetch_pubmed(references, file_name, data_dir=""): - """method to fetch and populate references with pubmed""" + """ + Fetches PubMed data from a JSON file and populates the\ + references dictionary. + + Args: + references (dict): Dictionary with document IDs as keys\ + and reference data as values. + filename (str): Name of the JSON file containing PubMed data. + data_dir (str): Base directory where the data files are located. + Returns: + dict: Updated references dictionary populated with the PubMed data. + """ try: pubmed = load_file(file_name, os.path.join(data_dir, "gn-meta/lit")) for reference in references: @@ -123,4 +155,6 @@ def get_gnqa(query, auth_token, data_dir=""): answer = resp_text['data']['answer'] context = resp_text['data']['context'] return task_id, answer, fetch_pubmed(parse_context( - context, DocIDs().get_info, format_bibliography_info), "pubmed.json", data_dir) + context, DocIDs().get_info, + format_bibliography_info), + "pubmed.json", data_dir) -- cgit v1.2.3 From d0801cea229d00d5d4ce19fa1cb36242e56070d1 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 27 May 2024 14:18:48 +0300 Subject: Delete filter response text method and update relevant code. --- gn3/llms/process.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/llms/process.py b/gn3/llms/process.py index ade4104..2ce6b2b 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -67,12 +67,6 @@ def format_bibliography_info(bib_info): return bib_info -def filter_response_text(val): - """helper function for filtering non-printable chars""" - return json.loads(''.join([str(char) - for char in val if char in string.printable])) - - def parse_context(context, get_info_func, format_bib_func): """Function to parse doc_ids content Args: @@ -151,7 +145,8 @@ def get_gnqa(query, auth_token, data_dir=""): api_client = GeneNetworkQAClient(api_key=auth_token) res, task_id = api_client.ask('?ask=' + quote(query), query=query) res, _status = api_client.get_answer(task_id) - resp_text = filter_response_text(res.text) + resp_text = json.loads(''.join([str(char) + for char in res.text if char in string.printable])) answer = resp_text['data']['answer'] context = resp_text['data']['context'] return task_id, answer, fetch_pubmed(parse_context( -- cgit v1.2.3 From 58fbc6527537cb229ded87eea57949c3cf02621f Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 27 May 2024 14:39:38 +0300 Subject: Remove duplicate code for loading files. --- gn3/llms/process.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 2ce6b2b..40e53c5 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -21,19 +21,10 @@ class DocIDs(): * doc_ids.json: opens doc)ids for gn references * sugar_doc_ids: open doci_ids for diabetes references """ - self.doc_ids = self.load_file("doc_ids.json") - self.sugar_doc_ids = self.load_file("all_files.json") + self.doc_ids = load_file("doc_ids.json", BASEDIR) + self.sugar_doc_ids = load_file("all_files.json", BASEDIR) self.format_doc_ids(self.sugar_doc_ids) - def load_file(self, file_name): - """Method to load and read doc_id files""" - file_path = os.path.join(BASEDIR, file_name) - if os.path.isfile(file_path): - with open(file_path, "rb") as file_handler: - return json.load(file_handler) - else: - raise FileNotFoundError(f"{file_path}-- FIle does not exist\n") - def format_doc_ids(self, docs): """method to format doc_ids for list items doc_id and doc_name""" for _key, val in docs.items(): -- cgit v1.2.3 From d3f87b9a02bfec223d23c16eb1374d53065fea92 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 27 May 2024 17:37:13 +0300 Subject: Add regular expressions for parsing links in texts. --- gn3/llms/process.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 40e53c5..55c27a0 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -1,6 +1,7 @@ """this module contains code for processing response from fahamu client.py""" # pylint: disable=C0301 import os +import re import string import json import logging @@ -76,8 +77,13 @@ def parse_context(context, get_info_func, format_bib_func): doc_info = get_info_func(doc_ids) bib_info = doc_ids if doc_ids == doc_info else format_bib_func( doc_info) + pattern = r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*' + combo_text = re.sub(pattern, + lambda x: f" {x[0]} ", + combo_txt) results.append( - {"doc_id": doc_ids, "bibInfo": bib_info, "comboTxt": combo_txt}) + {"doc_id": doc_ids, "bibInfo": bib_info, + "comboTxt": combo_text}) return results @@ -137,8 +143,10 @@ def get_gnqa(query, auth_token, data_dir=""): res, task_id = api_client.ask('?ask=' + quote(query), query=query) res, _status = api_client.get_answer(task_id) resp_text = json.loads(''.join([str(char) - for char in res.text if char in string.printable])) - answer = resp_text['data']['answer'] + for char in res.text if char in string.printable])) + answer = re.sub(r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*', + lambda x: f" {x[0]} ", + resp_text["data"]["answer"]) context = resp_text['data']['context'] return task_id, answer, fetch_pubmed(parse_context( context, DocIDs().get_info, -- cgit v1.2.3 From 2e81e48695e9b5618746c8cd1c6c83b452836442 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 2 Sep 2024 15:12:12 +0300 Subject: Fix minor syntax issue. --- gn3/llms/process.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 55c27a0..c3e6eda 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -19,8 +19,8 @@ class DocIDs(): def __init__(self): """ init method for Docids - * doc_ids.json: opens doc)ids for gn references - * sugar_doc_ids: open doci_ids for diabetes references + * doc_ids.json: open doc_ids for gn references + * sugar_doc_ids: open doc_ids for diabetes references """ self.doc_ids = load_file("doc_ids.json", BASEDIR) self.sugar_doc_ids = load_file("all_files.json", BASEDIR) -- cgit v1.2.3 From 086c80510ff418bca77f544d3dd4b174d2dc9c8e Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 2 Sep 2024 15:15:01 +0300 Subject: Remove unecessary check for open file. --- gn3/llms/process.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/llms/process.py b/gn3/llms/process.py index c3e6eda..ef925c4 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -94,10 +94,8 @@ def load_file(filename, dir_path): dir_path: base directory for the file Returns: json data read to a dict """ - file_path = os.path.join(dir_path, f"{filename}") - if not os.path.isfile(file_path): - raise FileNotFoundError(f"{filename} was not found or is a directory") - with open(file_path, "rb") as file_handler: + with open(os.path.join(dir_path, f"{filename}"), + "rb") as file_handler: return json.load(file_handler) -- cgit v1.2.3 From 742beb6ee663bc9ae5409461d2be4b2144b8893e Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 2 Sep 2024 15:17:12 +0300 Subject: Refactor doc_id object. --- gn3/llms/process.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/llms/process.py b/gn3/llms/process.py index ef925c4..bfce9a5 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -23,8 +23,8 @@ class DocIDs(): * sugar_doc_ids: open doc_ids for diabetes references """ self.doc_ids = load_file("doc_ids.json", BASEDIR) - self.sugar_doc_ids = load_file("all_files.json", BASEDIR) - self.format_doc_ids(self.sugar_doc_ids) + sugar_doc_ids = load_file("all_files.json", BASEDIR) + self.format_doc_ids(sugar_doc_ids) def format_doc_ids(self, docs): """method to format doc_ids for list items doc_id and doc_name""" -- cgit v1.2.3 From c16c54759cfd493250424ee3f565862e5d6009b3 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 3 Sep 2024 10:41:33 +0300 Subject: Raise KeyError for doc_id not found in doc_ids. --- gn3/llms/process.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'gn3/llms/process.py') diff --git a/gn3/llms/process.py b/gn3/llms/process.py index bfce9a5..b8e47e7 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -41,13 +41,10 @@ class DocIDs(): Args: doc_id: str: a search key for doc_ids Returns: - an object with doc_info if doc_id in doc_ids + an object if doc id exists else + raises a KeyError """ - if doc_id in self.doc_ids.keys(): - return self.doc_ids[doc_id] - else: - return doc_id - + return self.doc_ids[doc_id] def format_bibliography_info(bib_info): """Utility function for formatting bibliography info @@ -74,9 +71,11 @@ def parse_context(context, get_info_func, format_bib_func): combo_txt = "" for entry in summary: combo_txt += "\t" + entry["text"] - doc_info = get_info_func(doc_ids) - bib_info = doc_ids if doc_ids == doc_info else format_bib_func( - doc_info) + try: + doc_info = get_info_func(doc_ids) + bib_info = format_bib_func(doc_info) + except KeyError: + bib_info = doc_ids pattern = r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*' combo_text = re.sub(pattern, lambda x: f" {x[0]} ", -- cgit v1.2.3