From 7aa31cf63e17efe194e501bc37068a2207ab8f38 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 25 Apr 2024 19:45:00 +0300 Subject: Pep8 formatting for response file. --- gn3/llms/response.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/gn3/llms/response.py b/gn3/llms/response.py index 11cbd94..93320e9 100644 --- a/gn3/llms/response.py +++ b/gn3/llms/response.py @@ -1,11 +1,10 @@ # pylint: skip-file -import string import json import os -basedir = os.path.abspath(os.path.dirname(__file__)) +basedir = os.path.abspath(os.path.dirname(__file__)) class DocIDs(): @@ -26,21 +25,20 @@ class DocIDs(): return result else: raise Exception("\n{0} -- File does not exist\n".format(file_path)) - + def formatDocIDs(self, values): for _key, _val in values.items(): if isinstance(_val, list): for theObject in _val: docName = self.formatDocumentName(theObject['filename']) - docID = theObject['id'] + docID = theObject['id'] self.doc_ids.update({docID: docName}) - - def formatDocumentName(self, val): - result = val.removesuffix('.pdf') - result = result.removesuffix('.txt') - result = result.replace('_', ' ') - return result + def formatDocumentName(self, val): + result = val.removesuffix('.pdf') + result = result.removesuffix('.txt') + result = result.replace('_', ' ') + return result def getInfo(self, doc_id): if doc_id in self.doc_ids.keys(): @@ -48,6 +46,7 @@ class DocIDs(): else: return doc_id + class RespContext(): def __init__(self, context): self.cntxt = context @@ -66,10 +65,9 @@ class RespContext(): def createAccordionFromJson(theContext): result = '' # loop thru json array - ndx = 0 for docID, summaryLst in theContext.items(): # item is a key with a list comboTxt = '' for entry in summaryLst: comboTxt += '\t' + entry['text'] - return result \ No newline at end of file + return result -- cgit v1.2.3 From f6acfd3d6024ad36ef82a8e27918b03f6538cccc Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 25 Apr 2024 19:14:12 +0300 Subject: Code refactoring * this commit removes ununsed imports and also refactor GenenetworkQAclient Class --- gn3/llms/client.py | 67 +++++++++++++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 042becd..b843907 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -2,19 +2,13 @@ import json import string import os -import datetime import time import requests from requests import Session -from urllib.parse import urljoin from requests.packages.urllib3.util.retry import Retry -from requests import HTTPError -from requests import Session from requests.adapters import HTTPAdapter -from urllib.request import urlretrieve from urllib.parse import quote -from gn3.llms.errors import UnprocessableEntity from gn3.llms.errors import LLMError basedir = os.path.join(os.path.dirname(__file__)) @@ -24,7 +18,8 @@ class TimeoutHTTPAdapter(HTTPAdapter): def __init__(self, timeout, *args, **kwargs): """TimeoutHTTPAdapter constructor. Args: - timeout (int): How many seconds to wait for the server to send data before + timeout (int): How many seconds to wait for the server to + send data before giving up. """ self.timeout = timeout @@ -43,7 +38,8 @@ class GeneNetworkQAClient(Session): """GeneNetworkQA Client This class provides a client object interface to the GeneNetworkQA API. - It extends the `requests.Session` class and includes authorization, base URL, + It extends the `requests.Session` class and includes authorization, + base URL, request timeouts, and request retries. Args: @@ -52,16 +48,19 @@ class GeneNetworkQAClient(Session): version (str, optional): API version, defaults to "v3". timeout (int, optional): Timeout value, defaults to 5. total_retries (int, optional): Total retries value, defaults to 5. - backoff_factor (int, optional): Retry backoff factor value, defaults to 30. + backoff_factor (int, optional): Retry backoff factor value, + defaults to 30. Usage: from genenetworkqa import GeneNetworkQAClient - gnqa = GeneNetworkQAClient(account="account-name", api_key="XXXXXXXXXXXXXXXXXXX...") + gnqa = GeneNetworkQAClient(account="account-name", + api_key="XXXXXXXXXXXXXXXXXXX...") """ BASE_URL = 'https://genenetwork.fahamuai.com/api/tasks' - def __init__(self, account, api_key, version="v3", timeout=30, total_retries=5, backoff_factor=30): + def __init__(self, account, api_key, version="v3", timeout=30, + total_retries=5, backoff_factor=30): super().__init__() self.headers.update( {"Authorization": "Bearer " + api_key}) @@ -95,31 +94,31 @@ class GeneNetworkQAClient(Session): return bib_info @staticmethod - def ask_the_documents(extend_url, my_auth): + def ask_the_documents(self, extend_url, my_auth): try: response = requests.post( - base_url + extend_url, data={}, headers=my_auth) + self.base_url + extend_url, data={}, headers=my_auth) response.raise_for_status() except requests.exceptions.RequestException as e: # Handle the exception appropriately, e.g., log the error raise RuntimeError(f"Error making the request: {e}") if response.status_code != 200: - return negative_status_msg(response), 0 + return GeneNetworkQAClient.negative_status_msg(response), 0 - task_id = get_task_id_from_result(response) - response = get_answer_using_task_id(task_id, my_auth) + task_id = GeneNetworkQAClient.get_task_id_from_result(response) + response = GeneNetworkQAClient.get_answer_using_task_id(task_id, + my_auth) if response.status_code != 200: - return negative_status_msg(response), 0 + return GeneNetworkQAClient.negative_status_msg(response), 0 return response, 1 @staticmethod def negative_status_msg(response): return f"Error: Status code -{response.status_code}- Reason::{response.reason}" - # return f"Problems\n\tStatus code => {response.status_code}\n\tReason => {response.reason}" def ask(self, exUrl, *args, **kwargs): askUrl = self.BASE_URL + exUrl @@ -147,18 +146,13 @@ class GeneNetworkQAClient(Session): response.raise_for_status() except requests.exceptions.HTTPError as error: - if error.response.status_code ==500: - raise LLMError(error.request, error.response, f"Response Error,status_code:{error.response.status_code},Reason: Use of Invalid Token") - elif error.response.status_code ==404: - raise LLMError(error.request,error.response,f"404 Client Error: Not Found for url: {self.BASE_URL}") + if error.response.status_code == 500: + raise LLMError(error.request, error.response, f"Response Error with:status_code:{error.response.status_code},Reason for error: Use of Invalid Fahamu Token") + elif error.response.status_code == 404: + raise LLMError(error.request, error.response, f"404 Client Error: Not Found for url: {self.BASE_URL}") raise error - except requests.exceptions.RequestException as error: - raise error - - - - + raise error if response.ok: if method.lower() == "get" and response.json().get("data") is None: time.sleep(retry_delay) @@ -175,11 +169,10 @@ class GeneNetworkQAClient(Session): result = f"?task_id={task_id.get('task_id', '')}" return result - @staticmethod - def get_answer_using_task_id(extend_url, my_auth): + def get_answer_using_task_id(self, extend_url, my_auth): try: response = requests.get( - answer_url + extend_url, data={}, headers=my_auth) + self.answer_url + extend_url, data={}, headers=my_auth) response.raise_for_status() return response except requests.exceptions.RequestException as error: @@ -189,7 +182,8 @@ class GeneNetworkQAClient(Session): @staticmethod def filter_response_text(val): """ - Filters out non-printable characters from the input string and parses it as JSON. + Filters out non-printable characters from + the input string and parses it as JSON. Args: val (str): Input string to be filtered and parsed. @@ -198,7 +192,8 @@ class GeneNetworkQAClient(Session): dict: Parsed JSON object. # remove this """ - return json.loads(''.join([str(char) for char in val if char in string.printable])) + return json.loads(''.join([str(char) for char in val if char + in string.printable])) def getTaskIDFromResult(self, res): return json.loads(res.text) @@ -208,11 +203,11 @@ class GeneNetworkQAClient(Session): def get_gnqa(self, query): qstr = quote(query) - res, task_id = api_client.ask('?ask=' + qstr) - res, success = api_client.get_answer(task_id) + res, task_id = GeneNetworkQAClient.ask('?ask=' + qstr) + res, success = GeneNetworkQAClient.get_answer(task_id) if success == 1: - resp_text = filter_response_text(res.text) + resp_text = GeneNetworkQAClient.filter_response_text(res.text) answer = resp_text.get('data', {}).get('answer', '') context = resp_text.get('data', {}).get('context', '') return answer, context -- cgit v1.2.3 From 852f5c65cdf78c92012afcf9790d272b4e3f4419 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 13:05:46 +0300 Subject: Pep8 Cleanup for llms/errors file --- gn3/llms/errors.py | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/gn3/llms/errors.py b/gn3/llms/errors.py index e9f7c02..af3d7b0 100644 --- a/gn3/llms/errors.py +++ b/gn3/llms/errors.py @@ -1,32 +1,11 @@ - -# pylint: skip-file +""" Error handlers for Fahamu Api""" import json - from requests import HTTPError class UnprocessableEntity(HTTPError): - """An HTTP 422 Unprocessable Entity error occurred. - + """Error for HTTP 422 Unprocessable Entity https://help.helpjuice.com/en_US/api-v3/api-v3#errors - - The request could not be processed, usually due to a missing or invalid parameter. - - The response will also include an error object with an explanation of fields that - are missing or invalid. Here is an example: - - .. code-block:: - - HTTP/1.1 422 Unprocessable Entity - - - { - "errors": [ - { - "email": "is not valid." - } - ] - } """ def __init__(self, request, response): @@ -57,6 +36,7 @@ class UnprocessableEntity(HTTPError): class LLMError(HTTPError): + """Custom error from making Fahamu APi request """ def __init__(self, request, response, msg): super(HTTPError, self).__init__( msg, request=request, response=response) -- cgit v1.2.3 From 167706bb417627b1d9fcacb7cdcebeafe886c1ba Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 13:17:18 +0300 Subject: Remove unused imports for llm.response file --- gn3/llms/response.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/gn3/llms/response.py b/gn3/llms/response.py index 93320e9..7fce75b 100644 --- a/gn3/llms/response.py +++ b/gn3/llms/response.py @@ -45,29 +45,3 @@ class DocIDs(): return self.doc_ids[doc_id] else: return doc_id - - -class RespContext(): - def __init__(self, context): - self.cntxt = context - self.theObj = {} - - def parseIntoObject(self, info): - # check for obj, arr, or val - for key, val in info.items(): - if isinstance(val, list): - self.parseIntoObject(val) - elif isinstance(val, str) or isinstance(val, int): - self.theObj[key] = val - self.theObj[key] = self.val - - -def createAccordionFromJson(theContext): - result = '' - # loop thru json array - for docID, summaryLst in theContext.items(): - # item is a key with a list - comboTxt = '' - for entry in summaryLst: - comboTxt += '\t' + entry['text'] - return result -- cgit v1.2.3 From 92ab53e9f0b10ebf7423626e37b3fe73eb8c0b65 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 14:05:02 +0300 Subject: Refactor code for llm:response --- gn3/llms/response.py | 60 ++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/gn3/llms/response.py b/gn3/llms/response.py index 7fce75b..2f00312 100644 --- a/gn3/llms/response.py +++ b/gn3/llms/response.py @@ -1,46 +1,42 @@ - -# pylint: skip-file +""" Module contains code for parsing references doc_ids """ +# pylint: disable=C0301 import json import os - basedir = os.path.abspath(os.path.dirname(__file__)) class DocIDs(): + """ Class Method to Parse document id and names""" def __init__(self): - # open doc ids for GN refs - self.doc_ids = self.loadFile("doc_ids.json") - # open doc ids for Diabetes references - self.sugar_doc_ids = self.loadFile("all_files.json") - # format is not what I prefer, it needs to be rebuilt - self.formatDocIDs(self.sugar_doc_ids) - - def loadFile(self, file_name): + """ + init method for Docids + * doc_ids.json: opens doc)ids for gn references + * sugar_doc_ids: open doci_ids for diabetes references + """ + self.doc_ids = self.load_file("doc_ids.json") + self.sugar_doc_ids = self.load_file("all_files.json") + self.format_doc_ids(self.sugar_doc_ids) + + def load_file(self, file_name): + """Method to load and read doc_id files""" file_path = os.path.join(basedir, file_name) if os.path.isfile(file_path): - f = open(file_path, "rb") - result = json.load(f) - f.close() - return result + with open(file_path, "rb") as file_handler: + return json.load(file_handler) else: - raise Exception("\n{0} -- File does not exist\n".format(file_path)) - - def formatDocIDs(self, values): - for _key, _val in values.items(): - if isinstance(_val, list): - for theObject in _val: - docName = self.formatDocumentName(theObject['filename']) - docID = theObject['id'] - self.doc_ids.update({docID: docName}) - - def formatDocumentName(self, val): - result = val.removesuffix('.pdf') - result = result.removesuffix('.txt') - result = result.replace('_', ' ') - return result - - def getInfo(self, doc_id): + raise FileNotFoundError(f"{file_path}-- FIle does not exist\n") + + def format_doc_ids(self, docs): + """method to format doc_ids for list items""" + for _key, val in docs.items(): + if isinstance(val, list): + for doc_obj in val: + doc_name = doc_obj["filename"].removesuffix(".pdf").removesuffix(".txt").replace("_", "") + self.doc_ids.update({doc_obj["id"]: doc_name}) + + def get_info(self, doc_id): + """ interface to make read from doc_ids""" if doc_id in self.doc_ids.keys(): return self.doc_ids[doc_id] else: -- cgit v1.2.3 From 69013d298c869a42059af13bc63bef1bbdc7393d Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 14:05:30 +0300 Subject: Update file to use correct import from response file --- gn3/llms/process.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index e38b73e..4edc238 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -49,8 +49,9 @@ def parse_context(context, get_info_func, format_bib_func): def rate_document(task_id, doc_id, rating, auth_token): - """This method is used to provide feedback for a document by making a rating.""" - # todo move this to clients + """This method is used to provide + feedback for a document by making a rating + """ try: url = urljoin(BASE_URL, f"""/feedback?task_id={task_id}&document_id={doc_id}&feedback={rating}""") @@ -107,7 +108,7 @@ def get_gnqa(query, auth_token, tmp_dir=""): answer = resp_text['data']['answer'] context = resp_text['data']['context'] references = parse_context( - context, DocIDs().getInfo, format_bibliography_info) + context, DocIDs().get_info, format_bibliography_info) references = fetch_pubmed(references, "pubmed.json", tmp_dir) return task_id, answer, references -- cgit v1.2.3 From 75365bd88a720261a1b454f0ea11a840fb3be83e Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 14:20:00 +0300 Subject: Move Parsing Doc_Ids to process file * Context: groups related items --- gn3/llms/process.py | 51 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 4edc238..1881e92 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -1,21 +1,56 @@ """this module contains code for processing response from fahamu client.py""" +# pylint: disable=C0301 import os import string import json +import logging +import requests from urllib.parse import urljoin from urllib.parse import quote -import logging -import requests from gn3.llms.client import GeneNetworkQAClient -from gn3.llms.response import DocIDs BASE_URL = 'https://genenetwork.fahamuai.com/api/tasks' - - -# pylint: disable=C0301 +BASEDIR = os.path.abspath(os.path.dirname(__file__)) + + +class DocIDs(): + """ Class Method to Parse document id and names from files""" + def __init__(self): + """ + init method for Docids + * doc_ids.json: opens doc)ids for gn references + * sugar_doc_ids: open doci_ids for diabetes references + """ + self.doc_ids = self.load_file("doc_ids.json") + self.sugar_doc_ids = self.load_file("all_files.json") + self.format_doc_ids(self.sugar_doc_ids) + + def load_file(self, file_name): + """Method to load and read doc_id files""" + file_path = os.path.join(BASEDIR, file_name) + if os.path.isfile(file_path): + with open(file_path, "rb") as file_handler: + return json.load(file_handler) + else: + raise FileNotFoundError(f"{file_path}-- FIle does not exist\n") + + def format_doc_ids(self, docs): + """method to format doc_ids for list items""" + for _key, val in docs.items(): + if isinstance(val, list): + for doc_obj in val: + doc_name = doc_obj["filename"].removesuffix(".pdf").removesuffix(".txt").replace("_", "") + self.doc_ids.update({doc_obj["id"]: doc_name}) + + def get_info(self, doc_id): + """ interface to make read from doc_ids""" + if doc_id in self.doc_ids.keys(): + return self.doc_ids[doc_id] + else: + return doc_id def format_bibliography_info(bib_info): @@ -131,6 +166,6 @@ def fetch_query_results(query, user_id, redis_conn): def get_user_queries(user_id, redis_conn): """methos to fetch all queries for a specific user""" - results = redis_conn.keys(f"LLM:{user_id}*") - return [query for query in [result.partition("-")[2] for result in results] if query != ""] + return [query for query in + [result.partition("-")[2] for result in results] if query != ""] -- cgit v1.2.3 From f911db6b69b16ac5df57b27d213fa88a4c848f50 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 14:21:47 +0300 Subject: Delete response file * File is obsolete functionality move to process.py file --- gn3/llms/response.py | 43 ------------------------------------------- 1 file changed, 43 deletions(-) delete mode 100644 gn3/llms/response.py diff --git a/gn3/llms/response.py b/gn3/llms/response.py deleted file mode 100644 index 2f00312..0000000 --- a/gn3/llms/response.py +++ /dev/null @@ -1,43 +0,0 @@ -""" Module contains code for parsing references doc_ids """ -# pylint: disable=C0301 -import json -import os - -basedir = os.path.abspath(os.path.dirname(__file__)) - - -class DocIDs(): - """ Class Method to Parse document id and names""" - def __init__(self): - """ - init method for Docids - * doc_ids.json: opens doc)ids for gn references - * sugar_doc_ids: open doci_ids for diabetes references - """ - self.doc_ids = self.load_file("doc_ids.json") - self.sugar_doc_ids = self.load_file("all_files.json") - self.format_doc_ids(self.sugar_doc_ids) - - def load_file(self, file_name): - """Method to load and read doc_id files""" - file_path = os.path.join(basedir, file_name) - if os.path.isfile(file_path): - with open(file_path, "rb") as file_handler: - return json.load(file_handler) - else: - raise FileNotFoundError(f"{file_path}-- FIle does not exist\n") - - def format_doc_ids(self, docs): - """method to format doc_ids for list items""" - for _key, val in docs.items(): - if isinstance(val, list): - for doc_obj in val: - doc_name = doc_obj["filename"].removesuffix(".pdf").removesuffix(".txt").replace("_", "") - self.doc_ids.update({doc_obj["id"]: doc_name}) - - def get_info(self, doc_id): - """ interface to make read from doc_ids""" - if doc_id in self.doc_ids.keys(): - return self.doc_ids[doc_id] - else: - return doc_id -- cgit v1.2.3 From a5a6e319e85c28ff3ab9d6f2d8a869bc2ac77ac8 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 14:25:44 +0300 Subject: Delete function: only useful when training own llm model. --- gn3/llms/process.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 1881e92..e47a997 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -83,26 +83,6 @@ def parse_context(context, get_info_func, format_bib_func): return results -def rate_document(task_id, doc_id, rating, auth_token): - """This method is used to provide - feedback for a document by making a rating - """ - try: - url = urljoin(BASE_URL, - f"""/feedback?task_id={task_id}&document_id={doc_id}&feedback={rating}""") - headers = {"Authorization": f"Bearer {auth_token}"} - - resp = requests.post(url, headers=headers) - resp.raise_for_status() - - return {"status": "success", **resp.json()} - except requests.exceptions.HTTPError as http_error: - raise RuntimeError(f"HTTP Error Occurred:\ - {http_error.response.text} -with status code- {http_error.response.status_code}") from http_error - except Exception as error: - raise RuntimeError(f"An error occurred: {str(error)}") from error - - def load_file(filename, dir_path): """function to open and load json file""" file_path = os.path.join(dir_path, f"{filename}") -- cgit v1.2.3 From 3913374700521647e93bf9afabb9943746ac5d5b Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 14:27:20 +0300 Subject: Pep8 formatting gn3:llm:process. --- gn3/llms/process.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index e47a997..d080acb 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -6,9 +6,7 @@ import json import logging import requests -from urllib.parse import urljoin from urllib.parse import quote - from gn3.llms.client import GeneNetworkQAClient -- cgit v1.2.3 From f30300a82f605fa96130fbcbdcd17c53296d2372 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 16:34:34 +0300 Subject: Minor code refactoring related --- gn3/llms/process.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index d080acb..11961eb 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -4,9 +4,9 @@ import os import string import json import logging +from urllib.parse import quote import requests -from urllib.parse import quote from gn3.llms.client import GeneNetworkQAClient @@ -106,15 +106,24 @@ def fetch_pubmed(references, file_name, data_dir=""): return references -def get_gnqa(query, auth_token, tmp_dir=""): - """entry function for the gn3 api endpoint()""" +def get_gnqa(query, auth_token, data_dir=""): + """entry function for the gn3 api endpoint() + ARGS: + query: what is a gene + auth_token: token to connect to api_client + data_dir: base datirectory for gn3 data + Returns: + task_id: fahamu unique identifier for task + answer + references: contains doc_name,reference,pub_med_info + """ - api_client = GeneNetworkQAClient(requests.Session(), api_key=auth_token) + api_client = GeneNetworkQAClient(requests.Session(), auth_token) res, task_id = api_client.ask('?ask=' + quote(query), auth_token) if task_id == 0: raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}") - res, success = api_client.get_answer(task_id) - if success == 1: + res, status = api_client.get_answer(task_id) + if status == 1: resp_text = filter_response_text(res.text) if resp_text.get("data") is None: return task_id, "Please try to rephrase your question to receive feedback", [] @@ -122,7 +131,7 @@ def get_gnqa(query, auth_token, tmp_dir=""): context = resp_text['data']['context'] references = parse_context( context, DocIDs().get_info, format_bibliography_info) - references = fetch_pubmed(references, "pubmed.json", tmp_dir) + references = fetch_pubmed(references, "pubmed.json", data_dir) return task_id, answer, references else: -- cgit v1.2.3 From fe23477126b482472f6193797f7d88f59421900c Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 16:35:03 +0300 Subject: Init code refactoring for clients fahamu interface --- gn3/llms/client.py | 66 +++++++++--------------------------------------------- 1 file changed, 11 insertions(+), 55 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index b843907..14dcef3 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -1,20 +1,19 @@ +"""Module Contains code for making request to fahamu Api""" # pylint: skip-file import json import string -import os import time import requests - from requests import Session from requests.packages.urllib3.util.retry import Retry from requests.adapters import HTTPAdapter from urllib.parse import quote from gn3.llms.errors import LLMError -basedir = os.path.join(os.path.dirname(__file__)) - class TimeoutHTTPAdapter(HTTPAdapter): + """HTTP TimeoutAdapter """ + # todo rework on this def __init__(self, timeout, *args, **kwargs): """TimeoutHTTPAdapter constructor. Args: @@ -79,48 +78,29 @@ class GeneNetworkQAClient(Session): self.mount("https://", adapter) self.mount("http://", adapter) - @staticmethod - def format_bibliography_info(bib_info): - - if isinstance(bib_info, str): - # Remove '.txt' - bib_info = bib_info.removesuffix('.txt') - elif isinstance(bib_info, dict): - # Format string bibliography information - bib_info = "{0}.{1}.{2}.{3} ".format(bib_info.get('author', ''), - bib_info.get('title', ''), - bib_info.get('year', ''), - bib_info.get('doi', '')) - return bib_info - - @staticmethod def ask_the_documents(self, extend_url, my_auth): try: response = requests.post( self.base_url + extend_url, data={}, headers=my_auth) response.raise_for_status() except requests.exceptions.RequestException as e: - # Handle the exception appropriately, e.g., log the error raise RuntimeError(f"Error making the request: {e}") - if response.status_code != 200: return GeneNetworkQAClient.negative_status_msg(response), 0 - task_id = GeneNetworkQAClient.get_task_id_from_result(response) response = GeneNetworkQAClient.get_answer_using_task_id(task_id, my_auth) - if response.status_code != 200: - return GeneNetworkQAClient.negative_status_msg(response), 0 - return response, 1 @staticmethod def negative_status_msg(response): + """ handler for non 200 response from fahamu api""" return f"Error: Status code -{response.status_code}- Reason::{response.reason}" def ask(self, exUrl, *args, **kwargs): + """fahamu ask api interface""" askUrl = self.BASE_URL + exUrl res = self.custom_request('POST', askUrl, *args, **kwargs) if (res.status_code != 200): @@ -129,6 +109,7 @@ class GeneNetworkQAClient(Session): return res, task_id def get_answer(self, taskid, *args, **kwargs): + """Fahamu get answer interface""" query = self.answer_url + self.extendTaskID(taskid) res = self.custom_request('GET', query, *args, **kwargs) if (res.status_code != 200): @@ -136,15 +117,13 @@ class GeneNetworkQAClient(Session): return res, 1 def custom_request(self, method, url, *args, **kwargs): - + """ make custom request to fahamu api ask and get response""" max_retries = 50 retry_delay = 3 - for i in range(max_retries): try: response = super().request(method, url, *args, **kwargs) response.raise_for_status() - except requests.exceptions.HTTPError as error: if error.response.status_code == 500: raise LLMError(error.request, error.response, f"Response Error with:status_code:{error.response.status_code},Reason for error: Use of Invalid Fahamu Token") @@ -165,33 +144,23 @@ class GeneNetworkQAClient(Session): @staticmethod def get_task_id_from_result(response): + """method to get task_id from response""" task_id = json.loads(response.text) - result = f"?task_id={task_id.get('task_id', '')}" - return result + return f"?task_id={task_id.get('task_id', '')}" def get_answer_using_task_id(self, extend_url, my_auth): + """call this method with task id to fetch response""" try: response = requests.get( self.answer_url + extend_url, data={}, headers=my_auth) response.raise_for_status() return response except requests.exceptions.RequestException as error: - # Handle the exception appropriately, e.g., log the error raise error @staticmethod def filter_response_text(val): - """ - Filters out non-printable characters from - the input string and parses it as JSON. - - Args: - val (str): Input string to be filtered and parsed. - - Returns: - dict: Parsed JSON object. - # remove this - """ + """method to filter out non-printable chacracters""" return json.loads(''.join([str(char) for char in val if char in string.printable])) @@ -200,16 +169,3 @@ class GeneNetworkQAClient(Session): def extendTaskID(self, task_id): return '?task_id=' + str(task_id['task_id']) - - def get_gnqa(self, query): - qstr = quote(query) - res, task_id = GeneNetworkQAClient.ask('?ask=' + qstr) - res, success = GeneNetworkQAClient.get_answer(task_id) - - if success == 1: - resp_text = GeneNetworkQAClient.filter_response_text(res.text) - answer = resp_text.get('data', {}).get('answer', '') - context = resp_text.get('data', {}).get('context', '') - return answer, context - else: - return res, "Unfortunately, I have nothing." -- cgit v1.2.3 From e0aadacbfc23f240c9dad1d0cd430ffcfa99d547 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 17:50:57 +0300 Subject: Pylint Fixes. --- gn3/llms/client.py | 83 ++++++++++++++++++++++-------------------------------- 1 file changed, 34 insertions(+), 49 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 14dcef3..2e9898f 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -1,13 +1,13 @@ """Module Contains code for making request to fahamu Api""" -# pylint: skip-file +# pylint: disable=C0301 import json -import string import time + import requests from requests import Session -from requests.packages.urllib3.util.retry import Retry from requests.adapters import HTTPAdapter -from urllib.parse import quote + +from urllib3.util import Retry from gn3.llms.errors import LLMError @@ -56,15 +56,14 @@ class GeneNetworkQAClient(Session): api_key="XXXXXXXXXXXXXXXXXXX...") """ - BASE_URL = 'https://genenetwork.fahamuai.com/api/tasks' - def __init__(self, account, api_key, version="v3", timeout=30, total_retries=5, backoff_factor=30): super().__init__() self.headers.update( {"Authorization": "Bearer " + api_key}) - self.answer_url = f"{self.BASE_URL}/answers" - self.feedback_url = f"{self.BASE_URL}/feedback" + self.base_url = "https://genenetwork.fahamuai.com/api/tasks" + self.answer_url = f"{self.base_url}/answers" + self.feedback_url = f"{self.base_url}/feedback" adapter = TimeoutHTTPAdapter( timeout=timeout, @@ -83,8 +82,8 @@ class GeneNetworkQAClient(Session): response = requests.post( self.base_url + extend_url, data={}, headers=my_auth) response.raise_for_status() - except requests.exceptions.RequestException as e: - raise RuntimeError(f"Error making the request: {e}") + except requests.exceptions.RequestException as error: + raise RuntimeError(f"Error making the request: {error}") from error if response.status_code != 200: return GeneNetworkQAClient.negative_status_msg(response), 0 task_id = GeneNetworkQAClient.get_task_id_from_result(response) @@ -99,36 +98,50 @@ class GeneNetworkQAClient(Session): """ handler for non 200 response from fahamu api""" return f"Error: Status code -{response.status_code}- Reason::{response.reason}" - def ask(self, exUrl, *args, **kwargs): + def ask(self, ex_url, *args, **kwargs): """fahamu ask api interface""" - askUrl = self.BASE_URL + exUrl - res = self.custom_request('POST', askUrl, *args, **kwargs) - if (res.status_code != 200): + res = self.custom_request('POST', f"{self.base_url}{ex_url}", *args, **kwargs) + if res.status_code != 200: return self.negative_status_msg(res), 0 - task_id = self.getTaskIDFromResult(res) - return res, task_id + return res, json.loads(res.text) def get_answer(self, taskid, *args, **kwargs): """Fahamu get answer interface""" - query = self.answer_url + self.extendTaskID(taskid) + query = f"{self.answer_url}?task_id={taskid['task_id']}" res = self.custom_request('GET', query, *args, **kwargs) - if (res.status_code != 200): + if res.status_code != 200: return self.negative_status_msg(res), 0 return res, 1 + @staticmethod + def get_task_id_from_result(response): + """method to get task_id from response""" + task_id = json.loads(response.text) + return f"?task_id={task_id.get('task_id', '')}" + + def get_answer_using_task_id(self, extend_url, my_auth): + """call this method with task id to fetch response""" + try: + response = requests.get( + self.answer_url + extend_url, data={}, headers=my_auth) + response.raise_for_status() + return response + except requests.exceptions.RequestException as error: + raise error + def custom_request(self, method, url, *args, **kwargs): """ make custom request to fahamu api ask and get response""" max_retries = 50 retry_delay = 3 - for i in range(max_retries): + for _i in range(max_retries): try: response = super().request(method, url, *args, **kwargs) response.raise_for_status() except requests.exceptions.HTTPError as error: if error.response.status_code == 500: - raise LLMError(error.request, error.response, f"Response Error with:status_code:{error.response.status_code},Reason for error: Use of Invalid Fahamu Token") + raise LLMError(error.request, error.response, f"Response Error with:status_code:{error.response.status_code},Reason for error: Use of Invalid Fahamu Token") from error elif error.response.status_code == 404: - raise LLMError(error.request, error.response, f"404 Client Error: Not Found for url: {self.BASE_URL}") + raise LLMError(error.request, error.response, f"404 Client Error: Not Found for url: {self.base_url}") from error raise error except requests.exceptions.RequestException as error: raise error @@ -141,31 +154,3 @@ class GeneNetworkQAClient(Session): else: time.sleep(retry_delay) return response - - @staticmethod - def get_task_id_from_result(response): - """method to get task_id from response""" - task_id = json.loads(response.text) - return f"?task_id={task_id.get('task_id', '')}" - - def get_answer_using_task_id(self, extend_url, my_auth): - """call this method with task id to fetch response""" - try: - response = requests.get( - self.answer_url + extend_url, data={}, headers=my_auth) - response.raise_for_status() - return response - except requests.exceptions.RequestException as error: - raise error - - @staticmethod - def filter_response_text(val): - """method to filter out non-printable chacracters""" - return json.loads(''.join([str(char) for char in val if char - in string.printable])) - - def getTaskIDFromResult(self, res): - return json.loads(res.text) - - def extendTaskID(self, task_id): - return '?task_id=' + str(task_id['task_id']) -- cgit v1.2.3 From 50c8500105912a6380ea8f971ccfb17ef0994279 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 16 May 2024 18:56:41 +0300 Subject: Refactor code for http request adapters. --- gn3/llms/client.py | 43 +++++++++++-------------------------------- gn3/llms/process.py | 2 +- 2 files changed, 12 insertions(+), 33 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 2e9898f..810227f 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -1,5 +1,6 @@ """Module Contains code for making request to fahamu Api""" # pylint: disable=C0301 +# pylint: disable=R0913 import json import time @@ -24,13 +25,13 @@ class TimeoutHTTPAdapter(HTTPAdapter): self.timeout = timeout super().__init__(*args, **kwargs) - def send(self, request, **kwargs): + def send(self, *args, **kwargs): """Override :obj:`HTTPAdapter` send method to add a default timeout.""" timeout = kwargs.get("timeout") if timeout is None: kwargs["timeout"] = self.timeout - return super().send(request, **kwargs) + return super().send(*args, **kwargs) class GeneNetworkQAClient(Session): @@ -77,21 +78,15 @@ class GeneNetworkQAClient(Session): self.mount("https://", adapter) self.mount("http://", adapter) - def ask_the_documents(self, extend_url, my_auth): + def get_answer_using_task_id(self, extend_url, my_auth): + """call this method with task id to fetch response""" try: - response = requests.post( - self.base_url + extend_url, data={}, headers=my_auth) + response = requests.get( + self.answer_url + extend_url, data={}, headers=my_auth) response.raise_for_status() + return response except requests.exceptions.RequestException as error: - raise RuntimeError(f"Error making the request: {error}") from error - if response.status_code != 200: - return GeneNetworkQAClient.negative_status_msg(response), 0 - task_id = GeneNetworkQAClient.get_task_id_from_result(response) - response = GeneNetworkQAClient.get_answer_using_task_id(task_id, - my_auth) - if response.status_code != 200: - return GeneNetworkQAClient.negative_status_msg(response), 0 - return response, 1 + raise error @staticmethod def negative_status_msg(response): @@ -102,7 +97,7 @@ class GeneNetworkQAClient(Session): """fahamu ask api interface""" res = self.custom_request('POST', f"{self.base_url}{ex_url}", *args, **kwargs) if res.status_code != 200: - return self.negative_status_msg(res), 0 + return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0 return res, json.loads(res.text) def get_answer(self, taskid, *args, **kwargs): @@ -110,25 +105,9 @@ class GeneNetworkQAClient(Session): query = f"{self.answer_url}?task_id={taskid['task_id']}" res = self.custom_request('GET', query, *args, **kwargs) if res.status_code != 200: - return self.negative_status_msg(res), 0 + return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0 return res, 1 - @staticmethod - def get_task_id_from_result(response): - """method to get task_id from response""" - task_id = json.loads(response.text) - return f"?task_id={task_id.get('task_id', '')}" - - def get_answer_using_task_id(self, extend_url, my_auth): - """call this method with task id to fetch response""" - try: - response = requests.get( - self.answer_url + extend_url, data={}, headers=my_auth) - response.raise_for_status() - return response - except requests.exceptions.RequestException as error: - raise error - def custom_request(self, method, url, *args, **kwargs): """ make custom request to fahamu api ask and get response""" max_retries = 50 diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 11961eb..9cb09a1 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -118,7 +118,7 @@ def get_gnqa(query, auth_token, data_dir=""): references: contains doc_name,reference,pub_med_info """ - api_client = GeneNetworkQAClient(requests.Session(), auth_token) + api_client = GeneNetworkQAClient(requests.Session(), api_key=auth_token) res, task_id = api_client.ask('?ask=' + quote(query), auth_token) if task_id == 0: raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}") -- cgit v1.2.3 From 4d39c26b65aad3fa48d35fc11007f5f3afe1c112 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 17 May 2024 12:05:16 +0300 Subject: Timeout code refactoring. --- gn3/llms/client.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 810227f..05e3500 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -7,30 +7,23 @@ import time import requests from requests import Session from requests.adapters import HTTPAdapter +from requests.adapters import Retry -from urllib3.util import Retry from gn3.llms.errors import LLMError class TimeoutHTTPAdapter(HTTPAdapter): - """HTTP TimeoutAdapter """ - # todo rework on this + """Set a default timeout for HTTP calls """ def __init__(self, timeout, *args, **kwargs): - """TimeoutHTTPAdapter constructor. - Args: - timeout (int): How many seconds to wait for the server to - send data before - giving up. - """ + """TimeoutHTTPAdapter constructor.""" self.timeout = timeout super().__init__(*args, **kwargs) def send(self, *args, **kwargs): """Override :obj:`HTTPAdapter` send method to add a default timeout.""" - timeout = kwargs.get("timeout") - if timeout is None: - kwargs["timeout"] = self.timeout - + kwargs["timeout"] = ( + kwargs["timeout"] if kwargs.get("timeout") else self.timeout + ) return super().send(*args, **kwargs) -- cgit v1.2.3 From d3bc323fe3a965ee5b6917987c4fe7662056e560 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 17 May 2024 13:34:37 +0300 Subject: Refactor custom request codebase. --- gn3/llms/client.py | 40 +++++++++++++++++++--------------------- gn3/llms/process.py | 5 ++--- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 05e3500..d57bca2 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -1,6 +1,5 @@ """Module Contains code for making request to fahamu Api""" # pylint: disable=C0301 -# pylint: disable=R0913 import json import time @@ -36,9 +35,7 @@ class GeneNetworkQAClient(Session): request timeouts, and request retries. Args: - account (str): Base address subdomain. api_key (str): API key. - version (str, optional): API version, defaults to "v3". timeout (int, optional): Timeout value, defaults to 5. total_retries (int, optional): Total retries value, defaults to 5. backoff_factor (int, optional): Retry backoff factor value, @@ -50,7 +47,7 @@ class GeneNetworkQAClient(Session): api_key="XXXXXXXXXXXXXXXXXXX...") """ - def __init__(self, account, api_key, version="v3", timeout=30, + def __init__(self, api_key, timeout=30, total_retries=5, backoff_factor=30): super().__init__() self.headers.update( @@ -95,11 +92,14 @@ class GeneNetworkQAClient(Session): def get_answer(self, taskid, *args, **kwargs): """Fahamu get answer interface""" - query = f"{self.answer_url}?task_id={taskid['task_id']}" - res = self.custom_request('GET', query, *args, **kwargs) - if res.status_code != 200: - return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0 - return res, 1 + try: + query = f"{self.answer_url}?task_id={taskid['task_id']}" + res = self.custom_request('GET', query, *args, **kwargs) + if res.status_code != 200: + return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0 + return res, 1 + except TimeoutError: + return "Timeout error occured:try to rephrase your query", 0 def custom_request(self, method, url, *args, **kwargs): """ make custom request to fahamu api ask and get response""" @@ -109,20 +109,18 @@ class GeneNetworkQAClient(Session): try: response = super().request(method, url, *args, **kwargs) response.raise_for_status() + if response.ok: + if method.lower() == "get" and response.json().get("data") is None: + time.sleep(retry_delay) + continue + return response + else: + time.sleep(retry_delay) except requests.exceptions.HTTPError as error: if error.response.status_code == 500: raise LLMError(error.request, error.response, f"Response Error with:status_code:{error.response.status_code},Reason for error: Use of Invalid Fahamu Token") from error - elif error.response.status_code == 404: - raise LLMError(error.request, error.response, f"404 Client Error: Not Found for url: {self.base_url}") from error - raise error + raise LLMError(error.request, error.response, + f"HTTP error occurred with error status:{error.response.status_code}") from error except requests.exceptions.RequestException as error: raise error - if response.ok: - if method.lower() == "get" and response.json().get("data") is None: - time.sleep(retry_delay) - continue - else: - return response - else: - time.sleep(retry_delay) - return response + raise TimeoutError diff --git a/gn3/llms/process.py b/gn3/llms/process.py index 9cb09a1..4725bcb 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -5,7 +5,6 @@ import string import json import logging from urllib.parse import quote -import requests from gn3.llms.client import GeneNetworkQAClient @@ -118,7 +117,7 @@ def get_gnqa(query, auth_token, data_dir=""): references: contains doc_name,reference,pub_med_info """ - api_client = GeneNetworkQAClient(requests.Session(), api_key=auth_token) + api_client = GeneNetworkQAClient(api_key=auth_token) res, task_id = api_client.ask('?ask=' + quote(query), auth_token) if task_id == 0: raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}") @@ -135,7 +134,7 @@ def get_gnqa(query, auth_token, data_dir=""): return task_id, answer, references else: - return task_id, "Please try to rephrase your question to receive feedback", [] + return task_id, "We couldn't provide a response,Please try to rephrase your question to receive feedback", [] def fetch_query_results(query, user_id, redis_conn): -- cgit v1.2.3