Merge pull request #163 from genenetwork/chores/llm3-refactoring

Chores/llm3 refactoring
author: Alexander Kabui 2024-05-17 16:10:43 +0300
committer: GitHub 2024-05-17 16:10:43 +0300
commit: f8c87e6fd1b26887c84a390a1a253d2c629942bc (patch)
tree: b0793a643ad30ae209d426866b80391f4143c29b
parent: ae8a12f752281096ee5755679b0b29d834afa3b1 (diff)
parent: d3bc323fe3a965ee5b6917987c4fe7662056e560 (diff)
download: genenetwork3-f8c87e6fd1b26887c84a390a1a253d2c629942bc.tar.gz
4 files changed, 123 insertions, 290 deletions
diff --git a/gn3/llms/client.py b/gn3/llms/client.py
index 042becd..d57bca2 100644
--- a/gn3/llms/client.py
+++ b/gn3/llms/client.py
@@ -1,72 +1,60 @@
-# pylint: skip-file
+"""Module containing code for making requests to the Fahamu API."""
+# pylint: disable=C0301
 import json
-import string
-import os
-import datetime
 import time
-import requests
 
-from requests import Session
-from urllib.parse import urljoin
-from requests.packages.urllib3.util.retry import Retry
-from requests import HTTPError
+import requests
 from requests import Session
 from requests.adapters import HTTPAdapter
-from urllib.request import urlretrieve
-from urllib.parse import quote
-from gn3.llms.errors import UnprocessableEntity
-from gn3.llms.errors import LLMError
+from requests.adapters import Retry
 
-basedir = os.path.join(os.path.dirname(__file__))
+from gn3.llms.errors import LLMError
 
 
 class TimeoutHTTPAdapter(HTTPAdapter):
+    """Set a default timeout for HTTP calls """
     def __init__(self, timeout, *args, **kwargs):
-        """TimeoutHTTPAdapter constructor.
-        Args:
-            timeout (int): How many seconds to wait for the server to send data before
-                giving up.
-        """
+        """TimeoutHTTPAdapter constructor."""
         self.timeout = timeout
         super().__init__(*args, **kwargs)
 
-    def send(self, request, **kwargs):
+    def send(self, *args, **kwargs):
         """Override :obj:`HTTPAdapter` send method to add a default timeout."""
-        timeout = kwargs.get("timeout")
-        if timeout is None:
-            kwargs["timeout"] = self.timeout
-
-        return super().send(request, **kwargs)
+        kwargs["timeout"] = (
+            kwargs["timeout"] if kwargs.get("timeout") else self.timeout
+        )
+        return super().send(*args, **kwargs)
 
 
 class GeneNetworkQAClient(Session):
     """GeneNetworkQA Client
 
     This class provides a client object interface to the GeneNetworkQA API.
-    It extends the `requests.Session` class and includes authorization, base URL,
+    It extends the `requests.Session` class and includes authorization,
+    base URL,
     request timeouts, and request retries.
 
     Args:
-        account (str): Base address subdomain.
         api_key (str): API key.
-        version (str, optional): API version, defaults to "v3".
         timeout (int, optional): Timeout value, defaults to 5.
         total_retries (int, optional): Total retries value, defaults to 5.
-        backoff_factor (int, optional): Retry backoff factor value, defaults to 30.
+        backoff_factor (int, optional): Retry backoff factor value,
+            defaults to 30.
 
     Usage:
         from genenetworkqa import GeneNetworkQAClient
-        gnqa = GeneNetworkQAClient(account="account-name", api_key="XXXXXXXXXXXXXXXXXXX...")
+        gnqa = GeneNetworkQAClient(
+            api_key="XXXXXXXXXXXXXXXXXXX...")
     """
 
-    BASE_URL = 'https://genenetwork.fahamuai.com/api/tasks'
-
-    def __init__(self, account, api_key, version="v3", timeout=30, total_retries=5, backoff_factor=30):
+    def __init__(self, api_key, timeout=30,
+                 total_retries=5, backoff_factor=30):
         super().__init__()
         self.headers.update(
             {"Authorization": "Bearer " + api_key})
-        self.answer_url = f"{self.BASE_URL}/answers"
-        self.feedback_url = f"{self.BASE_URL}/feedback"
+        self.base_url = "https://genenetwork.fahamuai.com/api/tasks"
+        self.answer_url = f"{self.base_url}/answers"
+        self.feedback_url = f"{self.base_url}/feedback"
 
         adapter = TimeoutHTTPAdapter(
             timeout=timeout,
@@ -80,141 +68,59 @@ class GeneNetworkQAClient(Session):
         self.mount("https://", adapter)
         self.mount("http://", adapter)
 
-    @staticmethod
-    def format_bibliography_info(bib_info):
-
-        if isinstance(bib_info, str):
-            # Remove '.txt'
-            bib_info = bib_info.removesuffix('.txt')
-        elif isinstance(bib_info, dict):
-            # Format string bibliography information
-            bib_info = "{0}.{1}.{2}.{3} ".format(bib_info.get('author', ''),
-                                                 bib_info.get('title', ''),
-                                                 bib_info.get('year', ''),
-                                                 bib_info.get('doi', ''))
-        return bib_info
-
-    @staticmethod
-    def ask_the_documents(extend_url, my_auth):
+    def get_answer_using_task_id(self, extend_url, my_auth):
+        """call this method with task id to fetch response"""
         try:
-            response = requests.post(
-                base_url + extend_url, data={}, headers=my_auth)
+            response = requests.get(
+               self.answer_url + extend_url, data={}, headers=my_auth)
             response.raise_for_status()
-        except requests.exceptions.RequestException as e:
-            # Handle the exception appropriately, e.g., log the error
-            raise RuntimeError(f"Error making the request: {e}")
-
-        if response.status_code != 200:
-            return negative_status_msg(response), 0
-
-        task_id = get_task_id_from_result(response)
-        response = get_answer_using_task_id(task_id, my_auth)
-
-        if response.status_code != 200:
-
-            return negative_status_msg(response), 0
-
-        return response, 1
+            return response
+        except requests.exceptions.RequestException as error:
+            raise error
 
     @staticmethod
     def negative_status_msg(response):
+        """ handler for non 200 response from fahamu api"""
         return f"Error: Status code -{response.status_code}- Reason::{response.reason}"
-      #  return f"Problems\n\tStatus code => {response.status_code}\n\tReason => {response.reason}"
 
-    def ask(self, exUrl, *args, **kwargs):
-        askUrl = self.BASE_URL + exUrl
-        res = self.custom_request('POST', askUrl, *args, **kwargs)
-        if (res.status_code != 200):
-            return self.negative_status_msg(res), 0
-        task_id = self.getTaskIDFromResult(res)
-        return res, task_id
+    def ask(self, ex_url, *args, **kwargs):
+        """fahamu ask api interface"""
+        res = self.custom_request('POST', f"{self.base_url}{ex_url}", *args, **kwargs)
+        if res.status_code != 200:
+            return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0
+        return res, json.loads(res.text)
 
     def get_answer(self, taskid, *args, **kwargs):
-        query = self.answer_url + self.extendTaskID(taskid)
-        res = self.custom_request('GET', query, *args, **kwargs)
-        if (res.status_code != 200):
-            return self.negative_status_msg(res), 0
-        return res, 1
+        """Fahamu get answer interface"""
+        try:
+            query = f"{self.answer_url}?task_id={taskid['task_id']}"
+            res = self.custom_request('GET', query, *args, **kwargs)
+            if res.status_code != 200:
+                return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0
+            return res, 1
+        except TimeoutError:
+            return "Timeout error occured:try to rephrase your query", 0
 
     def custom_request(self, method, url, *args, **kwargs):
-
+        """ make custom request to fahamu api ask and get response"""
         max_retries = 50
         retry_delay = 3
-
-        for i in range(max_retries):
+        for _i in range(max_retries):
             try:
                 response = super().request(method, url, *args, **kwargs)
                 response.raise_for_status()
-
+                if response.ok:
+                    if method.lower() == "get" and response.json().get("data") is None:
+                        time.sleep(retry_delay)
+                        continue
+                    return response
+                else:
+                    time.sleep(retry_delay)
             except requests.exceptions.HTTPError as error:
-                if error.response.status_code ==500:
-                    raise LLMError(error.request, error.response, f"Response Error,status_code:{error.response.status_code},Reason: Use of Invalid Token")
-                elif error.response.status_code ==404:
-                    raise LLMError(error.request,error.response,f"404 Client Error: Not Found for url: {self.BASE_URL}")
-                raise error
-
+                if error.response.status_code == 500:
+                    raise LLMError(error.request, error.response, f"Response Error with:status_code:{error.response.status_code},Reason for error: Use of Invalid Fahamu Token") from error
+                raise LLMError(error.request, error.response,
+            f"HTTP error occurred  with error status:{error.response.status_code}") from error
             except requests.exceptions.RequestException as error:
-                raise error 
-
-
-
-
-            if response.ok:
-                if method.lower() == "get" and response.json().get("data") is None:
-                    time.sleep(retry_delay)
-                    continue
-                else:
-                    return response
-            else:
-                time.sleep(retry_delay)
-            return response
-
-    @staticmethod
-    def get_task_id_from_result(response):
-        task_id = json.loads(response.text)
-        result = f"?task_id={task_id.get('task_id', '')}"
-        return result
-
-    @staticmethod
-    def get_answer_using_task_id(extend_url, my_auth):
-        try:
-            response = requests.get(
-                answer_url + extend_url, data={}, headers=my_auth)
-            response.raise_for_status()
-            return response
-        except requests.exceptions.RequestException as error:
-            # Handle the exception appropriately, e.g., log the error
-            raise error
-
-    @staticmethod
-    def filter_response_text(val):
-        """
-        Filters out non-printable characters from the input string and parses it as JSON.
-
-        Args:
-            val (str): Input string to be filtered and parsed.
-
-        Returns:
-            dict: Parsed JSON object.
-        # remove  this
-        """
-        return json.loads(''.join([str(char) for char in val if char in string.printable]))
-
-    def getTaskIDFromResult(self, res):
-        return json.loads(res.text)
-
-    def extendTaskID(self, task_id):
-        return '?task_id=' + str(task_id['task_id'])
-
-    def get_gnqa(self, query):
-        qstr = quote(query)
-        res, task_id = api_client.ask('?ask=' + qstr)
-        res, success = api_client.get_answer(task_id)
-
-        if success == 1:
-            resp_text = filter_response_text(res.text)
-            answer = resp_text.get('data', {}).get('answer', '')
-            context = resp_text.get('data', {}).get('context', '')
-            return answer, context
-        else:
-            return res, "Unfortunately, I have nothing."
+                raise error
+        raise TimeoutError
diff --git a/gn3/llms/errors.py b/gn3/llms/errors.py
index e9f7c02..af3d7b0 100644
--- a/gn3/llms/errors.py
+++ b/gn3/llms/errors.py
@@ -1,32 +1,11 @@
-
-# pylint: skip-file
+""" Error handlers for Fahamu Api"""
 import json
-
 from requests import HTTPError
 
 
 class UnprocessableEntity(HTTPError):
-    """An HTTP 422 Unprocessable Entity error occurred.
-
+    """Error for  HTTP 422 Unprocessable Entity
     https://help.helpjuice.com/en_US/api-v3/api-v3#errors
-
-    The request could not be processed, usually due to a missing or invalid parameter.
-
-    The response will also include an error object with an explanation of fields that
-    are missing or invalid. Here is an example:
-
-    .. code-block::
-
-        HTTP/1.1 422 Unprocessable Entity
-
-
-        {
-          "errors": [
-            {
-              "email": "is not valid."
-            }
-          ]
-        }
     """
 
     def __init__(self, request, response):
@@ -57,6 +36,7 @@ class UnprocessableEntity(HTTPError):
 
 
 class LLMError(HTTPError):
+    """Custom error from making a Fahamu API request."""
     def __init__(self, request, response, msg):
         super(HTTPError, self).__init__(
             msg, request=request, response=response)
diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index e38b73e..4725bcb 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -1,21 +1,53 @@
 """this module contains code for processing response from fahamu client.py"""
+# pylint: disable=C0301
 import os
 import string
 import json
-
-from urllib.parse import urljoin
-from urllib.parse import quote
 import logging
-import requests
+from urllib.parse import quote
 
 from gn3.llms.client import GeneNetworkQAClient
-from gn3.llms.response import DocIDs
 
 
 BASE_URL = 'https://genenetwork.fahamuai.com/api/tasks'
-
-
-# pylint: disable=C0301
+BASEDIR = os.path.abspath(os.path.dirname(__file__))
+
+
+class DocIDs():
+    """Class to parse document ids and names from files."""
+    def __init__(self):
+        """
+        Load the document-id mappings.
+        * doc_ids.json: doc ids for GN references
+        * all_files.json: doc ids for diabetes references, merged into doc_ids
+        """
+        self.doc_ids = self.load_file("doc_ids.json")
+        self.sugar_doc_ids = self.load_file("all_files.json")
+        self.format_doc_ids(self.sugar_doc_ids)
+
+    def load_file(self, file_name):
+        """Method to load and read doc_id files"""
+        file_path = os.path.join(BASEDIR, file_name)
+        if os.path.isfile(file_path):
+            with open(file_path, "rb") as file_handler:
+                return json.load(file_handler)
+        else:
+            raise FileNotFoundError(f"{file_path}-- FIle does not exist\n")
+
+    def format_doc_ids(self, docs):
+        """method to format doc_ids for list items"""
+        for _key, val in docs.items():
+            if isinstance(val, list):
+                for doc_obj in val:
+                    doc_name = doc_obj["filename"].removesuffix(".pdf").removesuffix(".txt").replace("_", "")
+                    self.doc_ids.update({doc_obj["id"]:  doc_name})
+
+    def get_info(self, doc_id):
+        """Return the name stored for doc_id, or doc_id itself if unknown."""
+        if doc_id in self.doc_ids.keys():
+            return self.doc_ids[doc_id]
+        else:
+            return doc_id
 
 
 def format_bibliography_info(bib_info):
@@ -48,25 +80,6 @@ def parse_context(context, get_info_func, format_bib_func):
     return results
 
 
-def rate_document(task_id, doc_id, rating, auth_token):
-    """This method is used to provide feedback for a document by making a rating."""
-    # todo move this to clients
-    try:
-        url = urljoin(BASE_URL,
-                      f"""/feedback?task_id={task_id}&document_id={doc_id}&feedback={rating}""")
-        headers = {"Authorization": f"Bearer {auth_token}"}
-
-        resp = requests.post(url, headers=headers)
-        resp.raise_for_status()
-
-        return {"status": "success", **resp.json()}
-    except requests.exceptions.HTTPError as http_error:
-        raise RuntimeError(f"HTTP Error Occurred:\
-            {http_error.response.text} -with status code- {http_error.response.status_code}") from http_error
-    except Exception as error:
-        raise RuntimeError(f"An error occurred: {str(error)}") from error
-
-
 def load_file(filename, dir_path):
     """function to open and load json file"""
     file_path = os.path.join(dir_path, f"{filename}")
@@ -92,27 +105,36 @@ def fetch_pubmed(references, file_name, data_dir=""):
         return references
 
 
-def get_gnqa(query, auth_token, tmp_dir=""):
-    """entry function for the gn3 api endpoint()"""
+def get_gnqa(query, auth_token, data_dir=""):
+    """entry function for the gn3 api endpoint()
+    ARGS:
+         query: the question to ask (e.g. what is a gene)
+         auth_token: token to connect to api_client
+         data_dir: base directory for gn3 data
+    Returns:
+         task_id: fahamu unique identifier for task
+         answer
+         references: contains doc_name,reference,pub_med_info
+    """
 
-    api_client = GeneNetworkQAClient(requests.Session(), api_key=auth_token)
+    api_client = GeneNetworkQAClient(api_key=auth_token)
     res, task_id = api_client.ask('?ask=' + quote(query), auth_token)
     if task_id == 0:
         raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}")
-    res, success = api_client.get_answer(task_id)
-    if success == 1:
+    res, status = api_client.get_answer(task_id)
+    if status == 1:
         resp_text = filter_response_text(res.text)
         if resp_text.get("data") is None:
             return task_id, "Please try to rephrase your question to receive feedback", []
         answer = resp_text['data']['answer']
         context = resp_text['data']['context']
         references = parse_context(
-            context, DocIDs().getInfo, format_bibliography_info)
-        references = fetch_pubmed(references, "pubmed.json", tmp_dir)
+            context, DocIDs().get_info, format_bibliography_info)
+        references = fetch_pubmed(references, "pubmed.json", data_dir)
 
         return task_id, answer, references
     else:
-        return task_id, "Please try to rephrase your question to receive feedback", []
+        return task_id, "We couldn't provide a response,Please try to rephrase your question to receive feedback", []
 
 
 def fetch_query_results(query, user_id, redis_conn):
@@ -130,6 +152,6 @@ def fetch_query_results(query, user_id, redis_conn):
 
 def get_user_queries(user_id, redis_conn):
     """methos to fetch all queries for a specific user"""
-
     results = redis_conn.keys(f"LLM:{user_id}*")
-    return [query for query in [result.partition("-")[2] for result in results] if query != ""]
+    return [query for query in
+            [result.partition("-")[2] for result in results] if query != ""]
diff --git a/gn3/llms/response.py b/gn3/llms/response.py
deleted file mode 100644
index 11cbd94..0000000
--- a/gn3/llms/response.py
+++ /dev/null
@@ -1,75 +0,0 @@
-
-# pylint: skip-file
-import string
-import json
-import os
-
-
-basedir           = os.path.abspath(os.path.dirname(__file__))
-
-
-class DocIDs():
-    def __init__(self):
-        # open doc ids for GN refs
-        self.doc_ids = self.loadFile("doc_ids.json")
-        # open doc ids for Diabetes references
-        self.sugar_doc_ids = self.loadFile("all_files.json")
-        # format is not what I prefer, it needs to be rebuilt
-        self.formatDocIDs(self.sugar_doc_ids)
-
-    def loadFile(self, file_name):
-        file_path = os.path.join(basedir, file_name)
-        if os.path.isfile(file_path):
-            f = open(file_path, "rb")
-            result = json.load(f)
-            f.close()
-            return result
-        else:
-            raise Exception("\n{0} -- File does not exist\n".format(file_path))
-    
-    def formatDocIDs(self, values):
-        for _key, _val in values.items():
-            if isinstance(_val, list):
-                for theObject in _val:
-                    docName = self.formatDocumentName(theObject['filename'])
-                    docID   = theObject['id']
-                    self.doc_ids.update({docID: docName})
-                    
-    def formatDocumentName(self, val):
-       result = val.removesuffix('.pdf') 
-       result = result.removesuffix('.txt') 
-       result = result.replace('_', ' ')
-       return result
-
-
-    def getInfo(self, doc_id):
-        if doc_id in self.doc_ids.keys():
-            return self.doc_ids[doc_id]
-        else:
-            return doc_id
-
-class RespContext():
-    def __init__(self, context):
-        self.cntxt = context
-        self.theObj = {}
-
-    def parseIntoObject(self, info):
-        # check for obj, arr, or val
-        for key, val in info.items():
-            if isinstance(val, list):
-                self.parseIntoObject(val)
-            elif isinstance(val, str) or isinstance(val, int):
-                self.theObj[key] = val
-            self.theObj[key] = self.val
-
-
-def createAccordionFromJson(theContext):
-    result = ''
-    # loop thru json array
-    ndx = 0
-    for docID, summaryLst in theContext.items():
-        # item is a key with a list
-        comboTxt = ''
-        for entry in summaryLst:
-            comboTxt += '\t' + entry['text']
-    return result
-\ No newline at end of file
author	Alexander Kabui	2024-05-17 16:10:43 +0300
committer	GitHub	2024-05-17 16:10:43 +0300
commit	f8c87e6fd1b26887c84a390a1a253d2c629942bc (patch)
tree	b0793a643ad30ae209d426866b80391f4143c29b
parent	ae8a12f752281096ee5755679b0b29d834afa3b1 (diff)
parent	d3bc323fe3a965ee5b6917987c4fe7662056e560 (diff)
download	genenetwork3-f8c87e6fd1b26887c84a390a1a253d2c629942bc.tar.gz