| author | Alexander Kabui | 2024-09-06 11:17:18 +0300 |
|---|---|---|
| committer | GitHub | 2024-09-06 11:17:18 +0300 |
| commit | cfeb54b776e95194381d26cff02ea738ad4fd3e0 (patch) | |
| tree | 1a7cf011bbeb61df90963d79237643bc9f8611f5 | |
| parent | 8e28770342b65cff78441670f1841e0130dc9c4b (diff) | |
| parent | 8cb85c8f8c12180702cfc3a257bf9a513ac4da3d (diff) | |
| download | genenetwork3-cfeb54b776e95194381d26cff02ea738ad4fd3e0.tar.gz | |
Merge pull request #188 from genenetwork/chores/merge-gnqa-api
Chores/merge gnqa api
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | gn3/api/llm.py | 244 |
| -rw-r--r-- | gn3/app.py | 4 |
| -rw-r--r-- | gn3/errors.py | 18 |
| -rw-r--r-- | gn3/llms/client.py | 223 |
| -rw-r--r-- | gn3/llms/errors.py | 31 |
| -rw-r--r-- | gn3/llms/process.py | 192 |
| -rw-r--r-- | gn3/llms/response.py | 75 |
| -rw-r--r-- | sql/update/llm_db_tables.sql | 47 |
| -rw-r--r-- | sql/update/llm_db_update.sql | 37 |
| -rw-r--r-- | tests/unit/test_llm.py | 132 |
10 files changed, 474 insertions, 529 deletions
diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 7d860d8..7e60271 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -1,128 +1,150 @@ -"""API for data used to generate menus""" - -# pylint: skip-file +"""Api endpoints for gnqa""" +import json +from datetime import datetime -from flask import jsonify, request, Blueprint, current_app +from flask import Blueprint +from flask import current_app +from flask import jsonify +from flask import request -from functools import wraps from gn3.llms.process import get_gnqa -from gn3.llms.process import get_user_queries -from gn3.llms.process import fetch_query_results +from gn3.llms.errors import LLMError from gn3.auth.authorisation.oauth2.resource_server import require_oauth from gn3.auth import db -from redis import Redis -import json -import sqlite3 -from datetime import timedelta - -GnQNA = Blueprint("GnQNA", __name__) -def handle_errors(func): - @wraps(func) - def decorated_function(*args, **kwargs): - try: - return func(*args, **kwargs) - except Exception as error: - return jsonify({"error": str(error)}), 500 - return decorated_function +gnqa = Blueprint("gnqa", __name__) -@GnQNA.route("/gnqna", methods=["POST"]) -def gnqa(): - # todo add auth +@gnqa.route("/search", methods=["PUT"]) +def search(): + """Api endpoint for searching queries in fahamu Api""" query = request.json.get("querygnqa", "") if not query: return jsonify({"error": "querygnqa is missing in the request"}), 400 - - try: - fahamu_token = current_app.config.get("FAHAMU_AUTH_TOKEN") - if fahamu_token is None: - return jsonify({"query": query, "error": "Use of invalid fahamu auth token"}), 500 - task_id, answer, refs = get_gnqa( - query, fahamu_token, current_app.config.get("DATA_DIR")) - response = { - "task_id": task_id, - "query": query, - "answer": answer, - "references": refs - } - with (Redis.from_url(current_app.config["REDIS_URI"], - decode_responses=True) as redis_conn): - # The key will be deleted after 60 seconds - redis_conn.setex(f"LLM:random_user-{query}", timedelta(days=10), json.dumps(response)) - return jsonify({ - **response, - "prev_queries": get_user_queries("random_user", redis_conn) - }) - except Exception as error: - return jsonify({"query": query, "error": f"Request failed-{str(error)}"}), 500 - - -@GnQNA.route("/rating/<task_id>", methods=["POST"]) + fahamu_token = current_app.config.get("FAHAMU_AUTH_TOKEN") + if not fahamu_token: + raise LLMError( + "Request failed: an LLM authorisation token is required ", query) + task_id, answer, refs = get_gnqa( + query, fahamu_token, current_app.config.get("DATA_DIR")) + response = { + "task_id": task_id, + "query": query, + "answer": answer, + "references": refs + } + with (db.connection(current_app.config["LLM_DB_PATH"]) as conn, + require_oauth.acquire("profile user") as token): + cursor = conn.cursor() + cursor.execute("""CREATE TABLE IF NOT EXISTS + history(user_id TEXT NOT NULL, + task_id TEXT NOT NULL, + query TEXT NOT NULL, + results JSONB, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(task_id)) WITHOUT ROWID""") + cursor.execute( + """INSERT INTO history(user_id, task_id, query, results) + VALUES(?, ?, ?, ?) 
+ """, (str(token.user.user_id), str(task_id["task_id"]), + query, + json.dumps(response)) + ) + return response + + +@gnqa.route("/rating/<task_id>", methods=["POST"]) @require_oauth("profile") -def rating(task_id): - try: - llm_db_path = current_app.config["LLM_DB_PATH"] - with (require_oauth.acquire("profile") as token, - db.connection(llm_db_path) as conn): - - results = request.json - user_id, query, answer, weight = (token.user.user_id, - results.get("query"), - results.get("answer"), - results.get("weight", 0)) - cursor = conn.cursor() - create_table = """CREATE TABLE IF NOT EXISTS Rating( - user_id TEXT NOT NULL, - query TEXT NOT NULL, - answer TEXT NOT NULL, - weight INTEGER NOT NULL DEFAULT 0, - task_id TEXT NOT NULL UNIQUE - )""" - cursor.execute(create_table) - cursor.execute("""INSERT INTO Rating(user_id,query,answer,weight,task_id) - VALUES(?,?,?,?,?) - ON CONFLICT(task_id) DO UPDATE SET - weight=excluded.weight - """, (str(user_id), query, answer, weight, task_id)) +def rate_queries(task_id): + """Api endpoint for rating GNQA query and answer""" + with (require_oauth.acquire("profile") as token, + db.connection(current_app.config["LLM_DB_PATH"]) as conn): + results = request.json + user_id, query, answer, weight = (token.user.user_id, + results.get("query"), + results.get("answer"), + results.get("weight", 0)) + cursor = conn.cursor() + create_table = """CREATE TABLE IF NOT EXISTS Rating( + user_id TEXT NOT NULL, + query TEXT NOT NULL, + answer TEXT NOT NULL, + weight INTEGER NOT NULL DEFAULT 0, + task_id TEXT NOT NULL UNIQUE, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(task_id))""" + cursor.execute(create_table) + cursor.execute("""INSERT INTO Rating(user_id, query, + answer, weight, task_id) + VALUES(?, ?, ?, ?, ?) + ON CONFLICT(task_id) DO UPDATE SET + weight=excluded.weight + """, (str(user_id), query, answer, weight, task_id)) return { - "message": "You have successfully rated this query:Thank you!!" - }, 200 - except sqlite3.Error as error: - return jsonify({"error": str(error)}), 500 - except Exception as error: - raise error + "message": "You have successfully rated this query. Thank you!" 
+ }, 200 -@GnQNA.route("/history/<query>", methods=["GET"]) +@gnqa.route("/search/records", methods=["GET"]) @require_oauth("profile user") -@handle_errors -def fetch_user_hist(query): - - with (require_oauth.acquire("profile user") as the_token, Redis.from_url(current_app.config["REDIS_URI"], - decode_responses=True) as redis_conn): - return jsonify({ - **fetch_query_results(query, the_token.user.id, redis_conn), - "prev_queries": get_user_queries("random_user", redis_conn) - }) - - -@GnQNA.route("/historys/<query>", methods=["GET"]) -@handle_errors -def fetch_users_hist_records(query): - """method to fetch all users hist:note this is a test functionality to be replaced by fetch_user_hist""" - - with Redis.from_url(current_app.config["REDIS_URI"], decode_responses=True) as redis_conn: - return jsonify({ - **fetch_query_results(query, "random_user", redis_conn), - "prev_queries": get_user_queries("random_user", redis_conn) - }) - - -@GnQNA.route("/get_hist_names", methods=["GET"]) -@handle_errors -def fetch_prev_hist_ids(): - - with (Redis.from_url(current_app.config["REDIS_URI"], decode_responses=True)) as redis_conn: - return jsonify({"prev_queries": get_user_queries("random_user", redis_conn)}) +def get_user_search_records(): + """get all history records for a given user using their + user id + """ + with (require_oauth.acquire("profile user") as token, + db.connection(current_app.config["LLM_DB_PATH"]) as conn): + cursor = conn.cursor() + cursor.execute( + """SELECT task_id, query, created_at from history WHERE user_id=?""", + (str(token.user.user_id),)) + results = [dict(item) for item in cursor.fetchall()] + return jsonify(sorted(results, reverse=True, + key=lambda x: datetime.strptime(x.get("created_at"), + '%Y-%m-%d %H:%M:%S'))) + + +@gnqa.route("/search/record/<task_id>", methods=["GET"]) +@require_oauth("profile user") +def get_user_record_by_task(task_id): + """Get user previous search record by task id """ + with (require_oauth.acquire("profile user") as token, + db.connection(current_app.config["LLM_DB_PATH"]) as conn): + cursor = conn.cursor() + cursor.execute( + """SELECT results from history + Where task_id=? and user_id=?""", + (task_id, + str(token.user.user_id),)) + record = cursor.fetchone() + if record: + return dict(record).get("results") + return {} + + +@gnqa.route("/search/record/<task_id>", methods=["DELETE"]) +@require_oauth("profile user") +def delete_record(task_id): + """Delete user previous seach record by task-id""" + with (require_oauth.acquire("profile user") as token, + db.connection(current_app.config["LLM_DB_PATH"]) as conn): + cursor = conn.cursor() + query = """DELETE FROM history + WHERE task_id=? and user_id=?""" + cursor.execute(query, (task_id, token.user.user_id,)) + return {"msg": f"Successfully Deleted the task {task_id}"} + + +@gnqa.route("/search/records", methods=["DELETE"]) +@require_oauth("profile user") +def delete_records(): + """ Delete a users records using for all given task ids""" + with (require_oauth.acquire("profile user") as token, + db.connection(current_app.config["LLM_DB_PATH"]) as conn): + task_ids = list(request.json.values()) + cursor = conn.cursor() + query = """DELETE FROM history + WHERE task_id IN ({}) + and user_id=?""".format(",".join("?" 
* len(task_ids))) + cursor.execute(query, (*task_ids, str(token.user.user_id),)) + return jsonify({}) @@ -25,7 +25,7 @@ from gn3.api.menu import menu from gn3.api.search import search from gn3.api.metadata import metadata from gn3.api.sampledata import sampledata -from gn3.api.llm import GnQNA +from gn3.api.llm import gnqa from gn3.auth import oauth2 from gn3.case_attributes import caseattr @@ -78,7 +78,7 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask: app.register_blueprint(sampledata, url_prefix="/api/sampledata") app.register_blueprint(oauth2, url_prefix="/api/oauth2") app.register_blueprint(caseattr, url_prefix="/api/case-attribute") - app.register_blueprint(GnQNA, url_prefix="/api/llm") + app.register_blueprint(gnqa, url_prefix="/api/llm") register_error_handlers(app) return app diff --git a/gn3/errors.py b/gn3/errors.py index f618bab..ec7a554 100644 --- a/gn3/errors.py +++ b/gn3/errors.py @@ -17,7 +17,7 @@ from flask import Flask, jsonify, Response, current_app from gn3.oauth2 import errors as oautherrors from gn3.auth.authorisation.errors import AuthorisationError - +from gn3.llms.errors import LLMError def add_trace(exc: Exception, jsonmsg: dict) -> dict: """Add the traceback to the error handling object.""" @@ -118,6 +118,21 @@ def handle_local_authorisation_errors(exc: oautherrors.AuthorisationError): })), 400 +def handle_llm_error(exc: Exception) -> Response: + """ Handle llm erros if not handled anywhere else. """ + current_app.logger.error(exc) + resp = jsonify({ + "query": exc.args[1], + "error_type": type(exc).__name__, + "error": ( + exc.args[0] if bool(exc.args) else "Fahamu gnqa error occurred" + ), + "trace": traceback.format_exc() + }) + resp.status_code = 500 + return resp + + def register_error_handlers(app: Flask): """Register application-level error handlers.""" app.register_error_handler(NotFound, page_not_found) @@ -127,6 +142,7 @@ def register_error_handlers(app: Flask): app.register_error_handler(AuthorisationError, handle_authorisation_error) app.register_error_handler(RemoteDisconnected, internal_server_error) app.register_error_handler(URLError, url_server_error) + app.register_error_handler(LLMError, handle_llm_error) for exc in ( EndPointInternalError, EndPointNotFound, diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 042becd..54a7a17 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -1,72 +1,59 @@ -# pylint: skip-file +"""Module Contains code for making request to fahamu Api""" +# pylint: disable=C0301 import json -import string -import os -import datetime import time -import requests - -from requests import Session -from urllib.parse import urljoin -from requests.packages.urllib3.util.retry import Retry -from requests import HTTPError from requests import Session from requests.adapters import HTTPAdapter -from urllib.request import urlretrieve -from urllib.parse import quote -from gn3.llms.errors import UnprocessableEntity -from gn3.llms.errors import LLMError +from requests.adapters import Retry -basedir = os.path.join(os.path.dirname(__file__)) +from gn3.llms.errors import LLMError class TimeoutHTTPAdapter(HTTPAdapter): + """Set a default timeout for HTTP calls """ def __init__(self, timeout, *args, **kwargs): - """TimeoutHTTPAdapter constructor. - Args: - timeout (int): How many seconds to wait for the server to send data before - giving up. 
- """ + """TimeoutHTTPAdapter constructor.""" self.timeout = timeout super().__init__(*args, **kwargs) - def send(self, request, **kwargs): + def send(self, *args, **kwargs): """Override :obj:`HTTPAdapter` send method to add a default timeout.""" - timeout = kwargs.get("timeout") - if timeout is None: - kwargs["timeout"] = self.timeout - - return super().send(request, **kwargs) + kwargs["timeout"] = ( + kwargs["timeout"] if kwargs.get("timeout") else self.timeout + ) + return super().send(*args, **kwargs) class GeneNetworkQAClient(Session): """GeneNetworkQA Client This class provides a client object interface to the GeneNetworkQA API. - It extends the `requests.Session` class and includes authorization, base URL, + It extends the `requests.Session` class and includes authorization, + base URL, request timeouts, and request retries. Args: - account (str): Base address subdomain. api_key (str): API key. - version (str, optional): API version, defaults to "v3". timeout (int, optional): Timeout value, defaults to 5. total_retries (int, optional): Total retries value, defaults to 5. - backoff_factor (int, optional): Retry backoff factor value, defaults to 30. + backoff_factor (int, optional): Retry backoff factor value, + defaults to 30. Usage: from genenetworkqa import GeneNetworkQAClient - gnqa = GeneNetworkQAClient(account="account-name", api_key="XXXXXXXXXXXXXXXXXXX...") + gnqa = GeneNetworkQAClient(account="account-name", + api_key="XXXXXXXXXXXXXXXXXXX...") """ - BASE_URL = 'https://genenetwork.fahamuai.com/api/tasks' - - def __init__(self, account, api_key, version="v3", timeout=30, total_retries=5, backoff_factor=30): + def __init__(self, api_key, timeout=30, + total_retries=5, backoff_factor=2): super().__init__() self.headers.update( {"Authorization": "Bearer " + api_key}) - self.answer_url = f"{self.BASE_URL}/answers" - self.feedback_url = f"{self.BASE_URL}/feedback" + self.base_url = "https://genenetwork.fahamuai.com/api/tasks" + self.answer_url = f"{self.base_url}/answers" + self.feedback_url = f"{self.base_url}/feedback" + self.query = "" adapter = TimeoutHTTPAdapter( timeout=timeout, @@ -81,140 +68,54 @@ class GeneNetworkQAClient(Session): self.mount("http://", adapter) @staticmethod - def format_bibliography_info(bib_info): - - if isinstance(bib_info, str): - # Remove '.txt' - bib_info = bib_info.removesuffix('.txt') - elif isinstance(bib_info, dict): - # Format string bibliography information - bib_info = "{0}.{1}.{2}.{3} ".format(bib_info.get('author', ''), - bib_info.get('title', ''), - bib_info.get('year', ''), - bib_info.get('doi', '')) - return bib_info - - @staticmethod - def ask_the_documents(extend_url, my_auth): - try: - response = requests.post( - base_url + extend_url, data={}, headers=my_auth) - response.raise_for_status() - except requests.exceptions.RequestException as e: - # Handle the exception appropriately, e.g., log the error - raise RuntimeError(f"Error making the request: {e}") - - if response.status_code != 200: - return negative_status_msg(response), 0 - - task_id = get_task_id_from_result(response) - response = get_answer_using_task_id(task_id, my_auth) - - if response.status_code != 200: - - return negative_status_msg(response), 0 - - return response, 1 - - @staticmethod def negative_status_msg(response): + """ handler for non 200 response from fahamu api""" return f"Error: Status code -{response.status_code}- Reason::{response.reason}" - # return f"Problems\n\tStatus code => {response.status_code}\n\tReason => {response.reason}" - - def ask(self, exUrl, 
*args, **kwargs): - askUrl = self.BASE_URL + exUrl - res = self.custom_request('POST', askUrl, *args, **kwargs) - if (res.status_code != 200): - return self.negative_status_msg(res), 0 - task_id = self.getTaskIDFromResult(res) - return res, task_id - - def get_answer(self, taskid, *args, **kwargs): - query = self.answer_url + self.extendTaskID(taskid) + + def ask(self, ex_url, query, *args, **kwargs): + """fahamu ask api interface""" + self.query = query + res = self.custom_request('POST', f"{self.base_url}{ex_url}", *args, **kwargs) + return res, json.loads(res.text) + + def get_answer(self, task_obj, *args, **kwargs): + """Fahamu get answer interface""" + query = f"{self.answer_url}?task_id={task_obj['task_id']}" res = self.custom_request('GET', query, *args, **kwargs) - if (res.status_code != 200): - return self.negative_status_msg(res), 0 return res, 1 def custom_request(self, method, url, *args, **kwargs): - + """ + Make a custom request to the Fahamu API to ask and get a response. + This is a custom method, which is the current default for fetching items, + as it overrides the adapter provided above. + This function was created to debug the slow response rate of Fahamu and + provide custom a response. + """ max_retries = 50 retry_delay = 3 - - for i in range(max_retries): - try: - response = super().request(method, url, *args, **kwargs) - response.raise_for_status() - - except requests.exceptions.HTTPError as error: - if error.response.status_code ==500: - raise LLMError(error.request, error.response, f"Response Error,status_code:{error.response.status_code},Reason: Use of Invalid Token") - elif error.response.status_code ==404: - raise LLMError(error.request,error.response,f"404 Client Error: Not Found for url: {self.BASE_URL}") - raise error - - except requests.exceptions.RequestException as error: - raise error - - - - + response_msg = { + 404: "Api endpoint Does not exist", + 500: "Use of Invalid Token/or the Fahamu Api is currently down", + 400: "You sent a bad Fahamu request", + 401: "You do not have authorization to perform the request", + } + for _i in range(max_retries): + response = super().request(method, url, *args, **kwargs) if response.ok: - if method.lower() == "get" and response.json().get("data") is None: + if method.lower() == "get" and not response.json().get("data"): + # note this is a dirty trick to check if fahamu has returned the results + # the issue is that the api only returns 500 or 200 satus code + # TODO: fix this on their end time.sleep(retry_delay) continue - else: - return response + return response else: - time.sleep(retry_delay) - return response - - @staticmethod - def get_task_id_from_result(response): - task_id = json.loads(response.text) - result = f"?task_id={task_id.get('task_id', '')}" - return result - - @staticmethod - def get_answer_using_task_id(extend_url, my_auth): - try: - response = requests.get( - answer_url + extend_url, data={}, headers=my_auth) - response.raise_for_status() - return response - except requests.exceptions.RequestException as error: - # Handle the exception appropriately, e.g., log the error - raise error - - @staticmethod - def filter_response_text(val): - """ - Filters out non-printable characters from the input string and parses it as JSON. - - Args: - val (str): Input string to be filtered and parsed. - - Returns: - dict: Parsed JSON object. 
- # remove this - """ - return json.loads(''.join([str(char) for char in val if char in string.printable])) - - def getTaskIDFromResult(self, res): - return json.loads(res.text) - - def extendTaskID(self, task_id): - return '?task_id=' + str(task_id['task_id']) - - def get_gnqa(self, query): - qstr = quote(query) - res, task_id = api_client.ask('?ask=' + qstr) - res, success = api_client.get_answer(task_id) - - if success == 1: - resp_text = filter_response_text(res.text) - answer = resp_text.get('data', {}).get('answer', '') - context = resp_text.get('data', {}).get('context', '') - return answer, context - else: - return res, "Unfortunately, I have nothing." + raise LLMError(f"Request error with code:\ + {response.status_code} occurred with reason:\ + {response_msg.get(response.status_code,response.reason)}", + self.query) + #time.sleep(retry_delay) + raise LLMError("Timeout error: We couldn't provide a response,Please try\ + to rephrase your question to receive feedback", + self.query) diff --git a/gn3/llms/errors.py b/gn3/llms/errors.py index e9f7c02..a3a47a3 100644 --- a/gn3/llms/errors.py +++ b/gn3/llms/errors.py @@ -1,32 +1,11 @@ - -# pylint: skip-file +""" Error handlers for Fahamu Api""" import json - from requests import HTTPError class UnprocessableEntity(HTTPError): - """An HTTP 422 Unprocessable Entity error occurred. - + """Error for HTTP 422 Unprocessable Entity https://help.helpjuice.com/en_US/api-v3/api-v3#errors - - The request could not be processed, usually due to a missing or invalid parameter. - - The response will also include an error object with an explanation of fields that - are missing or invalid. Here is an example: - - .. code-block:: - - HTTP/1.1 422 Unprocessable Entity - - - { - "errors": [ - { - "email": "is not valid." 
- } - ] - } """ def __init__(self, request, response): @@ -56,7 +35,5 @@ class UnprocessableEntity(HTTPError): msg, request=request, response=response) -class LLMError(HTTPError): - def __init__(self, request, response, msg): - super(HTTPError, self).__init__( - msg, request=request, response=response) +class LLMError(Exception): + """custom exception for LLMErrorMIxins""" diff --git a/gn3/llms/process.py b/gn3/llms/process.py index e38b73e..b8e47e7 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -1,25 +1,54 @@ """this module contains code for processing response from fahamu client.py""" +# pylint: disable=C0301 import os +import re import string import json - -from urllib.parse import urljoin -from urllib.parse import quote import logging -import requests +from urllib.parse import quote from gn3.llms.client import GeneNetworkQAClient -from gn3.llms.response import DocIDs BASE_URL = 'https://genenetwork.fahamuai.com/api/tasks' - - -# pylint: disable=C0301 - +BASEDIR = os.path.abspath(os.path.dirname(__file__)) + + +class DocIDs(): + """ Class Method to Parse document id and names from files""" + def __init__(self): + """ + init method for Docids + * doc_ids.json: open doc_ids for gn references + * sugar_doc_ids: open doc_ids for diabetes references + """ + self.doc_ids = load_file("doc_ids.json", BASEDIR) + sugar_doc_ids = load_file("all_files.json", BASEDIR) + self.format_doc_ids(sugar_doc_ids) + + def format_doc_ids(self, docs): + """method to format doc_ids for list items doc_id and doc_name""" + for _key, val in docs.items(): + if isinstance(val, list): + for doc_obj in val: + doc_name = doc_obj["filename"].removesuffix(".pdf").removesuffix(".txt").replace("_", "") + self.doc_ids.update({doc_obj["id"]: doc_name}) + + def get_info(self, doc_id): + """ interface to make read from doc_ids + and extract info data else returns + doc_id + Args: + doc_id: str: a search key for doc_ids + Returns: + an object if doc id exists else + raises a KeyError + """ + return self.doc_ids[doc_id] def format_bibliography_info(bib_info): - """Function for formatting bibliography info""" + """Utility function for formatting bibliography info + """ if isinstance(bib_info, str): return bib_info.removesuffix('.txt') elif isinstance(bib_info, dict): @@ -27,58 +56,62 @@ def format_bibliography_info(bib_info): return bib_info -def filter_response_text(val): - """helper function for filtering non-printable chars""" - return json.loads(''.join([str(char) - for char in val if char in string.printable])) - - def parse_context(context, get_info_func, format_bib_func): - """function to parse doc_ids content""" + """Function to parse doc_ids content + Args: + context: raw references from fahamu api + get_info_func: function to get doc_ids info + format_bib_func: function to foramt bibliography info + Returns: + an list with each item having (doc_id,bib_info, + combined reference text) + """ results = [] for doc_ids, summary in context.items(): combo_txt = "" for entry in summary: combo_txt += "\t" + entry["text"] - doc_info = get_info_func(doc_ids) - bib_info = doc_ids if doc_ids == doc_info else format_bib_func( - doc_info) + try: + doc_info = get_info_func(doc_ids) + bib_info = format_bib_func(doc_info) + except KeyError: + bib_info = doc_ids + pattern = r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*' + combo_text = re.sub(pattern, + lambda x: f"<a href='{x[0]}' target=_blank> {x[0]} </a>", + combo_txt) results.append( - {"doc_id": doc_ids, "bibInfo": bib_info, "comboTxt": combo_txt}) + {"doc_id": 
doc_ids, "bibInfo": bib_info, + "comboTxt": combo_text}) return results -def rate_document(task_id, doc_id, rating, auth_token): - """This method is used to provide feedback for a document by making a rating.""" - # todo move this to clients - try: - url = urljoin(BASE_URL, - f"""/feedback?task_id={task_id}&document_id={doc_id}&feedback={rating}""") - headers = {"Authorization": f"Bearer {auth_token}"} - - resp = requests.post(url, headers=headers) - resp.raise_for_status() - - return {"status": "success", **resp.json()} - except requests.exceptions.HTTPError as http_error: - raise RuntimeError(f"HTTP Error Occurred:\ - {http_error.response.text} -with status code- {http_error.response.status_code}") from http_error - except Exception as error: - raise RuntimeError(f"An error occurred: {str(error)}") from error - - def load_file(filename, dir_path): - """function to open and load json file""" - file_path = os.path.join(dir_path, f"{filename}") - if not os.path.isfile(file_path): - raise FileNotFoundError(f"{filename} was not found or is a directory") - with open(file_path, "rb") as file_handler: + """Utility function to read json file + Args: + filename: file name to read + dir_path: base directory for the file + Returns: json data read to a dict + """ + with open(os.path.join(dir_path, f"{filename}"), + "rb") as file_handler: return json.load(file_handler) def fetch_pubmed(references, file_name, data_dir=""): - """method to fetch and populate references with pubmed""" - + """ + Fetches PubMed data from a JSON file and populates the\ + references dictionary. + + Args: + references (dict): Dictionary with document IDs as keys\ + and reference data as values. + filename (str): Name of the JSON file containing PubMed data. + data_dir (str): Base directory where the data files are located. + + Returns: + dict: Updated references dictionary populated with the PubMed data. 
+ """ try: pubmed = load_file(file_name, os.path.join(data_dir, "gn-meta/lit")) for reference in references: @@ -92,44 +125,27 @@ def fetch_pubmed(references, file_name, data_dir=""): return references -def get_gnqa(query, auth_token, tmp_dir=""): - """entry function for the gn3 api endpoint()""" - - api_client = GeneNetworkQAClient(requests.Session(), api_key=auth_token) - res, task_id = api_client.ask('?ask=' + quote(query), auth_token) - if task_id == 0: - raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}") - res, success = api_client.get_answer(task_id) - if success == 1: - resp_text = filter_response_text(res.text) - if resp_text.get("data") is None: - return task_id, "Please try to rephrase your question to receive feedback", [] - answer = resp_text['data']['answer'] - context = resp_text['data']['context'] - references = parse_context( - context, DocIDs().getInfo, format_bibliography_info) - references = fetch_pubmed(references, "pubmed.json", tmp_dir) - - return task_id, answer, references - else: - return task_id, "Please try to rephrase your question to receive feedback", [] - - -def fetch_query_results(query, user_id, redis_conn): - """this method fetches prev user query searches""" - result = redis_conn.get(f"LLM:{user_id}-{query}") - if result: - return json.loads(result) - return { - "query": query, - "answer": "Sorry No answer for you", - "references": [], - "task_id": None - } - - -def get_user_queries(user_id, redis_conn): - """methos to fetch all queries for a specific user""" - - results = redis_conn.keys(f"LLM:{user_id}*") - return [query for query in [result.partition("-")[2] for result in results] if query != ""] +def get_gnqa(query, auth_token, data_dir=""): + """entry function for the gn3 api endpoint() + ARGS: + query: what is a gene + auth_token: token to connect to api_client + data_dir: base datirectory for gn3 data + Returns: + task_id: fahamu unique identifier for task + answer + references: contains doc_name,reference,pub_med_info + """ + api_client = GeneNetworkQAClient(api_key=auth_token) + res, task_id = api_client.ask('?ask=' + quote(query), query=query) + res, _status = api_client.get_answer(task_id) + resp_text = json.loads(''.join([str(char) + for char in res.text if char in string.printable])) + answer = re.sub(r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*', + lambda x: f"<a href='{x[0]}' target=_blank> {x[0]} </a>", + resp_text["data"]["answer"]) + context = resp_text['data']['context'] + return task_id, answer, fetch_pubmed(parse_context( + context, DocIDs().get_info, + format_bibliography_info), + "pubmed.json", data_dir) diff --git a/gn3/llms/response.py b/gn3/llms/response.py deleted file mode 100644 index 11cbd94..0000000 --- a/gn3/llms/response.py +++ /dev/null @@ -1,75 +0,0 @@ - -# pylint: skip-file -import string -import json -import os - - -basedir = os.path.abspath(os.path.dirname(__file__)) - - -class DocIDs(): - def __init__(self): - # open doc ids for GN refs - self.doc_ids = self.loadFile("doc_ids.json") - # open doc ids for Diabetes references - self.sugar_doc_ids = self.loadFile("all_files.json") - # format is not what I prefer, it needs to be rebuilt - self.formatDocIDs(self.sugar_doc_ids) - - def loadFile(self, file_name): - file_path = os.path.join(basedir, file_name) - if os.path.isfile(file_path): - f = open(file_path, "rb") - result = json.load(f) - f.close() - return result - else: - raise Exception("\n{0} -- File does not exist\n".format(file_path)) - - def formatDocIDs(self, values): - for _key, _val in 
values.items(): - if isinstance(_val, list): - for theObject in _val: - docName = self.formatDocumentName(theObject['filename']) - docID = theObject['id'] - self.doc_ids.update({docID: docName}) - - def formatDocumentName(self, val): - result = val.removesuffix('.pdf') - result = result.removesuffix('.txt') - result = result.replace('_', ' ') - return result - - - def getInfo(self, doc_id): - if doc_id in self.doc_ids.keys(): - return self.doc_ids[doc_id] - else: - return doc_id - -class RespContext(): - def __init__(self, context): - self.cntxt = context - self.theObj = {} - - def parseIntoObject(self, info): - # check for obj, arr, or val - for key, val in info.items(): - if isinstance(val, list): - self.parseIntoObject(val) - elif isinstance(val, str) or isinstance(val, int): - self.theObj[key] = val - self.theObj[key] = self.val - - -def createAccordionFromJson(theContext): - result = '' - # loop thru json array - ndx = 0 - for docID, summaryLst in theContext.items(): - # item is a key with a list - comboTxt = '' - for entry in summaryLst: - comboTxt += '\t' + entry['text'] - return result
\ No newline at end of file diff --git a/sql/update/llm_db_tables.sql b/sql/update/llm_db_tables.sql new file mode 100644 index 0000000..b501832 --- /dev/null +++ b/sql/update/llm_db_tables.sql @@ -0,0 +1,47 @@ +-- llm_db_update.sql --- + +-- Copyright (C) 2024 Alexander kabui <alexanderkabua@gmail.com> + +-- Author: Alexander Kabui <alexanderkabua@gmail.com> + +-- This program is free software; you can redistribute it and/or +-- modify it under the terms of the GNU General Public License +-- as published by the Free Software Foundation; either version 3 +-- of the License, or (at your option) any later version. + +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. + +-- You should have received a copy of the GNU General Public License +-- along with this program. If not, see <http://www.gnu.org/licenses/>. + +-- Sql file to create the tables for history rating and adding indexing for the history table +-- this targets setting up a new db +-- and adding timestamp column the Rating table + + +CREATE TABLE IF NOT EXISTS history ( + user_id TEXT NOT NULL, + task_id TEXT NOT NULL, + query TEXT NOT NULL, + results JSONB, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (task_id) +) WITHOUT ROWID; + + +CREATE INDEX IF NOT EXISTS idx_tbl_history_cols_task_id_user_id +ON history (task_id, user_id); + + + +CREATE TABLE IF NOT EXISTS Rating( + user_id TEXT NOT NULL, + query TEXT NOT NULL, + answer TEXT NOT NULL, + weight INTEGER NOT NULL DEFAULT 0, + task_id TEXT NOT NULL UNIQUE, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (task_id)); diff --git a/sql/update/llm_db_update.sql b/sql/update/llm_db_update.sql new file mode 100644 index 0000000..7f1a9f9 --- /dev/null +++ b/sql/update/llm_db_update.sql @@ -0,0 +1,37 @@ +-- llm_db_update.sql --- + +-- Copyright (C) 2024 Alexander kabui <alexanderkabua@gmail.com> + +-- Author: Alexander Kabui <alexanderkabua@gmail.com> + +-- This program is free software; you can redistribute it and/or +-- modify it under the terms of the GNU General Public License +-- as published by the Free Software Foundation; either version 3 +-- of the License, or (at your option) any later version. + +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. + +-- You should have received a copy of the GNU General Public License +-- along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +-- Sql file to create the history table, adding indexing for the history table +-- and adding timestamp column the Rating table + + +CREATE TABLE IF NOT EXISTS history ( + user_id TEXT NOT NULL, + task_id TEXT NOT NULL, + query TEXT NOT NULL, + results JSONB, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (task_id) +) WITHOUT ROWID; + + +CREATE INDEX IF NOT EXISTS idx_tbl_history_cols_task_id_user_id +ON history (task_id, user_id); + +ALTER TABLE Rating ADD COLUMN created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP; diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index 7b8a970..8fbaba6 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ -1,10 +1,9 @@ -# pylint: disable=unused-argument -"""Test cases for procedures defined in llms module""" -from dataclasses import dataclass +"""Test cases for procedures defined in llms """ +# pylint: disable=C0301 import pytest -from gn3.llms.process import get_gnqa +from gn3.llms.process import fetch_pubmed from gn3.llms.process import parse_context - +from gn3.llms.process import format_bibliography_info @pytest.mark.unit_test @@ -36,67 +35,72 @@ def test_parse_context(): assert parsed_result == expected_result - -@dataclass(frozen=True) -class MockResponse: - """mock a response object""" - text: str - - def __getattr__(self, name: str): - return self.__dict__[f"_{name}"] - - -class MockGeneNetworkQAClient: - """mock the GeneNetworkQAClient class""" - - def __init__(self, session, api_key): - pass - - def ask(self, query, auth_token): - """mock method for ask query""" - # Simulate the ask method - return MockResponse("Mock response"), "F400995EAFE104EA72A5927CE10C73B7" - - def get_answer(self, task_id): - """mock get_answer method""" - return MockResponse("Mock answer"), 1 - - -def mock_filter_response_text(text): - """ method to simulate the filterResponseText method""" - return {"data": {"answer": "Mock answer for what is a gene", "context": {}}} - - -def mock_parse_context(context, get_info_func, format_bib_func): - """method to simulate the parse context method""" - return [] - - @pytest.mark.unit_test -def test_get_gnqa(monkeypatch): - """test for process.get_gnqa functoin""" - monkeypatch.setattr( - "gn3.llms.process.GeneNetworkQAClient", - MockGeneNetworkQAClient - ) +def test_format_bib_info(): + """Test for formatting bibliography info """ + mock_fahamu_bib_info = [ + { + "author": "J.m", + "firstName": "john", + "title": "Genes and aging", + "year": 2013, + "doi": "https://Articles.com/12231" + }, + "2019-Roy-Evaluation of Sirtuin-3 probe quality and co-expressed genes", + "2015 - Differential regional and cellular distribution of TFF3 peptide in the human brain.txt"] + expected_result = [ + "J.m.Genes and aging.2013.https://Articles.com/12231 ", + "2019-Roy-Evaluation of Sirtuin-3 probe quality and co-expressed genes", + "2015 - Differential regional and cellular distribution of TFF3 peptide in the human brain" + ] - monkeypatch.setattr( - 'gn3.llms.process.filter_response_text', - mock_filter_response_text - ) - monkeypatch.setattr( - 'gn3.llms.process.parse_context', - mock_parse_context - ) + assert all((format_bibliography_info(data) == expected + for data, expected + in zip(mock_fahamu_bib_info, expected_result))) - query = "What is a gene" - auth_token = "test_token" - result = get_gnqa(query, auth_token) - expected_result = ( - "F400995EAFE104EA72A5927CE10C73B7", - 'Mock answer for what is a gene', - [] - ) +@pytest.mark.unit_test +def test_fetching_pubmed_info(monkeypatch): + """Test for 
fetching and populating pubmed data with pubmed info""" + def mock_load_file(_filename, _dir_path): + return { + "12121": { + "Abstract": "items1", + "Author": "A1" + } + } + # patch the module with the mocked function + + monkeypatch.setattr("gn3.llms.process.load_file", mock_load_file) + expected_results = [ + { + "title": "Genes", + "year": "2014", + "doi": "https/article/genes/12121", + "doc_id": "12121", + "pubmed": { + "Abstract": "items1", + "Author": "A1" + } + }, + { + "title": "Aging", + "year": "2014", + "doc_id": "12122" + } + ] - assert result == expected_result + data = [{ + "title": "Genes", + "year": "2014", + "doi": "https/article/genes/12121", + "doc_id": "12121", + }, + { + "title": "Aging", + "year": "2014", + "doc_id": "12122" + }] + + assert (fetch_pubmed(data, "/pubmed.json", "data/") + == expected_results) |
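The new `/search` and `/rating` endpoints in `gn3/api/llm.py` persist GNQA results and ratings in SQLite, creating the tables on demand and using an `ON CONFLICT` upsert so a repeat rating only updates the weight. Below is a minimal, self-contained sketch of that storage pattern using the stdlib `sqlite3` module directly rather than the `gn3.auth.db` connection helper; the table definitions follow the diff, while the user id, task id and response payload are invented sample values. The batch delete at the end mirrors the placeholder-expansion trick used by the `DELETE /search/records` handler.

```python
import json
import sqlite3

conn = sqlite3.connect(":memory:")   # the real code opens LLM_DB_PATH
cursor = conn.cursor()

# history keeps one row per GNQA task, keyed by the Fahamu task id
cursor.execute("""CREATE TABLE IF NOT EXISTS history(
                      user_id TEXT NOT NULL,
                      task_id TEXT NOT NULL,
                      query TEXT NOT NULL,
                      results JSONB,
                      created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                      PRIMARY KEY(task_id)) WITHOUT ROWID""")

# Rating is also keyed by task_id so re-rating a task updates the old row
cursor.execute("""CREATE TABLE IF NOT EXISTS Rating(
                      user_id TEXT NOT NULL,
                      query TEXT NOT NULL,
                      answer TEXT NOT NULL,
                      weight INTEGER NOT NULL DEFAULT 0,
                      task_id TEXT NOT NULL UNIQUE,
                      created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                      PRIMARY KEY(task_id))""")

response = {"task_id": "TASK-1", "query": "what is a gene",
            "answer": "a unit of heredity", "references": []}
cursor.execute("INSERT INTO history(user_id, task_id, query, results) "
               "VALUES(?, ?, ?, ?)",
               ("user-1", "TASK-1", "what is a gene", json.dumps(response)))

# Upsert: a second rating for the same task only changes the weight
cursor.execute("""INSERT INTO Rating(user_id, query, answer, weight, task_id)
                  VALUES(?, ?, ?, ?, ?)
                  ON CONFLICT(task_id) DO UPDATE SET weight=excluded.weight""",
               ("user-1", "what is a gene", "a unit of heredity", 1, "TASK-1"))

# Batch delete, expanding one "?" placeholder per task id
task_ids = ["TASK-1", "TASK-2"]
placeholders = ",".join("?" * len(task_ids))
cursor.execute(f"DELETE FROM history WHERE task_id IN ({placeholders}) "
               "AND user_id=?",
               (*task_ids, "user-1"))
conn.commit()
```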
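`/search/records` returns the stored history ordered newest-first; because `created_at` comes back as a `CURRENT_TIMESTAMP` string, the endpoint sorts in Python with `datetime.strptime`. A small illustration with made-up rows:

```python
from datetime import datetime

rows = [
    {"task_id": "TASK-1", "query": "what is a gene",
     "created_at": "2024-09-06 08:15:00"},
    {"task_id": "TASK-2", "query": "what is ageing",
     "created_at": "2024-09-06 09:30:00"},
]
newest_first = sorted(rows, reverse=True,
                      key=lambda row: datetime.strptime(row["created_at"],
                                                        "%Y-%m-%d %H:%M:%S"))
print([row["task_id"] for row in newest_first])   # ['TASK-2', 'TASK-1']
```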
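`gn3/errors.py` now registers a handler so that any uncaught `LLMError` is returned as a JSON 500 response that echoes the offending query. Here is a stripped-down sketch against a throwaway Flask app rather than the real `create_app()` factory; the payload keys follow the diff, and the route used to trigger the error is purely illustrative.

```python
import traceback

from flask import Flask, jsonify


class LLMError(Exception):
    """Custom exception carrying (message, query) for GNQA failures."""


def handle_llm_error(exc):
    # Mirrors the shape of the error object built in gn3/errors.py
    resp = jsonify({
        "query": exc.args[1] if len(exc.args) > 1 else "",
        "error_type": type(exc).__name__,
        "error": exc.args[0] if exc.args else "Fahamu gnqa error occurred",
        "trace": traceback.format_exc(),
    })
    resp.status_code = 500
    return resp


app = Flask(__name__)
app.register_error_handler(LLMError, handle_llm_error)


@app.route("/search")
def search():
    raise LLMError("an LLM authorisation token is required", "what is a gene")
```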
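The rewritten `gn3/llms/client.py` builds its session from two pieces: an `HTTPAdapter` subclass that injects a default timeout, and urllib3's `Retry` for transient failures. The sketch below reproduces that wiring; the exact `Retry` parameters are not visible in the diff hunk, so the `status_forcelist` shown here is an assumption, and the API token is a placeholder.

```python
from requests import Session
from requests.adapters import HTTPAdapter, Retry


class TimeoutHTTPAdapter(HTTPAdapter):
    """HTTPAdapter that falls back to a default timeout when none is given."""

    def __init__(self, timeout, *args, **kwargs):
        self.timeout = timeout
        super().__init__(*args, **kwargs)

    def send(self, *args, **kwargs):
        # Apply the default timeout unless the caller supplied one
        kwargs["timeout"] = kwargs.get("timeout") or self.timeout
        return super().send(*args, **kwargs)


def build_session(api_key, timeout=30, total_retries=5, backoff_factor=2):
    session = Session()
    session.headers.update({"Authorization": f"Bearer {api_key}"})
    adapter = TimeoutHTTPAdapter(
        timeout=timeout,
        max_retries=Retry(total=total_retries,
                          backoff_factor=backoff_factor,
                          # assumed retry statuses, not taken from the diff
                          status_forcelist=[429, 500, 502, 503, 504]))
    session.mount("https://", adapter)
    session.mount("http://", adapter)
    return session


session = build_session("FAHAMU_TOKEN_PLACEHOLDER")
# e.g. session.post("https://genenetwork.fahamuai.com/api/tasks?ask=what+is+a+gene")
```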
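Because the Fahamu answers endpoint keeps returning 200 with an empty `data` field until the task finishes, `GeneNetworkQAClient.custom_request` polls in a loop and converts failures into an `LLMError` that carries the original query. A simplified, self-contained version of that loop follows; the `poll_for_answer` name, the URL handling and the retry counts are illustrative, not the actual method.

```python
import time


class LLMError(Exception):
    """Custom exception raised for Fahamu/GNQA request failures."""


def poll_for_answer(session, url, query, max_retries=50, retry_delay=3):
    # Human-readable reasons for the status codes the client distinguishes
    response_msg = {
        404: "Api endpoint Does not exist",
        500: "Use of Invalid Token/or the Fahamu Api is currently down",
        400: "You sent a bad Fahamu request",
        401: "You do not have authorization to perform the request",
    }
    for _ in range(max_retries):
        response = session.get(url)
        if not response.ok:
            raise LLMError(
                f"Request error with code: {response.status_code} occurred "
                f"with reason: "
                f"{response_msg.get(response.status_code, response.reason)}",
                query)
        if response.json().get("data"):
            return response.json()
        time.sleep(retry_delay)   # task not ready yet; poll again
    raise LLMError("Timeout error: We couldn't provide a response, please try "
                   "to rephrase your question to receive feedback", query)
```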
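`gn3/llms/process.py` now assembles references by walking the Fahamu context, concatenating the text snippets per document and rewriting bare URLs as anchor tags with the regex from the diff. A standalone sketch with an invented doc-id table and payload (the real code resolves ids through the `DocIDs` class and `doc_ids.json`):

```python
import re

URL_PATTERN = r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*'

# invented lookup table; the real one is loaded from doc_ids.json/all_files.json
DOC_IDS = {"doc-1": "2019-Roy-Evaluation of Sirtuin-3 probe quality"}


def parse_context(context):
    """Turn {doc_id: [snippets]} into reference entries with linkified text."""
    results = []
    for doc_id, snippets in context.items():
        combo_txt = "".join("\t" + entry["text"] for entry in snippets)
        combo_txt = re.sub(
            URL_PATTERN,
            lambda m: f"<a href='{m[0]}' target=_blank> {m[0]} </a>",
            combo_txt)
        results.append({"doc_id": doc_id,
                        "bibInfo": DOC_IDS.get(doc_id, doc_id),
                        "comboTxt": combo_txt})
    return results


print(parse_context({"doc-1": [{"text": "see www.genenetwork.org for data"}],
                     "doc-2": [{"text": "no link here"}]}))
```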