Diffstat (limited to 'gn3')
-rw-r--r--  gn3/api/llm.py       188
-rw-r--r--  gn3/errors.py         18
-rw-r--r--  gn3/llms/client.py    57
-rw-r--r--  gn3/llms/errors.py     7
-rw-r--r--  gn3/llms/process.py  121
5 files changed, 197 insertions(+), 194 deletions(-)
diff --git a/gn3/api/llm.py b/gn3/api/llm.py
index 442252f..952a5b9 100644
--- a/gn3/api/llm.py
+++ b/gn3/api/llm.py
@@ -1,125 +1,117 @@
"""Api endpoints for gnqa"""
-from datetime import timedelta
import json
-import sqlite3
-from redis import Redis
-
+from datetime import datetime, timezone
from flask import Blueprint
from flask import current_app
from flask import jsonify
from flask import request
from gn3.llms.process import get_gnqa
-from gn3.llms.process import get_user_queries
-from gn3.llms.process import fetch_query_results
from gn3.llms.errors import LLMError
from gn3.auth.authorisation.oauth2.resource_server import require_oauth
from gn3.auth import db
+
gnqa = Blueprint("gnqa", __name__)
-@gnqa.route("/gnqna", methods=["POST"])
-def gnqna():
- """Main gnqa endpoint"""
+@gnqa.route("/search", methods=["PUT"])
+def search():
+    """API endpoint for searching queries against the Fahamu API."""
query = request.json.get("querygnqa", "")
if not query:
return jsonify({"error": "querygnqa is missing in the request"}), 400
-
- try:
- fahamu_token = current_app.config.get("FAHAMU_AUTH_TOKEN")
- if fahamu_token is None:
- return jsonify({"query": query,
- "error": "Use of invalid fahamu auth token"}), 500
- task_id, answer, refs = get_gnqa(
- query, fahamu_token, current_app.config.get("DATA_DIR"))
- response = {
- "task_id": task_id,
- "query": query,
- "answer": answer,
- "references": refs
- }
- with (Redis.from_url(current_app.config["REDIS_URI"],
- decode_responses=True) as redis_conn):
- redis_conn.setex(
- f"LLM:random_user-{query}",
- timedelta(days=10), json.dumps(response))
- return jsonify({
- **response,
- "prev_queries": get_user_queries("random_user", redis_conn)
- })
- except LLMError as error:
- return jsonify({"query": query,
- "error": f"Request failed-{str(error)}"}), 500
+ fahamu_token = current_app.config.get("FAHAMU_AUTH_TOKEN")
+ if not fahamu_token:
+        raise LLMError(
+            "Request failed: an LLM authorisation token is required", query)
+ task_id, answer, refs = get_gnqa(
+ query, fahamu_token, current_app.config.get("DATA_DIR"))
+ response = {
+ "task_id": task_id,
+ "query": query,
+ "answer": answer,
+ "references": refs
+ }
+ with (db.connection(current_app.config["LLM_DB_PATH"]) as conn,
+ require_oauth.acquire("profile user") as token):
+ cursor = conn.cursor()
+ cursor.execute("""CREATE TABLE IF NOT EXISTS
+ history(user_id TEXT NOT NULL,
+ task_id TEXT NOT NULL,
+ query TEXT NOT NULL,
+ results TEXT,
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+ PRIMARY KEY(task_id)) WITHOUT ROWID""")
+ cursor.execute(
+ """INSERT INTO history(user_id, task_id, query, results)
+ VALUES(?, ?, ?, ?)
+ """, (str(token.user.user_id), str(task_id["task_id"]),
+ query,
+ json.dumps(response))
+ )
+ return response
@gnqa.route("/rating/<task_id>", methods=["POST"])
@require_oauth("profile")
-def rating(task_id):
- """Endpoint for rating qnqa query and answer"""
- try:
- llm_db_path = current_app.config["LLM_DB_PATH"]
- with (require_oauth.acquire("profile") as token,
- db.connection(llm_db_path) as conn):
-
- results = request.json
- user_id, query, answer, weight = (token.user.user_id,
- results.get("query"),
- results.get("answer"),
- results.get("weight", 0))
- cursor = conn.cursor()
- create_table = """CREATE TABLE IF NOT EXISTS Rating(
- user_id TEXT NOT NULL,
- query TEXT NOT NULL,
- answer TEXT NOT NULL,
- weight INTEGER NOT NULL DEFAULT 0,
- task_id TEXT NOT NULL UNIQUE
- )"""
- cursor.execute(create_table)
- cursor.execute("""INSERT INTO Rating(user_id,query,
- answer,weight,task_id)
- VALUES(?,?,?,?,?)
- ON CONFLICT(task_id) DO UPDATE SET
- weight=excluded.weight
- """, (str(user_id), query, answer, weight, task_id))
+def rate_queries(task_id):
+    """API endpoint for rating a GNQA query and answer."""
+ with (require_oauth.acquire("profile") as token,
+ db.connection(current_app.config["LLM_DB_PATH"]) as conn):
+ results = request.json
+ user_id, query, answer, weight = (token.user.user_id,
+ results.get("query"),
+ results.get("answer"),
+ results.get("weight", 0))
+ cursor = conn.cursor()
+ create_table = """CREATE TABLE IF NOT EXISTS Rating(
+ user_id TEXT NOT NULL,
+ query TEXT NOT NULL,
+ answer TEXT NOT NULL,
+ weight INTEGER NOT NULL DEFAULT 0,
+ task_id TEXT NOT NULL UNIQUE,
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+ PRIMARY KEY(task_id))"""
+ cursor.execute(create_table)
+ cursor.execute("""INSERT INTO Rating(user_id, query,
+ answer, weight, task_id, created_at)
+ VALUES(?, ?, ?, ?, ?, ?)
+ ON CONFLICT(task_id) DO UPDATE SET
+ weight=excluded.weight
+ """, (str(user_id), query, answer, weight, task_id,
+ datetime.now(timezone.utc)))
return {
- "message":
- "You have successfully rated this query:Thank you!!"
+        "message": "You have successfully rated this query. Thank you!"
}, 200
- except sqlite3.Error as error:
- return jsonify({"error": str(error)}), 500
-@gnqa.route("/history/<query>", methods=["GET"])
+@gnqa.route("/history", methods=["GET", "DELETE"])
@require_oauth("profile user")
-def fetch_user_hist(query):
- """"Endpoint to fetch previos searches for User"""
- with (require_oauth.acquire("profile user") as the_token,
- Redis.from_url(current_app.config["REDIS_URI"],
- decode_responses=True) as redis_conn):
- return jsonify({
- **fetch_query_results(query, the_token.user.id, redis_conn),
- "prev_queries": get_user_queries("random_user", redis_conn)
- })
-
-
-@gnqa.route("/historys/<query>", methods=["GET"])
-def fetch_users_hist_records(query):
- """method to fetch all users hist:note this is a test functionality
- to be replaced by fetch_user_hist
- """
- with Redis.from_url(current_app.config["REDIS_URI"],
- decode_responses=True) as redis_conn:
- return jsonify({
- **fetch_query_results(query, "random_user", redis_conn),
- "prev_queries": get_user_queries("random_user", redis_conn)
- })
-
-
-@gnqa.route("/get_hist_names", methods=["GET"])
-def fetch_prev_hist_ids():
- """Test method for fetching history for Anony Users"""
- with (Redis.from_url(current_app.config["REDIS_URI"],
- decode_responses=True)) as redis_conn:
- return jsonify({"prev_queries": get_user_queries("random_user",
- redis_conn)})
+def fetch_prev_history():
+    """API endpoint to fetch or delete a user's previous GNQA searches."""
+ with (require_oauth.acquire("profile user") as token,
+ db.connection(current_app.config["LLM_DB_PATH"]) as conn):
+ cursor = conn.cursor()
+ if request.method == "DELETE":
+ task_ids = list(request.json.values())
+ query = """DELETE FROM history
+ WHERE task_id IN ({})
+ and user_id=?""".format(",".join("?" * len(task_ids)))
+ cursor.execute(query, (*task_ids, str(token.user.user_id),))
+ return jsonify({})
+ elif (request.method == "GET" and
+ request.args.get("search_term")):
+ cursor.execute(
+ """SELECT results from history
+ Where task_id=? and user_id=?""",
+ (request.args.get("search_term"),
+ str(token.user.user_id),))
+ record = cursor.fetchone()
+ if record:
+ return dict(record).get("results")
+ return {}
+ cursor.execute(
+ """SELECT task_id,query from history WHERE user_id=?""",
+ (str(token.user.user_id),))
+ return jsonify([dict(item) for item in cursor.fetchall()])
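
The rewritten llm.py endpoints drop the Redis-backed history in favour of an SQLite history table and require an OAuth2 token on every route. A minimal client-side sketch of exercising the new routes follows; the base URL, token value, and the DELETE payload key are assumptions for illustration, not part of this change.

# Sketch of calling the new GNQA routes; BASE_URL and TOKEN are placeholders.
import requests

BASE_URL = "http://localhost:8080/api/llm"   # assumed mount point for the blueprint
TOKEN = "an-oauth2-access-token"             # assumed OAuth2 bearer token
HEADERS = {"Authorization": f"Bearer {TOKEN}"}

# PUT /search runs a query against the Fahamu API and stores it in history.
result = requests.put(f"{BASE_URL}/search",
                      json={"querygnqa": "what is a QTL?"},
                      headers=HEADERS).json()

# POST /rating/<task_id> records a weight for the answer; the inner task_id
# key (assumed here) mirrors what the search endpoint stores in history.
requests.post(f"{BASE_URL}/rating/{result['task_id']['task_id']}",
              json={"query": result["query"],
                    "answer": result["answer"],
                    "weight": 1},
              headers=HEADERS)

# GET /history lists (task_id, query) pairs for the logged-in user;
# ?search_term=<task_id> returns one stored result.
history = requests.get(f"{BASE_URL}/history", headers=HEADERS).json()

# DELETE /history removes the task ids passed as the values of the JSON body.
requests.delete(f"{BASE_URL}/history",
                json={"task_id_0": history[0]["task_id"]},
                headers=HEADERS)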
diff --git a/gn3/errors.py b/gn3/errors.py
index 1833bf6..c53604f 100644
--- a/gn3/errors.py
+++ b/gn3/errors.py
@@ -16,7 +16,7 @@ from authlib.oauth2.rfc6749.errors import OAuth2Error
from flask import Flask, jsonify, Response, current_app
from gn3.auth.authorisation.errors import AuthorisationError
-
+from gn3.llms.errors import LLMError
def add_trace(exc: Exception, jsonmsg: dict) -> dict:
"""Add the traceback to the error handling object."""
@@ -106,6 +106,21 @@ def handle_generic(exc: Exception) -> Response:
return resp
+def handle_llm_error(exc: Exception) -> Response:
+    """Handle LLM errors if not handled anywhere else."""
+ current_app.logger.error(exc)
+ resp = jsonify({
+ "query": exc.args[1],
+ "error_type": type(exc).__name__,
+ "error": (
+ exc.args[0] if bool(exc.args) else "Fahamu gnqa error occurred"
+ ),
+ "trace": traceback.format_exc()
+ })
+ resp.status_code = 500
+ return resp
+
+
def register_error_handlers(app: Flask):
"""Register application-level error handlers."""
app.register_error_handler(NotFound, page_not_found)
@@ -115,6 +130,7 @@ def register_error_handlers(app: Flask):
app.register_error_handler(AuthorisationError, handle_authorisation_error)
app.register_error_handler(RemoteDisconnected, internal_server_error)
app.register_error_handler(URLError, url_server_error)
+ app.register_error_handler(LLMError, handle_llm_error)
for exc in (
EndPointInternalError,
EndPointNotFound,
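
errors.py now registers an application-level handler for LLMError, which is expected to be raised as LLMError(message, query) so that args[0] and args[1] can be unpacked into the JSON response. A standalone sketch of that contract is shown below; only the handler body mirrors the diff, while the app and the /demo route are hypothetical.

# Standalone sketch of the LLMError handler contract.
import traceback
from flask import Flask, jsonify

class LLMError(Exception):
    """Raised as LLMError(message, query)."""

app = Flask(__name__)

@app.errorhandler(LLMError)
def handle_llm_error(exc):
    # Same JSON shape as gn3.errors.handle_llm_error in the diff.
    resp = jsonify({
        "query": exc.args[1],
        "error_type": type(exc).__name__,
        "error": exc.args[0] if exc.args else "Fahamu gnqa error occurred",
        "trace": traceback.format_exc(),
    })
    resp.status_code = 500
    return resp

@app.route("/demo")
def demo():
    # Hypothetical route that triggers the handler.
    raise LLMError("an LLM authorisation token is required", "what is a QTL?")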
diff --git a/gn3/llms/client.py b/gn3/llms/client.py
index d57bca2..ad6c400 100644
--- a/gn3/llms/client.py
+++ b/gn3/llms/client.py
@@ -55,6 +55,7 @@ class GeneNetworkQAClient(Session):
self.base_url = "https://genenetwork.fahamuai.com/api/tasks"
self.answer_url = f"{self.base_url}/answers"
self.feedback_url = f"{self.base_url}/feedback"
+ self.query = ""
adapter = TimeoutHTTPAdapter(
timeout=timeout,
@@ -83,44 +84,44 @@ class GeneNetworkQAClient(Session):
""" handler for non 200 response from fahamu api"""
return f"Error: Status code -{response.status_code}- Reason::{response.reason}"
- def ask(self, ex_url, *args, **kwargs):
+ def ask(self, ex_url, query, *args, **kwargs):
"""fahamu ask api interface"""
+ self.query = query
res = self.custom_request('POST', f"{self.base_url}{ex_url}", *args, **kwargs)
- if res.status_code != 200:
- return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0
return res, json.loads(res.text)
def get_answer(self, taskid, *args, **kwargs):
"""Fahamu get answer interface"""
- try:
- query = f"{self.answer_url}?task_id={taskid['task_id']}"
- res = self.custom_request('GET', query, *args, **kwargs)
- if res.status_code != 200:
- return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0
- return res, 1
- except TimeoutError:
- return "Timeout error occured:try to rephrase your query", 0
+ query = f"{self.answer_url}?task_id={taskid['task_id']}"
+ res = self.custom_request('GET', query, *args, **kwargs)
+ return res, 1
def custom_request(self, method, url, *args, **kwargs):
""" make custom request to fahamu api ask and get response"""
max_retries = 50
retry_delay = 3
+ response_msg = {
+            404: "API endpoint does not exist",
+            500: "Invalid token, or the Fahamu API is currently down",
+ 400: "You sent a bad Fahamu request",
+ 401: "You do not have authorization to perform the request",
+ }
for _i in range(max_retries):
- try:
- response = super().request(method, url, *args, **kwargs)
- response.raise_for_status()
- if response.ok:
- if method.lower() == "get" and response.json().get("data") is None:
- time.sleep(retry_delay)
- continue
- return response
- else:
+ response = super().request(method, url, *args, **kwargs)
+ if response.ok:
+ if method.lower() == "get" and not response.json().get("data"):
+ # note this is a dirty trick to check if fahamu has returned the results
+                    # the issue is that the api only returns 500 or 200 status code
+ # TODO: fix this on their end
time.sleep(retry_delay)
- except requests.exceptions.HTTPError as error:
- if error.response.status_code == 500:
- raise LLMError(error.request, error.response, f"Response Error with:status_code:{error.response.status_code},Reason for error: Use of Invalid Fahamu Token") from error
- raise LLMError(error.request, error.response,
- f"HTTP error occurred with error status:{error.response.status_code}") from error
- except requests.exceptions.RequestException as error:
- raise error
- raise TimeoutError
+ continue
+ return response
+ else:
+                raise LLMError(
+                    f"Request error with code {response.status_code} "
+                    "occurred with reason: "
+                    f"{response_msg.get(response.status_code, response.reason)}",
+                    self.query)
+ #time.sleep(retry_delay)
+        raise LLMError(
+            "Timeout error: we couldn't provide a response. Please try "
+            "to rephrase your question to receive feedback.",
+            self.query)
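
custom_request now retries GET requests because Fahamu answers 200 with an empty "data" field while a task is still running, and only raises LLMError once the retry budget is exhausted. A generic sketch of that poll-until-ready pattern follows; fetch and is_ready are hypothetical stand-ins for the session call and readiness check.

# Generic poll-until-ready sketch of the retry strategy in custom_request.
import time

def poll_until_ready(fetch, is_ready, max_retries=50, retry_delay=3):
    """Call fetch() until is_ready(response) is true; raise on error or timeout.

    Mirrors the client's behaviour: a 200 response whose body has no "data"
    means the task is still running, so sleep and retry instead of failing.
    """
    for _ in range(max_retries):
        response = fetch()
        if not response.ok:
            raise RuntimeError(f"request failed with status {response.status_code}")
        if is_ready(response):
            return response
        time.sleep(retry_delay)
    raise TimeoutError(f"no result after {max_retries * retry_delay} seconds")

# e.g. poll_until_ready(lambda: session.get(answer_url),
#                       lambda r: bool(r.json().get("data")))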
diff --git a/gn3/llms/errors.py b/gn3/llms/errors.py
index af3d7b0..a3a47a3 100644
--- a/gn3/llms/errors.py
+++ b/gn3/llms/errors.py
@@ -35,8 +35,5 @@ class UnprocessableEntity(HTTPError):
msg, request=request, response=response)
-class LLMError(HTTPError):
- """Custom error from making Fahamu APi request """
- def __init__(self, request, response, msg):
- super(HTTPError, self).__init__(
- msg, request=request, response=response)
+class LLMError(Exception):
+    """Custom exception for LLM errors, raised as LLMError(message, query)."""
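
LLMError is now a plain Exception rather than an HTTPError subclass, so callers no longer pass a request/response pair; they raise it with a message and the offending query. A small sketch of the new calling convention, where ask_or_raise is a hypothetical helper:

# Sketch of the new LLMError calling convention.
from gn3.llms.errors import LLMError

def ask_or_raise(query, token):
    """Raise LLMError(message, query) when no Fahamu token is configured."""
    if not token:
        raise LLMError("an LLM authorisation token is required", query)

try:
    ask_or_raise("what is a QTL?", token=None)
except LLMError as exc:
    message, query = exc.args   # the shape consumed by errors.handle_llm_error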
diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index 4725bcb..55c27a0 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -1,6 +1,7 @@
"""this module contains code for processing response from fahamu client.py"""
# pylint: disable=C0301
import os
+import re
import string
import json
import logging
@@ -21,21 +22,12 @@ class DocIDs():
        * doc_ids.json: opens doc_ids for gn references
        * sugar_doc_ids: opens doc_ids for diabetes references
"""
- self.doc_ids = self.load_file("doc_ids.json")
- self.sugar_doc_ids = self.load_file("all_files.json")
+ self.doc_ids = load_file("doc_ids.json", BASEDIR)
+ self.sugar_doc_ids = load_file("all_files.json", BASEDIR)
self.format_doc_ids(self.sugar_doc_ids)
- def load_file(self, file_name):
- """Method to load and read doc_id files"""
- file_path = os.path.join(BASEDIR, file_name)
- if os.path.isfile(file_path):
- with open(file_path, "rb") as file_handler:
- return json.load(file_handler)
- else:
- raise FileNotFoundError(f"{file_path}-- FIle does not exist\n")
-
def format_doc_ids(self, docs):
- """method to format doc_ids for list items"""
+        """Flatten list entries of (doc_id, doc_name) into the doc_ids mapping."""
for _key, val in docs.items():
if isinstance(val, list):
for doc_obj in val:
@@ -43,7 +35,14 @@ class DocIDs():
self.doc_ids.update({doc_obj["id"]: doc_name})
def get_info(self, doc_id):
- """ interface to make read from doc_ids"""
+        """Look up doc_id in doc_ids and return its info.
+
+        Args:
+            doc_id: str: a search key for doc_ids
+        Returns:
+            the doc info if doc_id is in doc_ids, otherwise doc_id itself
+        """
if doc_id in self.doc_ids.keys():
return self.doc_ids[doc_id]
else:
@@ -51,7 +50,8 @@ class DocIDs():
def format_bibliography_info(bib_info):
- """Function for formatting bibliography info"""
+ """Utility function for formatting bibliography info
+ """
if isinstance(bib_info, str):
return bib_info.removesuffix('.txt')
elif isinstance(bib_info, dict):
@@ -59,14 +59,16 @@ def format_bibliography_info(bib_info):
return bib_info
-def filter_response_text(val):
- """helper function for filtering non-printable chars"""
- return json.loads(''.join([str(char)
- for char in val if char in string.printable]))
-
-
def parse_context(context, get_info_func, format_bib_func):
- """function to parse doc_ids content"""
+    """Function to parse doc_ids content.
+    Args:
+        context: raw references from the Fahamu API
+        get_info_func: function to get doc_ids info
+        format_bib_func: function to format bibliography info
+    Returns:
+        a list with each item holding (doc_id, bib_info,
+        combined reference text)
+    """
results = []
for doc_ids, summary in context.items():
combo_txt = ""
@@ -75,13 +77,23 @@ def parse_context(context, get_info_func, format_bib_func):
doc_info = get_info_func(doc_ids)
bib_info = doc_ids if doc_ids == doc_info else format_bib_func(
doc_info)
+ pattern = r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*'
+ combo_text = re.sub(pattern,
+ lambda x: f"<a href='{x[0]}' target=_blank> {x[0]} </a>",
+ combo_txt)
results.append(
- {"doc_id": doc_ids, "bibInfo": bib_info, "comboTxt": combo_txt})
+ {"doc_id": doc_ids, "bibInfo": bib_info,
+ "comboTxt": combo_text})
return results
def load_file(filename, dir_path):
- """function to open and load json file"""
+    """Utility function to read a JSON file.
+    Args:
+        filename: file name to read
+        dir_path: base directory for the file
+    Returns: the JSON data read into a dict
+    """
file_path = os.path.join(dir_path, f"{filename}")
if not os.path.isfile(file_path):
raise FileNotFoundError(f"{filename} was not found or is a directory")
@@ -90,8 +102,19 @@ def load_file(filename, dir_path):
def fetch_pubmed(references, file_name, data_dir=""):
- """method to fetch and populate references with pubmed"""
+    """
+    Fetch PubMed data from a JSON file and populate the
+    references dictionary.
+
+    Args:
+        references (dict): Dictionary with document IDs as keys
+            and reference data as values.
+        file_name (str): Name of the JSON file containing PubMed data.
+        data_dir (str): Base directory where the data files are located.
+    Returns:
+        dict: Updated references dictionary populated with the PubMed data.
+    """
try:
pubmed = load_file(file_name, os.path.join(data_dir, "gn-meta/lit"))
for reference in references:
@@ -116,42 +139,16 @@ def get_gnqa(query, auth_token, data_dir=""):
answer
references: contains doc_name,reference,pub_med_info
"""
-
api_client = GeneNetworkQAClient(api_key=auth_token)
- res, task_id = api_client.ask('?ask=' + quote(query), auth_token)
- if task_id == 0:
- raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}")
- res, status = api_client.get_answer(task_id)
- if status == 1:
- resp_text = filter_response_text(res.text)
- if resp_text.get("data") is None:
- return task_id, "Please try to rephrase your question to receive feedback", []
- answer = resp_text['data']['answer']
- context = resp_text['data']['context']
- references = parse_context(
- context, DocIDs().get_info, format_bibliography_info)
- references = fetch_pubmed(references, "pubmed.json", data_dir)
-
- return task_id, answer, references
- else:
- return task_id, "We couldn't provide a response,Please try to rephrase your question to receive feedback", []
-
-
-def fetch_query_results(query, user_id, redis_conn):
- """this method fetches prev user query searches"""
- result = redis_conn.get(f"LLM:{user_id}-{query}")
- if result:
- return json.loads(result)
- return {
- "query": query,
- "answer": "Sorry No answer for you",
- "references": [],
- "task_id": None
- }
-
-
-def get_user_queries(user_id, redis_conn):
- """methos to fetch all queries for a specific user"""
- results = redis_conn.keys(f"LLM:{user_id}*")
- return [query for query in
- [result.partition("-")[2] for result in results] if query != ""]
+ res, task_id = api_client.ask('?ask=' + quote(query), query=query)
+ res, _status = api_client.get_answer(task_id)
+ resp_text = json.loads(''.join([str(char)
+ for char in res.text if char in string.printable]))
+ answer = re.sub(r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*',
+ lambda x: f"<a href='{x[0]}' target=_blank> {x[0]} </a>",
+ resp_text["data"]["answer"])
+ context = resp_text['data']['context']
+ return task_id, answer, fetch_pubmed(parse_context(
+ context, DocIDs().get_info,
+ format_bibliography_info),
+ "pubmed.json", data_dir)
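
get_gnqa and parse_context both post-process text with the same regular expression, wrapping bare URLs in HTML anchors before the answer and references are returned. A quick demonstration of that substitution on a made-up sentence:

# Demonstration of the URL-to-anchor substitution used in parse_context
# and get_gnqa; the sample sentence is invented.
import re

PATTERN = r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*'

def linkify(text):
    """Wrap bare URLs in HTML anchor tags, as the diff does for answers."""
    return re.sub(PATTERN,
                  lambda x: f"<a href='{x[0]}' target=_blank> {x[0]} </a>",
                  text)

print(linkify("See https://genenetwork.org/about for details."))
# prints the sentence with the URL wrapped in an <a href=...> anchor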