From 69013d298c869a42059af13bc63bef1bbdc7393d Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Thu, 16 May 2024 14:05:30 +0300
Subject: Update file to use correct import from response file

---
 gn3/llms/process.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index e38b73e..4edc238 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -49,8 +49,9 @@ def parse_context(context, get_info_func, format_bib_func):
 
 
 def rate_document(task_id, doc_id, rating, auth_token):
-    """This method is used to provide feedback for a document by making a rating."""
-    # todo move this to clients
+    """This method is used to provide
+    feedback for a document by making a rating
+    """
     try:
         url = urljoin(BASE_URL,
                       f"""/feedback?task_id={task_id}&document_id={doc_id}&feedback={rating}""")
@@ -107,7 +108,7 @@ def get_gnqa(query, auth_token, tmp_dir=""):
         answer = resp_text['data']['answer']
         context = resp_text['data']['context']
         references = parse_context(
-            context, DocIDs().getInfo, format_bibliography_info)
+            context, DocIDs().get_info, format_bibliography_info)
         references = fetch_pubmed(references, "pubmed.json", tmp_dir)
 
         return task_id, answer, references
-- 
cgit v1.2.3


From 75365bd88a720261a1b454f0ea11a840fb3be83e Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Thu, 16 May 2024 14:20:00 +0300
Subject: Move Parsing Doc_Ids to process file     * Context: groups related
 items

---
 gn3/llms/process.py | 51 +++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 43 insertions(+), 8 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index 4edc238..1881e92 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -1,21 +1,56 @@
 """this module contains code for processing response from fahamu client.py"""
+# pylint: disable=C0301
 import os
 import string
 import json
+import logging
+import requests
 
 from urllib.parse import urljoin
 from urllib.parse import quote
-import logging
-import requests
 
 from gn3.llms.client import GeneNetworkQAClient
-from gn3.llms.response import DocIDs
 
 
 BASE_URL = 'https://genenetwork.fahamuai.com/api/tasks'
-
-
-# pylint: disable=C0301
+BASEDIR = os.path.abspath(os.path.dirname(__file__))
+
+
+class DocIDs():
+    """ Class Method to Parse document id and names from files"""
+    def __init__(self):
+        """
+        init method for Docids
+        * doc_ids.json: opens doc)ids for gn references
+        * sugar_doc_ids:  open doci_ids for diabetes references
+        """
+        self.doc_ids = self.load_file("doc_ids.json")
+        self.sugar_doc_ids = self.load_file("all_files.json")
+        self.format_doc_ids(self.sugar_doc_ids)
+
+    def load_file(self, file_name):
+        """Method to load and read doc_id files"""
+        file_path = os.path.join(BASEDIR, file_name)
+        if os.path.isfile(file_path):
+            with open(file_path, "rb") as file_handler:
+                return json.load(file_handler)
+        else:
+            raise FileNotFoundError(f"{file_path}-- FIle does not exist\n")
+
+    def format_doc_ids(self, docs):
+        """method to format doc_ids for list items"""
+        for _key, val in docs.items():
+            if isinstance(val, list):
+                for doc_obj in val:
+                    doc_name = doc_obj["filename"].removesuffix(".pdf").removesuffix(".txt").replace("_", "")
+                    self.doc_ids.update({doc_obj["id"]:  doc_name})
+
+    def get_info(self, doc_id):
+        """ interface to make read from doc_ids"""
+        if doc_id in self.doc_ids.keys():
+            return self.doc_ids[doc_id]
+        else:
+            return doc_id
 
 
 def format_bibliography_info(bib_info):
@@ -131,6 +166,6 @@ def fetch_query_results(query, user_id, redis_conn):
 
 def get_user_queries(user_id, redis_conn):
     """methos to fetch all queries for a specific user"""
-
     results = redis_conn.keys(f"LLM:{user_id}*")
-    return [query for query in [result.partition("-")[2] for result in results] if query != ""]
+    return [query for query in
+            [result.partition("-")[2] for result in results] if query != ""]
-- 
cgit v1.2.3


From a5a6e319e85c28ff3ab9d6f2d8a869bc2ac77ac8 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Thu, 16 May 2024 14:25:44 +0300
Subject: Delete function: only useful when training own llm model.

---
 gn3/llms/process.py | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index 1881e92..e47a997 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -83,26 +83,6 @@ def parse_context(context, get_info_func, format_bib_func):
     return results
 
 
-def rate_document(task_id, doc_id, rating, auth_token):
-    """This method is used to provide
-    feedback for a document by making a rating
-    """
-    try:
-        url = urljoin(BASE_URL,
-                      f"""/feedback?task_id={task_id}&document_id={doc_id}&feedback={rating}""")
-        headers = {"Authorization": f"Bearer {auth_token}"}
-
-        resp = requests.post(url, headers=headers)
-        resp.raise_for_status()
-
-        return {"status": "success", **resp.json()}
-    except requests.exceptions.HTTPError as http_error:
-        raise RuntimeError(f"HTTP Error Occurred:\
-            {http_error.response.text} -with status code- {http_error.response.status_code}") from http_error
-    except Exception as error:
-        raise RuntimeError(f"An error occurred: {str(error)}") from error
-
-
 def load_file(filename, dir_path):
     """function to open and load json file"""
     file_path = os.path.join(dir_path, f"{filename}")
-- 
cgit v1.2.3


From 3913374700521647e93bf9afabb9943746ac5d5b Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Thu, 16 May 2024 14:27:20 +0300
Subject: Pep8 formatting gn3:llm:process.

---
 gn3/llms/process.py | 2 --
 1 file changed, 2 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index e47a997..d080acb 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -6,9 +6,7 @@ import json
 import logging
 import requests
 
-from urllib.parse import urljoin
 from urllib.parse import quote
-
 from gn3.llms.client import GeneNetworkQAClient
 
 
-- 
cgit v1.2.3


From f30300a82f605fa96130fbcbdcd17c53296d2372 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Thu, 16 May 2024 16:34:34 +0300
Subject: Minor code refactoring related

---
 gn3/llms/process.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index d080acb..11961eb 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -4,9 +4,9 @@ import os
 import string
 import json
 import logging
+from urllib.parse import quote
 import requests
 
-from urllib.parse import quote
 from gn3.llms.client import GeneNetworkQAClient
 
 
@@ -106,15 +106,24 @@ def fetch_pubmed(references, file_name, data_dir=""):
         return references
 
 
-def get_gnqa(query, auth_token, tmp_dir=""):
-    """entry function for the gn3 api endpoint()"""
+def get_gnqa(query, auth_token, data_dir=""):
+    """entry function for the gn3 api endpoint()
+    ARGS:
+         query: the question to ask, e.g. 'what is a gene'
+         auth_token: token to connect to api_client
+         data_dir:  base directory for gn3 data
+    Returns:
+         task_id: fahamu unique identifier for task
+         answer
+         references: contains doc_name,reference,pub_med_info
+    """
 
-    api_client = GeneNetworkQAClient(requests.Session(), api_key=auth_token)
+    api_client = GeneNetworkQAClient(requests.Session(), auth_token)
     res, task_id = api_client.ask('?ask=' + quote(query), auth_token)
     if task_id == 0:
         raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}")
-    res, success = api_client.get_answer(task_id)
-    if success == 1:
+    res, status = api_client.get_answer(task_id)
+    if status == 1:
         resp_text = filter_response_text(res.text)
         if resp_text.get("data") is None:
             return task_id, "Please try to rephrase your question to receive feedback", []
@@ -122,7 +131,7 @@ def get_gnqa(query, auth_token, tmp_dir=""):
         context = resp_text['data']['context']
         references = parse_context(
             context, DocIDs().get_info, format_bibliography_info)
-        references = fetch_pubmed(references, "pubmed.json", tmp_dir)
+        references = fetch_pubmed(references, "pubmed.json", data_dir)
 
         return task_id, answer, references
     else:
-- 
cgit v1.2.3


From 50c8500105912a6380ea8f971ccfb17ef0994279 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Thu, 16 May 2024 18:56:41 +0300
Subject: Refactor code for http request adapters.

---
 gn3/llms/client.py  | 43 +++++++++++--------------------------------
 gn3/llms/process.py |  2 +-
 2 files changed, 12 insertions(+), 33 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/llms/client.py b/gn3/llms/client.py
index 2e9898f..810227f 100644
--- a/gn3/llms/client.py
+++ b/gn3/llms/client.py
@@ -1,5 +1,6 @@
 """Module  Contains code for making request to fahamu Api"""
 # pylint: disable=C0301
+# pylint: disable=R0913
 import json
 import time
 
@@ -24,13 +25,13 @@ class TimeoutHTTPAdapter(HTTPAdapter):
         self.timeout = timeout
         super().__init__(*args, **kwargs)
 
-    def send(self, request, **kwargs):
+    def send(self, *args, **kwargs):
         """Override :obj:`HTTPAdapter` send method to add a default timeout."""
         timeout = kwargs.get("timeout")
         if timeout is None:
             kwargs["timeout"] = self.timeout
 
-        return super().send(request, **kwargs)
+        return super().send(*args, **kwargs)
 
 
 class GeneNetworkQAClient(Session):
@@ -77,21 +78,15 @@ class GeneNetworkQAClient(Session):
         self.mount("https://", adapter)
         self.mount("http://", adapter)
 
-    def ask_the_documents(self, extend_url, my_auth):
+    def get_answer_using_task_id(self, extend_url, my_auth):
+        """call this method with task id to fetch response"""
         try:
-            response = requests.post(
-                self.base_url + extend_url, data={}, headers=my_auth)
+            response = requests.get(
+               self.answer_url + extend_url, data={}, headers=my_auth)
             response.raise_for_status()
+            return response
         except requests.exceptions.RequestException as error:
-            raise RuntimeError(f"Error making the request: {error}") from error
-        if response.status_code != 200:
-            return GeneNetworkQAClient.negative_status_msg(response), 0
-        task_id = GeneNetworkQAClient.get_task_id_from_result(response)
-        response = GeneNetworkQAClient.get_answer_using_task_id(task_id,
-                                                                my_auth)
-        if response.status_code != 200:
-            return GeneNetworkQAClient.negative_status_msg(response), 0
-        return response, 1
+            raise error
 
     @staticmethod
     def negative_status_msg(response):
@@ -102,7 +97,7 @@ class GeneNetworkQAClient(Session):
         """fahamu ask api interface"""
         res = self.custom_request('POST', f"{self.base_url}{ex_url}", *args, **kwargs)
         if res.status_code != 200:
-            return self.negative_status_msg(res), 0
+            return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0
         return res, json.loads(res.text)
 
     def get_answer(self, taskid, *args, **kwargs):
@@ -110,25 +105,9 @@ class GeneNetworkQAClient(Session):
         query = f"{self.answer_url}?task_id={taskid['task_id']}"
         res = self.custom_request('GET', query, *args, **kwargs)
         if res.status_code != 200:
-            return self.negative_status_msg(res), 0
+            return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0
         return res, 1
 
-    @staticmethod
-    def get_task_id_from_result(response):
-        """method to get task_id from response"""
-        task_id = json.loads(response.text)
-        return f"?task_id={task_id.get('task_id', '')}"
-
-    def get_answer_using_task_id(self, extend_url, my_auth):
-        """call this method with task id to fetch response"""
-        try:
-            response = requests.get(
-               self.answer_url + extend_url, data={}, headers=my_auth)
-            response.raise_for_status()
-            return response
-        except requests.exceptions.RequestException as error:
-            raise error
-
     def custom_request(self, method, url, *args, **kwargs):
         """ make custom request to fahamu api ask and get response"""
         max_retries = 50
diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index 11961eb..9cb09a1 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -118,7 +118,7 @@ def get_gnqa(query, auth_token, data_dir=""):
          references: contains doc_name,reference,pub_med_info
     """
 
-    api_client = GeneNetworkQAClient(requests.Session(), auth_token)
+    api_client = GeneNetworkQAClient(requests.Session(), api_key=auth_token)
     res, task_id = api_client.ask('?ask=' + quote(query), auth_token)
     if task_id == 0:
         raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}")
-- 
cgit v1.2.3


From d3bc323fe3a965ee5b6917987c4fe7662056e560 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Fri, 17 May 2024 13:34:37 +0300
Subject: Refactor custom  request codebase.

---
 gn3/llms/client.py  | 40 +++++++++++++++++++---------------------
 gn3/llms/process.py |  5 ++---
 2 files changed, 21 insertions(+), 24 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/llms/client.py b/gn3/llms/client.py
index 05e3500..d57bca2 100644
--- a/gn3/llms/client.py
+++ b/gn3/llms/client.py
@@ -1,6 +1,5 @@
 """Module  Contains code for making request to fahamu Api"""
 # pylint: disable=C0301
-# pylint: disable=R0913
 import json
 import time
 
@@ -36,9 +35,7 @@ class GeneNetworkQAClient(Session):
     request timeouts, and request retries.
 
     Args:
-        account (str): Base address subdomain.
         api_key (str): API key.
-        version (str, optional): API version, defaults to "v3".
         timeout (int, optional): Timeout value, defaults to 5.
         total_retries (int, optional): Total retries value, defaults to 5.
         backoff_factor (int, optional): Retry backoff factor value,
@@ -50,7 +47,7 @@ class GeneNetworkQAClient(Session):
     api_key="XXXXXXXXXXXXXXXXXXX...")
     """
 
-    def __init__(self, account, api_key, version="v3", timeout=30,
+    def __init__(self, api_key, timeout=30,
                  total_retries=5, backoff_factor=30):
         super().__init__()
         self.headers.update(
@@ -95,11 +92,14 @@ class GeneNetworkQAClient(Session):
 
     def get_answer(self, taskid, *args, **kwargs):
         """Fahamu get answer interface"""
-        query = f"{self.answer_url}?task_id={taskid['task_id']}"
-        res = self.custom_request('GET', query, *args, **kwargs)
-        if res.status_code != 200:
-            return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0
-        return res, 1
+        try:
+            query = f"{self.answer_url}?task_id={taskid['task_id']}"
+            res = self.custom_request('GET', query, *args, **kwargs)
+            if res.status_code != 200:
+                return f"Error: Status code -{res.status_code}- Reason::{res.reason}", 0
+            return res, 1
+        except TimeoutError:
+            return "Timeout error occured:try to rephrase your query", 0
 
     def custom_request(self, method, url, *args, **kwargs):
         """ make custom request to fahamu api ask and get response"""
@@ -109,20 +109,18 @@ class GeneNetworkQAClient(Session):
             try:
                 response = super().request(method, url, *args, **kwargs)
                 response.raise_for_status()
+                if response.ok:
+                    if method.lower() == "get" and response.json().get("data") is None:
+                        time.sleep(retry_delay)
+                        continue
+                    return response
+                else:
+                    time.sleep(retry_delay)
             except requests.exceptions.HTTPError as error:
                 if error.response.status_code == 500:
                     raise LLMError(error.request, error.response, f"Response Error with:status_code:{error.response.status_code},Reason for error: Use of Invalid Fahamu Token") from error
-                elif error.response.status_code == 404:
-                    raise LLMError(error.request, error.response, f"404 Client Error: Not Found for url: {self.base_url}") from error
-                raise error
+                raise LLMError(error.request, error.response,
+            f"HTTP error occurred  with error status:{error.response.status_code}") from error
             except requests.exceptions.RequestException as error:
                 raise error
-            if response.ok:
-                if method.lower() == "get" and response.json().get("data") is None:
-                    time.sleep(retry_delay)
-                    continue
-                else:
-                    return response
-            else:
-                time.sleep(retry_delay)
-            return response
+        raise TimeoutError
diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index 9cb09a1..4725bcb 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -5,7 +5,6 @@ import string
 import json
 import logging
 from urllib.parse import quote
-import requests
 
 from gn3.llms.client import GeneNetworkQAClient
 
@@ -118,7 +117,7 @@ def get_gnqa(query, auth_token, data_dir=""):
          references: contains doc_name,reference,pub_med_info
     """
 
-    api_client = GeneNetworkQAClient(requests.Session(), api_key=auth_token)
+    api_client = GeneNetworkQAClient(api_key=auth_token)
     res, task_id = api_client.ask('?ask=' + quote(query), auth_token)
     if task_id == 0:
         raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}")
@@ -135,7 +134,7 @@ def get_gnqa(query, auth_token, data_dir=""):
 
         return task_id, answer, references
     else:
-        return task_id, "Please try to rephrase your question to receive feedback", []
+        return task_id, "We couldn't provide a response,Please try to rephrase your question to receive feedback", []
 
 
 def fetch_query_results(query, user_id, redis_conn):
-- 
cgit v1.2.3


From 2a99da9f46233a28e9ea0b6a297d8a6b93f61923 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Tue, 21 May 2024 16:38:53 +0300
Subject: Remove obsolete functions.

---
 gn3/api/llm.py      | 39 +--------------------------------------
 gn3/llms/process.py | 20 --------------------
 2 files changed, 1 insertion(+), 58 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/api/llm.py b/gn3/api/llm.py
index b2c9c3e..02b37f9 100644
--- a/gn3/api/llm.py
+++ b/gn3/api/llm.py
@@ -9,8 +9,6 @@ from flask import jsonify
 from flask import request
 
 from gn3.llms.process import get_gnqa
-from gn3.llms.process import get_user_queries
-from gn3.llms.process import fetch_query_results
 from gn3.llms.errors import LLMError
 from gn3.auth.authorisation.oauth2.resource_server import require_oauth
 
@@ -46,7 +44,7 @@ def gnqna():
                 redis_conn.set(
                     f"LLM:{str(token.user.user_id)}-{str(task_id['task_id'])}",
                     json.dumps(response)
-                      )
+                )
                 return response
         except Exception:    # handle specific error
             return response
@@ -105,38 +103,3 @@ def fetch_prev_searches():
         for key in redis_conn.scan_iter(f"LLM:{str(the_token.user.user_id)}*"):
             query_result[key] = json.loads(redis_conn.get(key))
         return jsonify(query_result)
-
-
-@gnqa.route("/history/<query>", methods=["GET"])
-@require_oauth("profile user")
-def fetch_user_hist(query):
-    """"Endpoint to fetch previos searches for User"""
-    with (require_oauth.acquire("profile user") as the_token,
-          Redis.from_url(current_app.config["REDIS_URI"],
-          decode_responses=True) as redis_conn):
-        return jsonify({
-            **fetch_query_results(query, the_token.user.user_id, redis_conn),
-            "prev_queries": get_user_queries("random_user", redis_conn)
-        })
-
-
-@gnqa.route("/historys/<query>", methods=["GET"])
-def fetch_users_hist_records(query):
-    """method to fetch all users hist:note this is a test functionality
-    to be replaced by fetch_user_hist
-    """
-    with Redis.from_url(current_app.config["REDIS_URI"],
-                        decode_responses=True) as redis_conn:
-        return jsonify({
-            **fetch_query_results(query, "random_user", redis_conn),
-            "prev_queries": get_user_queries("random_user", redis_conn)
-        })
-
-
-@gnqa.route("/get_hist_names", methods=["GET"])
-def fetch_prev_hist_ids():
-    """Test method for fetching history for Anony Users"""
-    with (Redis.from_url(current_app.config["REDIS_URI"],
-                         decode_responses=True)) as redis_conn:
-        return jsonify({"prev_queries": get_user_queries("random_user",
-                                                         redis_conn)})
diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index 4725bcb..eba7e4b 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -135,23 +135,3 @@ def get_gnqa(query, auth_token, data_dir=""):
         return task_id, answer, references
     else:
         return task_id, "We couldn't provide a response,Please try to rephrase your question to receive feedback", []
-
-
-def fetch_query_results(query, user_id, redis_conn):
-    """this method fetches prev user query searches"""
-    result = redis_conn.get(f"LLM:{user_id}-{query}")
-    if result:
-        return json.loads(result)
-    return {
-        "query": query,
-        "answer": "Sorry No answer for you",
-        "references": [],
-        "task_id": None
-    }
-
-
-def get_user_queries(user_id, redis_conn):
-    """methos to fetch all queries for a specific user"""
-    results = redis_conn.keys(f"LLM:{user_id}*")
-    return [query for query in
-            [result.partition("-")[2] for result in results] if query != ""]
-- 
cgit v1.2.3


From 13bb57cbd191ffe6e40e830ca08b9191b2dc5700 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Fri, 24 May 2024 15:34:53 +0300
Subject: Pass query as an argument to api_client ask method.

---
 gn3/llms/process.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index eba7e4b..d53a7fd 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -118,7 +118,7 @@ def get_gnqa(query, auth_token, data_dir=""):
     """
 
     api_client = GeneNetworkQAClient(api_key=auth_token)
-    res, task_id = api_client.ask('?ask=' + quote(query), auth_token)
+    res, task_id = api_client.ask('?ask=' + quote(query), query=query)
     if task_id == 0:
         raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}")
     res, status = api_client.get_answer(task_id)
-- 
cgit v1.2.3


From 651f307a4b8e60aaea0c8a7649a5b02aafce7a98 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Fri, 24 May 2024 15:59:30 +0300
Subject: Removed status check on get_gnqa function.

---
 gn3/llms/process.py | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index d53a7fd..ab2a80e 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -116,22 +116,11 @@ def get_gnqa(query, auth_token, data_dir=""):
          answer
          references: contains doc_name,reference,pub_med_info
     """
-
     api_client = GeneNetworkQAClient(api_key=auth_token)
     res, task_id = api_client.ask('?ask=' + quote(query), query=query)
-    if task_id == 0:
-        raise RuntimeError(f"Error connecting to Fahamu Api: {str(res)}")
-    res, status = api_client.get_answer(task_id)
-    if status == 1:
-        resp_text = filter_response_text(res.text)
-        if resp_text.get("data") is None:
-            return task_id, "Please try to rephrase your question to receive feedback", []
-        answer = resp_text['data']['answer']
-        context = resp_text['data']['context']
-        references = parse_context(
-            context, DocIDs().get_info, format_bibliography_info)
-        references = fetch_pubmed(references, "pubmed.json", data_dir)
-
-        return task_id, answer, references
-    else:
-        return task_id, "We couldn't provide a response,Please try to rephrase your question to receive feedback", []
+    res, _status = api_client.get_answer(task_id)
+    resp_text = filter_response_text(res.text)
+    answer = resp_text['data']['answer']
+    context = resp_text['data']['context']
+    return task_id, answer, fetch_pubmed(parse_context(
+        context, DocIDs().get_info, format_bibliography_info), "pubmed.json", data_dir)
-- 
cgit v1.2.3


From 105f2b36eb62b9b097e1cbf6fa815f98da77bc16 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Mon, 27 May 2024 14:09:02 +0300
Subject: Update Docstrings for Api endpoints and functions.

---
 gn3/api/llm.py      |  6 +++---
 gn3/llms/process.py | 48 +++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 44 insertions(+), 10 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/api/llm.py b/gn3/api/llm.py
index ab33c7a..4b8ec52 100644
--- a/gn3/api/llm.py
+++ b/gn3/api/llm.py
@@ -17,7 +17,7 @@ gnqa = Blueprint("gnqa", __name__)
 
 @gnqa.route("/search", methods=["POST"])
 def search():
-    """Main gnqa endpoint"""
+    """Api  endpoint for searching queries in fahamu Api"""
     query = request.json.get("querygnqa", "")
     if not query:
         return jsonify({"error": "querygnqa is missing in the request"}), 400
@@ -56,7 +56,7 @@ def search():
 @gnqa.route("/rating/<task_id>", methods=["POST"])
 @require_oauth("profile")
 def rate_queries(task_id):
-    """Endpoint for rating qnqa query and answer"""
+    """Api endpoint for rating GNQA query and answer"""
     with (require_oauth.acquire("profile") as token,
           db.connection(current_app.config["LLM_DB_PATH"]) as conn):
         results = request.json
@@ -89,7 +89,7 @@ def rate_queries(task_id):
 @gnqa.route("/history", methods=["GET"])
 @require_oauth("profile user")
 def fetch_prev_history():
-    """ api method to fetch search query records"""
+    """Api endpoint to fetch GNQA previous search."""
     with (require_oauth.acquire("profile user") as token,
           db.connection(current_app.config["LLM_DB_PATH"]) as conn):
         cursor = conn.cursor()
diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index ab2a80e..ade4104 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -35,7 +35,7 @@ class DocIDs():
             raise FileNotFoundError(f"{file_path}-- FIle does not exist\n")
 
     def format_doc_ids(self, docs):
-        """method to format doc_ids for list items"""
+        """method to format doc_ids for list items doc_id and doc_name"""
         for _key, val in docs.items():
             if isinstance(val, list):
                 for doc_obj in val:
@@ -43,7 +43,14 @@ class DocIDs():
                     self.doc_ids.update({doc_obj["id"]:  doc_name})
 
     def get_info(self, doc_id):
-        """ interface to make read from doc_ids"""
+        """ interface to make read from doc_ids
+           and extract info data  else returns
+           doc_id
+        Args:
+            doc_id: str: a search key for doc_ids
+        Returns:
+              an object with doc_info if doc_id in doc_ids
+        """
         if doc_id in self.doc_ids.keys():
             return self.doc_ids[doc_id]
         else:
@@ -51,7 +58,8 @@ class DocIDs():
 
 
 def format_bibliography_info(bib_info):
-    """Function for formatting bibliography info"""
+    """Utility function for formatting bibliography info
+    """
     if isinstance(bib_info, str):
         return bib_info.removesuffix('.txt')
     elif isinstance(bib_info, dict):
@@ -66,7 +74,15 @@ def filter_response_text(val):
 
 
 def parse_context(context, get_info_func, format_bib_func):
-    """function to parse doc_ids content"""
+    """Function to parse doc_ids content
+     Args:
+         context: raw references from  fahamu api
+         get_info_func: function to get doc_ids info
+         format_bib_func:  function to format bibliography info
+    Returns:
+          a list with each item having (doc_id,bib_info,
+          combined reference text)
+    """
     results = []
     for doc_ids, summary in context.items():
         combo_txt = ""
@@ -81,7 +97,12 @@ def parse_context(context, get_info_func, format_bib_func):
 
 
 def load_file(filename, dir_path):
-    """function to open and load json file"""
+    """Utility function to read json file
+    Args:
+        filename:  file name to read
+        dir_path:  base directory for the file
+    Returns: json data read to a dict
+    """
     file_path = os.path.join(dir_path, f"{filename}")
     if not os.path.isfile(file_path):
         raise FileNotFoundError(f"{filename} was not found or is a directory")
@@ -90,8 +111,19 @@ def load_file(filename, dir_path):
 
 
 def fetch_pubmed(references, file_name, data_dir=""):
-    """method to fetch and populate references with pubmed"""
+    """
+    Fetches PubMed data from a JSON file and populates the\
+    references dictionary.
+
+    Args:
+        references (dict): Dictionary with document IDs as keys\
+    and reference data as values.
+        file_name (str): Name of the JSON file containing PubMed data.
+        data_dir (str): Base directory where the data files are located.
 
+    Returns:
+        dict: Updated references dictionary populated with the PubMed data.
+    """
     try:
         pubmed = load_file(file_name, os.path.join(data_dir, "gn-meta/lit"))
         for reference in references:
@@ -123,4 +155,6 @@ def get_gnqa(query, auth_token, data_dir=""):
     answer = resp_text['data']['answer']
     context = resp_text['data']['context']
     return task_id, answer, fetch_pubmed(parse_context(
-        context, DocIDs().get_info, format_bibliography_info), "pubmed.json", data_dir)
+                            context, DocIDs().get_info,
+                            format_bibliography_info),
+                            "pubmed.json", data_dir)
-- 
cgit v1.2.3


From d0801cea229d00d5d4ce19fa1cb36242e56070d1 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Mon, 27 May 2024 14:18:48 +0300
Subject: Delete filter response text method and update relevant code.

---
 gn3/llms/process.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index ade4104..2ce6b2b 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -67,12 +67,6 @@ def format_bibliography_info(bib_info):
     return bib_info
 
 
-def filter_response_text(val):
-    """helper function for filtering non-printable chars"""
-    return json.loads(''.join([str(char)
-                               for char in val if char in string.printable]))
-
-
 def parse_context(context, get_info_func, format_bib_func):
     """Function to parse doc_ids content
      Args:
@@ -151,7 +145,8 @@ def get_gnqa(query, auth_token, data_dir=""):
     api_client = GeneNetworkQAClient(api_key=auth_token)
     res, task_id = api_client.ask('?ask=' + quote(query), query=query)
     res, _status = api_client.get_answer(task_id)
-    resp_text = filter_response_text(res.text)
+    resp_text = json.loads(''.join([str(char)
+                                   for char in res.text if char in string.printable]))
     answer = resp_text['data']['answer']
     context = resp_text['data']['context']
     return task_id, answer, fetch_pubmed(parse_context(
-- 
cgit v1.2.3


From 58fbc6527537cb229ded87eea57949c3cf02621f Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Mon, 27 May 2024 14:39:38 +0300
Subject: Remove duplicate code for loading files.

---
 gn3/llms/process.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index 2ce6b2b..40e53c5 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -21,19 +21,10 @@ class DocIDs():
         * doc_ids.json: opens doc)ids for gn references
         * sugar_doc_ids:  open doci_ids for diabetes references
         """
-        self.doc_ids = self.load_file("doc_ids.json")
-        self.sugar_doc_ids = self.load_file("all_files.json")
+        self.doc_ids = load_file("doc_ids.json", BASEDIR)
+        self.sugar_doc_ids = load_file("all_files.json", BASEDIR)
         self.format_doc_ids(self.sugar_doc_ids)
 
-    def load_file(self, file_name):
-        """Method to load and read doc_id files"""
-        file_path = os.path.join(BASEDIR, file_name)
-        if os.path.isfile(file_path):
-            with open(file_path, "rb") as file_handler:
-                return json.load(file_handler)
-        else:
-            raise FileNotFoundError(f"{file_path}-- FIle does not exist\n")
-
     def format_doc_ids(self, docs):
         """method to format doc_ids for list items doc_id and doc_name"""
         for _key, val in docs.items():
-- 
cgit v1.2.3


From d3f87b9a02bfec223d23c16eb1374d53065fea92 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Mon, 27 May 2024 17:37:13 +0300
Subject: Add regular expressions for parsing links in texts.

---
 gn3/llms/process.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index 40e53c5..55c27a0 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -1,6 +1,7 @@
 """this module contains code for processing response from fahamu client.py"""
 # pylint: disable=C0301
 import os
+import re
 import string
 import json
 import logging
@@ -76,8 +77,13 @@ def parse_context(context, get_info_func, format_bib_func):
         doc_info = get_info_func(doc_ids)
         bib_info = doc_ids if doc_ids == doc_info else format_bib_func(
             doc_info)
+        pattern = r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*'
+        combo_text = re.sub(pattern,
+                            lambda x: f"<a href='{x[0]}' target=_blank> {x[0]} </a>",
+                            combo_txt)
         results.append(
-            {"doc_id": doc_ids, "bibInfo": bib_info, "comboTxt": combo_txt})
+            {"doc_id": doc_ids, "bibInfo": bib_info,
+             "comboTxt": combo_text})
     return results
 
 
@@ -137,8 +143,10 @@ def get_gnqa(query, auth_token, data_dir=""):
     res, task_id = api_client.ask('?ask=' + quote(query), query=query)
     res, _status = api_client.get_answer(task_id)
     resp_text = json.loads(''.join([str(char)
-                                   for char in res.text if char in string.printable]))
-    answer = resp_text['data']['answer']
+                           for char in res.text if char in string.printable]))
+    answer = re.sub(r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*',
+                    lambda x: f"<a href='{x[0]}' target=_blank> {x[0]} </a>",
+                    resp_text["data"]["answer"])
     context = resp_text['data']['context']
     return task_id, answer, fetch_pubmed(parse_context(
                             context, DocIDs().get_info,
-- 
cgit v1.2.3


From 2e81e48695e9b5618746c8cd1c6c83b452836442 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Mon, 2 Sep 2024 15:12:12 +0300
Subject: Fix minor syntax issue.

---
 gn3/llms/process.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index 55c27a0..c3e6eda 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -19,8 +19,8 @@ class DocIDs():
     def __init__(self):
         """
         init method for Docids
-        * doc_ids.json: opens doc)ids for gn references
-        * sugar_doc_ids:  open doci_ids for diabetes references
+        * doc_ids.json: open doc_ids for gn references
+        * sugar_doc_ids:  open doc_ids for diabetes references
         """
         self.doc_ids = load_file("doc_ids.json", BASEDIR)
         self.sugar_doc_ids = load_file("all_files.json", BASEDIR)
-- 
cgit v1.2.3


From 086c80510ff418bca77f544d3dd4b174d2dc9c8e Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Mon, 2 Sep 2024 15:15:01 +0300
Subject: Remove unnecessary check for open file.

---
 gn3/llms/process.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index c3e6eda..ef925c4 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -94,10 +94,8 @@ def load_file(filename, dir_path):
         dir_path:  base directory for the file
     Returns: json data read to a dict
     """
-    file_path = os.path.join(dir_path, f"{filename}")
-    if not os.path.isfile(file_path):
-        raise FileNotFoundError(f"{filename} was not found or is a directory")
-    with open(file_path, "rb") as file_handler:
+    with open(os.path.join(dir_path, f"{filename}"),
+              "rb") as file_handler:
         return json.load(file_handler)
 
 
-- 
cgit v1.2.3


From 742beb6ee663bc9ae5409461d2be4b2144b8893e Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Mon, 2 Sep 2024 15:17:12 +0300
Subject: Refactor doc_id object.

---
 gn3/llms/process.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index ef925c4..bfce9a5 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -23,8 +23,8 @@ class DocIDs():
         * sugar_doc_ids:  open doc_ids for diabetes references
         """
         self.doc_ids = load_file("doc_ids.json", BASEDIR)
-        self.sugar_doc_ids = load_file("all_files.json", BASEDIR)
-        self.format_doc_ids(self.sugar_doc_ids)
+        sugar_doc_ids = load_file("all_files.json", BASEDIR)
+        self.format_doc_ids(sugar_doc_ids)
 
     def format_doc_ids(self, docs):
         """method to format doc_ids for list items doc_id and doc_name"""
-- 
cgit v1.2.3


From c16c54759cfd493250424ee3f565862e5d6009b3 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Tue, 3 Sep 2024 10:41:33 +0300
Subject: Raise KeyError for doc_id not found in doc_ids.

---
 gn3/llms/process.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'gn3/llms/process.py')

diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index bfce9a5..b8e47e7 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -41,13 +41,10 @@ class DocIDs():
         Args:
             doc_id: str: a search key for doc_ids
         Returns:
-              an object with doc_info if doc_id in doc_ids
+              an object if doc id exists else
+              raises a KeyError
         """
-        if doc_id in self.doc_ids.keys():
-            return self.doc_ids[doc_id]
-        else:
-            return doc_id
-
+        return self.doc_ids[doc_id]
 
 def format_bibliography_info(bib_info):
     """Utility function for formatting bibliography info
@@ -74,9 +71,11 @@ def parse_context(context, get_info_func, format_bib_func):
         combo_txt = ""
         for entry in summary:
             combo_txt += "\t" + entry["text"]
-        doc_info = get_info_func(doc_ids)
-        bib_info = doc_ids if doc_ids == doc_info else format_bib_func(
-            doc_info)
+        try:
+            doc_info = get_info_func(doc_ids)
+            bib_info = format_bib_func(doc_info)
+        except KeyError:
+            bib_info = doc_ids
         pattern = r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*'
         combo_text = re.sub(pattern,
                             lambda x: f"<a href='{x[0]}' target=_blank> {x[0]} </a>",
-- 
cgit v1.2.3