Move Parsing Doc_Ids to process file

* Context: groups related items
author: Alexander_Kabui 2024-05-16 14:20:00 +0300
committer: Alexander_Kabui 2024-05-16 14:20:00 +0300
commit: 75365bd88a720261a1b454f0ea11a840fb3be83e (patch)
tree: 97de0b58d27ccf16d553eecf3b4c6a7a953e7bb9 /gn3
parent: 69013d298c869a42059af13bc63bef1bbdc7393d (diff)
download: genenetwork3-75365bd88a720261a1b454f0ea11a840fb3be83e.tar.gz
1 files changed, 43 insertions, 8 deletions
diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index 4edc238..1881e92 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -1,21 +1,56 @@
 """this module contains code for processing response from fahamu client.py"""
+# pylint: disable=C0301
 import os
 import string
 import json
+import logging
+import requests
 
 from urllib.parse import urljoin
 from urllib.parse import quote
-import logging
-import requests
 
 from gn3.llms.client import GeneNetworkQAClient
-from gn3.llms.response import DocIDs
 
 
 BASE_URL = 'https://genenetwork.fahamuai.com/api/tasks'
-
-
-# pylint: disable=C0301
+BASEDIR = os.path.abspath(os.path.dirname(__file__))
+
+
+class DocIDs():
+    """Class to parse document ids and names from files."""
+    def __init__(self):
+        """
+        init method for Docids
+        * doc_ids.json: loads doc_ids for gn references
+        * sugar_doc_ids: loads doc_ids for diabetes references
+        """
+        self.doc_ids = self.load_file("doc_ids.json")
+        self.sugar_doc_ids = self.load_file("all_files.json")
+        self.format_doc_ids(self.sugar_doc_ids)
+
+    def load_file(self, file_name):
+        """Method to load and read doc_id files"""
+        file_path = os.path.join(BASEDIR, file_name)
+        if os.path.isfile(file_path):
+            with open(file_path, "rb") as file_handler:
+                return json.load(file_handler)
+        else:
+            raise FileNotFoundError(f"{file_path}-- FIle does not exist\n")
+
+    def format_doc_ids(self, docs):
+        """method to format doc_ids for list items"""
+        for _key, val in docs.items():
+            if isinstance(val, list):
+                for doc_obj in val:
+                    doc_name = doc_obj["filename"].removesuffix(".pdf").removesuffix(".txt").replace("_", "")
+                    self.doc_ids.update({doc_obj["id"]:  doc_name})
+
+    def get_info(self, doc_id):
+        """Interface to read from doc_ids; returns doc_id itself when it is unknown."""
+        if doc_id in self.doc_ids.keys():
+            return self.doc_ids[doc_id]
+        else:
+            return doc_id
 
 
 def format_bibliography_info(bib_info):
@@ -131,6 +166,6 @@ def fetch_query_results(query, user_id, redis_conn):
 
 def get_user_queries(user_id, redis_conn):
     """methos to fetch all queries for a specific user"""
-
     results = redis_conn.keys(f"LLM:{user_id}*")
-    return [query for query in [result.partition("-")[2] for result in results] if query != ""]
+    return [query for query in
+            [result.partition("-")[2] for result in results] if query != ""]
author	Alexander_Kabui	2024-05-16 14:20:00 +0300
committer	Alexander_Kabui	2024-05-16 14:20:00 +0300
commit	75365bd88a720261a1b454f0ea11a840fb3be83e (patch)
tree	97de0b58d27ccf16d553eecf3b4c6a7a953e7bb9 /gn3
parent	69013d298c869a42059af13bc63bef1bbdc7393d (diff)
download	genenetwork3-75365bd88a720261a1b454f0ea11a840fb3be83e.tar.gz