diff options
-rw-r--r-- | gn3/llms/response.py | 60 |
1 files changed, 28 insertions, 32 deletions
diff --git a/gn3/llms/response.py b/gn3/llms/response.py index 7fce75b..2f00312 100644 --- a/gn3/llms/response.py +++ b/gn3/llms/response.py @@ -1,46 +1,42 @@ - -# pylint: skip-file +""" Module contains code for parsing references doc_ids """ +# pylint: disable=C0301 import json import os - basedir = os.path.abspath(os.path.dirname(__file__)) class DocIDs(): + """ Class Method to Parse document id and names""" def __init__(self): - # open doc ids for GN refs - self.doc_ids = self.loadFile("doc_ids.json") - # open doc ids for Diabetes references - self.sugar_doc_ids = self.loadFile("all_files.json") - # format is not what I prefer, it needs to be rebuilt - self.formatDocIDs(self.sugar_doc_ids) - - def loadFile(self, file_name): + """ + init method for Docids + * doc_ids.json: opens doc)ids for gn references + * sugar_doc_ids: open doci_ids for diabetes references + """ + self.doc_ids = self.load_file("doc_ids.json") + self.sugar_doc_ids = self.load_file("all_files.json") + self.format_doc_ids(self.sugar_doc_ids) + + def load_file(self, file_name): + """Method to load and read doc_id files""" file_path = os.path.join(basedir, file_name) if os.path.isfile(file_path): - f = open(file_path, "rb") - result = json.load(f) - f.close() - return result + with open(file_path, "rb") as file_handler: + return json.load(file_handler) else: - raise Exception("\n{0} -- File does not exist\n".format(file_path)) - - def formatDocIDs(self, values): - for _key, _val in values.items(): - if isinstance(_val, list): - for theObject in _val: - docName = self.formatDocumentName(theObject['filename']) - docID = theObject['id'] - self.doc_ids.update({docID: docName}) - - def formatDocumentName(self, val): - result = val.removesuffix('.pdf') - result = result.removesuffix('.txt') - result = result.replace('_', ' ') - return result - - def getInfo(self, doc_id): + raise FileNotFoundError(f"{file_path}-- FIle does not exist\n") + + def format_doc_ids(self, docs): + """method to format doc_ids for list items""" + for _key, val in docs.items(): + if isinstance(val, list): + for doc_obj in val: + doc_name = doc_obj["filename"].removesuffix(".pdf").removesuffix(".txt").replace("_", "") + self.doc_ids.update({doc_obj["id"]: doc_name}) + + def get_info(self, doc_id): + """ interface to make read from doc_ids""" if doc_id in self.doc_ids.keys(): return self.doc_ids[doc_id] else: |