diff options
author | Alexander_Kabui | 2024-05-16 14:05:02 +0300 |
---|---|---|
committer | Alexander_Kabui | 2024-05-16 14:05:02 +0300 |
commit | 92ab53e9f0b10ebf7423626e37b3fe73eb8c0b65 (patch) | |
tree | 59c1f638d6de48a59aca933c94ccaf894e3b0ee7 /gn3/llms | |
parent | 167706bb417627b1d9fcacb7cdcebeafe886c1ba (diff) | |
download | genenetwork3-92ab53e9f0b10ebf7423626e37b3fe73eb8c0b65.tar.gz |
Refactor code for llm:response
Diffstat (limited to 'gn3/llms')
-rw-r--r-- | gn3/llms/response.py | 60 |
1 files changed, 28 insertions, 32 deletions
diff --git a/gn3/llms/response.py b/gn3/llms/response.py index 7fce75b..2f00312 100644 --- a/gn3/llms/response.py +++ b/gn3/llms/response.py @@ -1,46 +1,42 @@ - -# pylint: skip-file +""" Module contains code for parsing references doc_ids """ +# pylint: disable=C0301 import json import os - basedir = os.path.abspath(os.path.dirname(__file__)) class DocIDs(): + """ Class Method to Parse document id and names""" def __init__(self): - # open doc ids for GN refs - self.doc_ids = self.loadFile("doc_ids.json") - # open doc ids for Diabetes references - self.sugar_doc_ids = self.loadFile("all_files.json") - # format is not what I prefer, it needs to be rebuilt - self.formatDocIDs(self.sugar_doc_ids) - - def loadFile(self, file_name): + """ + init method for Docids + * doc_ids.json: opens doc)ids for gn references + * sugar_doc_ids: open doci_ids for diabetes references + """ + self.doc_ids = self.load_file("doc_ids.json") + self.sugar_doc_ids = self.load_file("all_files.json") + self.format_doc_ids(self.sugar_doc_ids) + + def load_file(self, file_name): + """Method to load and read doc_id files""" file_path = os.path.join(basedir, file_name) if os.path.isfile(file_path): - f = open(file_path, "rb") - result = json.load(f) - f.close() - return result + with open(file_path, "rb") as file_handler: + return json.load(file_handler) else: - raise Exception("\n{0} -- File does not exist\n".format(file_path)) - - def formatDocIDs(self, values): - for _key, _val in values.items(): - if isinstance(_val, list): - for theObject in _val: - docName = self.formatDocumentName(theObject['filename']) - docID = theObject['id'] - self.doc_ids.update({docID: docName}) - - def formatDocumentName(self, val): - result = val.removesuffix('.pdf') - result = result.removesuffix('.txt') - result = result.replace('_', ' ') - return result - - def getInfo(self, doc_id): + raise FileNotFoundError(f"{file_path}-- FIle does not exist\n") + + def format_doc_ids(self, docs): + """method to format doc_ids for list items""" + for _key, val in docs.items(): + if isinstance(val, list): + for doc_obj in val: + doc_name = doc_obj["filename"].removesuffix(".pdf").removesuffix(".txt").replace("_", "") + self.doc_ids.update({doc_obj["id"]: doc_name}) + + def get_info(self, doc_id): + """ interface to make read from doc_ids""" if doc_id in self.doc_ids.keys(): return self.doc_ids[doc_id] else: |