blob: 2f003129f4d5d674ad6c020aae7fc4707d27f89f (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
""" Module contains code for parsing references doc_ids """
# pylint: disable=C0301
import json
import os
# Absolute path of the directory that contains this module; DocIDs.load_file
# joins the JSON file names onto it, so the data files are looked up next to
# this source file rather than in the current working directory.
basedir = os.path.abspath(os.path.dirname(__file__))
class DocIDs:
    """Lookup table that maps reference document ids to document names.

    The table is seeded from ``doc_ids.json`` and then extended with the
    entries found in ``all_files.json``; both files are resolved against
    the module-level ``basedir``.
    """

    def __init__(self):
        """Load both doc-id files and merge them into one lookup table.

        * ``doc_ids.json``: doc ids for gn references; stored as
          ``self.doc_ids``.
        * ``all_files.json``: doc ids for diabetes references; stored as
          ``self.sugar_doc_ids`` and folded into ``self.doc_ids``.

        Raises:
            FileNotFoundError: if either JSON file does not exist.
        """
        self.doc_ids = self.load_file("doc_ids.json")
        self.sugar_doc_ids = self.load_file("all_files.json")
        self.format_doc_ids(self.sugar_doc_ids)

    def load_file(self, file_name):
        """Read and decode a JSON doc-id file.

        Args:
            file_name: name of the file, joined onto ``basedir``.

        Returns:
            The decoded JSON content of the file.

        Raises:
            FileNotFoundError: if the resulting path is not an existing file.
        """
        file_path = os.path.join(basedir, file_name)
        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"{file_path}-- FIle does not exist\n")
        # Opened in binary mode on purpose: json.load accepts a binary handle
        # and detects the UTF encoding of the payload itself.
        with open(file_path, "rb") as file_handler:
            return json.load(file_handler)

    def format_doc_ids(self, docs):
        """Add the documents listed in *docs* to ``self.doc_ids``.

        Only values of *docs* that are lists are processed; every item of
        such a list must provide the keys ``"id"`` and ``"filename"``. The
        stored name is the filename with a trailing ``.pdf`` and then a
        trailing ``.txt`` removed and every underscore deleted. An id that
        is already present in ``self.doc_ids`` is overwritten.

        Args:
            docs: mapping whose list values hold the document objects.

        Raises:
            KeyError: if a document object lacks ``"id"`` or ``"filename"``.
        """
        for doc_group in docs.values():
            if not isinstance(doc_group, list):
                # Non-list values carry no document objects; skip them.
                continue
            for doc_obj in doc_group:
                doc_name = (
                    doc_obj["filename"]
                    .removesuffix(".pdf")
                    .removesuffix(".txt")
                    .replace("_", "")
                )
                self.doc_ids[doc_obj["id"]] = doc_name

    def get_info(self, doc_id):
        """Return the document name stored for *doc_id*.

        Args:
            doc_id: id to look up in ``self.doc_ids``.

        Returns:
            The stored name, or *doc_id* itself when the id is unknown.
        """
        return self.doc_ids.get(doc_id, doc_id)
|