# Reconstructed from a whitespace-collapsed cgit (v1.2.3) patch view:
#   commit:  11a7a465afcaa875d5fdebe840e60b1e7be5ee76
#   author:  ShelbySolomonDarnell, Wed, 25 Sep 2024 16:17:35 +0300
#   subject: Reading question list, processing response and creating
#            output file.
#   path:    gnqa/paper2_eval/src/document_operations.py (new file)
"""Read a JSON question list and write JSON response datasets."""

import os
import sys
import json
import time
import configparser

try:
    from r2r import R2R, Document, GenerationConfig, R2RClient
except ImportError:
    # Nothing in this module uses r2r.  The names stay importable from here
    # for any caller that relies on them, but the JSON helpers below remain
    # usable on machines where r2r is not installed.
    R2R = Document = GenerationConfig = R2RClient = None


class DocOps:
    """Namespace for dataset-file output helpers."""

    _type = ''

    def __init__(self):
        self._type = 'QuestionList'

    @staticmethod
    def writeDatasetFile(responses, outp_file):
        """Append ``responses`` to ``outp_file`` as indented JSON.

        The target path is echoed to stdout first.  The file is created when
        it does not exist and is always opened in append mode, so calling
        this repeatedly on one path yields back-to-back JSON documents with
        no separator between them (not a single valid JSON value).

        Args:
            responses: Any object ``json.dumps`` can serialize.
            outp_file: Path of the file to append to.

        Raises:
            TypeError: If ``responses`` is not JSON serializable.
            OSError: If ``outp_file`` cannot be opened for appending.
        """
        print(outp_file)
        output = json.dumps(responses, indent=2)
        # Append mode creates a missing file, so no existence check is
        # needed.  json.dumps emits ASCII only by default, so pinning the
        # encoding does not change the bytes written.
        with open(outp_file, "a", encoding="utf-8") as the_data:
            the_data.write(output)


class QuestionList:
    """Question lists loaded from a JSON file, keyed by level then domain.

    The input document is iterated as a sequence of items, each providing
    the keys ``level``, ``domain`` and ``query``.  ``level`` must be one of
    ``_LEVELS`` and ``domain`` one of ``_DOMAINS``.
    """

    # Order matters: it fixes the key order of the JSON printed by _print().
    _LEVELS = ("domainexpert", "citizenscientist")
    _DOMAINS = ("gn", "aging", "diabetes")

    _verbose = 0
    _doc = ''
    _fname = ''

    def __init__(self, the_file, verbose=0):
        """Load and parse ``the_file`` immediately.

        Args:
            the_file: Path to the JSON question-list file.
            verbose: Any truthy value enables the progress banner printed by
                ``parse_document``.

        Raises:
            OSError: If ``the_file`` cannot be opened.
            json.JSONDecodeError: If ``the_file`` is not valid JSON.
            KeyError: If an item lacks a required key or names an unknown
                level or domain.
        """
        print('QuestionList has been initialized {0}, verbosity is {1}'.format(
            the_file, verbose))
        self._fname = the_file
        self._verbose = verbose
        # Built per instance: a dict declared on the class would be shared
        # (and mutated) by every QuestionList, leaking questions between
        # instances.
        self._question_list = {
            level: {domain: [] for domain in self._DOMAINS}
            for level in self._LEVELS
        }
        self.read_document()
        self.parse_document()

    def read_document(self):
        """Load the JSON document at ``self._fname`` into ``self._doc``."""
        # JSON interchange text is UTF-8; do not depend on the locale default.
        with open(self._fname, "r", encoding="utf-8") as r_file:
            self._doc = json.load(r_file)

    @staticmethod
    def reset_responses():
        """Return a fresh, empty response record.

        Returns:
            A new dict with the keys ``question``, ``answer``, ``contexts``
            and ``task_id``, each mapped to its own empty list.
        """
        return {
            'question': [],
            'answer': [],
            'contexts': [],
            'task_id': []
        }

    def parse_document(self):
        """Copy every item's query list from ``self._doc`` into the table.

        Raises:
            KeyError: If an item lacks ``level``, ``domain`` or ``query``, or
                names a level or domain that is not in the table.
        """
        # Prints a blank line when quiet and the banner when verbose, as
        # before.  A truth test is used instead of indexing a 2-tuple with
        # the verbosity, which raised IndexError for any verbosity >= 2.
        print('\nParse question list' if self._verbose else '')
        for item in self._doc:
            level = item['level']
            domain = item['domain']
            query_lst = item['query']
            self._question_list[level][domain] = query_lst

    def print_list(self, the_lst):
        """Print each element of ``the_lst`` on its own line, numbered from 1."""
        for ndx, item in enumerate(the_lst, start=1):
            print('\t[{0}] {1}'.format(ndx, item))

    def _print(self):
        """Dump the whole level/domain table to stdout as indented JSON."""
        print(json.dumps(self._question_list, indent=2))

    def get(self, level, domain):
        """Return the question list stored for ``level`` and ``domain``.

        Raises:
            KeyError: If ``level`` or ``domain`` is not in the table.
        """
        return self._question_list[level][domain]