diff options
Diffstat (limited to 'gnqa/paper2_eval/src/document_operations.py')
-rw-r--r-- | gnqa/paper2_eval/src/document_operations.py | 64 |
1 files changed, 45 insertions, 19 deletions
diff --git a/gnqa/paper2_eval/src/document_operations.py b/gnqa/paper2_eval/src/document_operations.py index 2682e57..3112d91 100644 --- a/gnqa/paper2_eval/src/document_operations.py +++ b/gnqa/paper2_eval/src/document_operations.py @@ -1,10 +1,14 @@ import os -import sys +#import sys import json -import time -import configparser - -from r2r import R2R, Document, GenerationConfig, R2RClient +#import time +#import configparser +''' +from r2r import ( R2R, + Document, + GenerationConfig, + R2RClient ) +''' class DocOps: _type = '' @@ -21,17 +25,26 @@ class DocOps: def __init__(self): self._type = 'QuestionList' + def reset_responses(): + return { + 'question': [], + 'answer': [], + 'contexts': [] + #, + #'task_id': [] + } + def writeDatasetFile(responses, outp_file): print(outp_file) output = json.dumps(responses, indent=2) if os.path.exists(outp_file): with open(outp_file, "a") as the_data: - the_data.write('' + output) + the_data.write('\n\n' + output) else: with open(outp_file, "a") as the_data: the_data.write(output) - def get_ragas_out_dict(): + def get_r2r_ragas_out_dict(): return { "titles": [], "extraction_id": [], "document_id": [], @@ -40,6 +53,24 @@ class DocOps: "answer": "", "question": ""} + def read_json_document(file_name): + with open(file_name, "r") as result_file: + return json.load(result_file) + + def combine_responses(doc_lst, out_filename): + ragas_output = DocOps.reset_responses() + + for doc in doc_lst: + the_doc = DocOps.read_json_document(doc) + ragas_output['question'].append( + the_doc['question']) + ragas_output['answer'].append( + the_doc['answer']) + ragas_output['contexts'].append( + the_doc['contexts']) + DocOps.writeDatasetFile( + ragas_output, out_filename) + def extract_response(obj, values_key, thedict): if isinstance(obj, dict): @@ -49,14 +80,14 @@ class DocOps: thedict[values_key[key]["name"]].append(val.replace("\n", " ").strip()) else: thedict[values_key[key]["name"]] = val.replace("\n", " ").strip() - print(("", "Key -> {0}\tValue -> {1}".format(key,val)) [verbose]) + print(("", "Key -> {0}\tValue -> {1}".format(key,val)) [DocOps.verbose]) else: if (len(obj.items()) == 1 ): print(key, " --> ", val) - extract_response(val, values_key, thedict) + DocOps.extract_response(val, values_key, thedict) elif isinstance(obj, list): for item in obj: - extract_response(item, values_key, thedict) + DocOps.extract_response(item, values_key, thedict) class QuestionList: _verbose = 0 @@ -83,17 +114,12 @@ class QuestionList: self.parse_document() #self._print() + def read_document(self): - with open(self._fname, "r") as r_file: - self._doc = json.load(r_file) + self._doc = DocOps.read_json_document( + self._fname) + - def reset_responses(): - return { - 'question': [], - 'answer': [], - 'contexts': [], - 'task_id': [] - } def parse_document(self): print(('', '\nParse question list') [self._verbose] ) |