aboutsummaryrefslogtreecommitdiff
path: root/gnqa/paper2_eval/src/document_operations.py
diff options
context:
space:
mode:
Diffstat (limited to 'gnqa/paper2_eval/src/document_operations.py')
-rw-r--r--gnqa/paper2_eval/src/document_operations.py64
1 files changed, 45 insertions, 19 deletions
diff --git a/gnqa/paper2_eval/src/document_operations.py b/gnqa/paper2_eval/src/document_operations.py
index 2682e57..3112d91 100644
--- a/gnqa/paper2_eval/src/document_operations.py
+++ b/gnqa/paper2_eval/src/document_operations.py
@@ -1,10 +1,14 @@
import os
-import sys
+#import sys
import json
-import time
-import configparser
-
-from r2r import R2R, Document, GenerationConfig, R2RClient
+#import time
+#import configparser
+'''
+from r2r import ( R2R,
+ Document,
+ GenerationConfig,
+ R2RClient )
+'''
class DocOps:
_type = ''
@@ -21,17 +25,26 @@ class DocOps:
def __init__(self):
self._type = 'QuestionList'
+ def reset_responses():
+ return {
+ 'question': [],
+ 'answer': [],
+ 'contexts': []
+ #,
+ #'task_id': []
+ }
+
def writeDatasetFile(responses, outp_file):
print(outp_file)
output = json.dumps(responses, indent=2)
if os.path.exists(outp_file):
with open(outp_file, "a") as the_data:
- the_data.write('' + output)
+ the_data.write('\n\n' + output)
else:
with open(outp_file, "a") as the_data:
the_data.write(output)
- def get_ragas_out_dict():
+ def get_r2r_ragas_out_dict():
return { "titles": [],
"extraction_id": [],
"document_id": [],
@@ -40,6 +53,24 @@ class DocOps:
"answer": "",
"question": ""}
+ def read_json_document(file_name):
+ with open(file_name, "r") as result_file:
+ return json.load(result_file)
+
+ def combine_responses(doc_lst, out_filename):
+ ragas_output = DocOps.reset_responses()
+
+ for doc in doc_lst:
+ the_doc = DocOps.read_json_document(doc)
+ ragas_output['question'].append(
+ the_doc['question'])
+ ragas_output['answer'].append(
+ the_doc['answer'])
+ ragas_output['contexts'].append(
+ the_doc['contexts'])
+ DocOps.writeDatasetFile(
+ ragas_output, out_filename)
+
def extract_response(obj, values_key, thedict):
if isinstance(obj, dict):
@@ -49,14 +80,14 @@ class DocOps:
thedict[values_key[key]["name"]].append(val.replace("\n", " ").strip())
else:
thedict[values_key[key]["name"]] = val.replace("\n", " ").strip()
- print(("", "Key -> {0}\tValue -> {1}".format(key,val)) [verbose])
+ print(("", "Key -> {0}\tValue -> {1}".format(key,val)) [DocOps.verbose])
else:
if (len(obj.items()) == 1 ):
print(key, " --> ", val)
- extract_response(val, values_key, thedict)
+ DocOps.extract_response(val, values_key, thedict)
elif isinstance(obj, list):
for item in obj:
- extract_response(item, values_key, thedict)
+ DocOps.extract_response(item, values_key, thedict)
class QuestionList:
_verbose = 0
@@ -83,17 +114,12 @@ class QuestionList:
self.parse_document()
#self._print()
+
def read_document(self):
- with open(self._fname, "r") as r_file:
- self._doc = json.load(r_file)
+ self._doc = DocOps.read_json_document(
+ self._fname)
+
- def reset_responses():
- return {
- 'question': [],
- 'answer': [],
- 'contexts': [],
- 'task_id': []
- }
def parse_document(self):
print(('', '\nParse question list') [self._verbose] )