1 files changed, 45 insertions, 19 deletions
diff --git a/gnqa/paper2_eval/src/document_operations.py b/gnqa/paper2_eval/src/document_operations.py
index 2682e57..3112d91 100644
--- a/gnqa/paper2_eval/src/document_operations.py
+++ b/gnqa/paper2_eval/src/document_operations.py
@@ -1,10 +1,14 @@
 import os
-import sys
+#import sys
 import json
-import time
-import configparser
-
-from r2r import R2R, Document, GenerationConfig, R2RClient
+#import time
+#import configparser
+'''
+from r2r import ( R2R, 
+                  Document, 
+                  GenerationConfig, 
+                  R2RClient )
+'''
 
 class DocOps:
     _type = ''
@@ -21,17 +25,26 @@ class DocOps:
     def __init__(self):
         self._type = 'QuestionList'
 
+    @staticmethod  # takes no self: usable from the class and from instances
+    def reset_responses():
+        """Return a new dict of empty 'question', 'answer' and 'contexts' lists."""
+        return {
+            'question': [],
+            'answer':   [],
+            'contexts': [],
+        }
+
     def writeDatasetFile(responses, outp_file):
         print(outp_file)
         output = json.dumps(responses, indent=2)
         if os.path.exists(outp_file):
             with open(outp_file, "a") as the_data:
-                the_data.write('' + output)
+                the_data.write('\n\n' + output)  # blank-line separator; NOTE(review): appending after an earlier dump yields concatenated JSON documents, which json.load rejects as a single document
         else:
             with open(outp_file, "a") as the_data:
                 the_data.write(output)
 
-    def get_ragas_out_dict():
+    def get_r2r_ragas_out_dict():
         return { "titles":        [],
                 "extraction_id": [],
                 "document_id":   [],
@@ -40,6 +53,24 @@ class DocOps:
                 "answer":        "",
                 "question":      ""}
 
+    def read_json_document(file_name):
+        """Return the value decoded from the UTF-8 JSON file *file_name*."""
+        with open(file_name, "r", encoding="utf-8") as json_file:
+            return json.load(json_file)
+    def combine_responses(doc_lst, out_filename):
+        """Merge several response JSON files into one dataset file.
+
+        Reads every path in doc_lst, collects its 'question', 'answer'
+        and 'contexts' values into parallel lists, then passes the result
+        to writeDatasetFile together with out_filename.
+        """
+        merged = DocOps.reset_responses()
+        for path in doc_lst:
+            record = DocOps.read_json_document(path)
+            for field in ('question', 'answer', 'contexts'):
+                merged[field].append(record[field])
+        DocOps.writeDatasetFile(merged, out_filename)
+
 
     def extract_response(obj, values_key, thedict):
         if isinstance(obj, dict):
@@ -49,14 +80,14 @@ class DocOps:
                         thedict[values_key[key]["name"]].append(val.replace("\n", " ").strip())
                     else:
                         thedict[values_key[key]["name"]] = val.replace("\n", " ").strip()
-                    print(("", "Key -> {0}\tValue -> {1}".format(key,val)) [verbose])
+                    print(("", "Key -> {0}\tValue -> {1}".format(key,val)) [DocOps.verbose])
                 else:
                     if (len(obj.items()) == 1 ):
                         print(key, " --> ", val)
-                extract_response(val, values_key, thedict)
+                DocOps.extract_response(val, values_key, thedict)
         elif isinstance(obj, list):
             for item in obj:
-                extract_response(item, values_key, thedict)
+                DocOps.extract_response(item, values_key, thedict)
 
 class QuestionList:
     _verbose = 0
@@ -83,17 +114,12 @@ class QuestionList:
         self.parse_document()
         #self._print()
 
+
     def read_document(self):
-        with open(self._fname, "r") as r_file:
-            self._doc = json.load(r_file)
+        # File reading and JSON decoding are delegated to DocOps.
+        self._doc = DocOps.read_json_document(self._fname)
+
 
-    def reset_responses():
-        return {
-            'question': [],
-            'answer':   [],
-            'contexts':  [],
-            'task_id': []
-        }
 
     def parse_document(self):
         print(('', '\nParse question list') [self._verbose] )