about summary refs log tree commit diff
path: root/gnqa/paper2_eval/src/document_operations.py
diff options
context:
space:
mode:
author ShelbySolomonDarnell 2024-09-25 16:17:35 +0300
committer ShelbySolomonDarnell 2024-09-25 16:17:35 +0300
commit 11a7a465afcaa875d5fdebe840e60b1e7be5ee76 (patch)
tree 50dd1369f7fa609577f28885e3c67459f9ad8380 /gnqa/paper2_eval/src/document_operations.py
parent 29593ada24d6847c91d643edcbc48607ff3ea9f8 (diff)
download gn-ai-11a7a465afcaa875d5fdebe840e60b1e7be5ee76.tar.gz
Reading question list, processing response and creating output file.
Diffstat (limited to 'gnqa/paper2_eval/src/document_operations.py')
-rw-r--r-- gnqa/paper2_eval/src/document_operations.py 84
1 files changed, 84 insertions, 0 deletions
diff --git a/gnqa/paper2_eval/src/document_operations.py b/gnqa/paper2_eval/src/document_operations.py
new file mode 100644
index 00000000..0c3522f3
--- /dev/null
+++ b/gnqa/paper2_eval/src/document_operations.py
@@ -0,0 +1,84 @@
+import os
+import sys
+import json
+import time
+import configparser
+
+from r2r import R2R, Document, GenerationConfig, R2RClient
+
+class DocOps:
+    """Helper for writing collected responses to a dataset file."""
+
+    # Class-level default; __init__ sets the same attribute on each instance.
+    _type = ''
+
+    def __init__(self):
+        self._type = 'QuestionList'
+
+    @staticmethod
+    def writeDatasetFile(responses, outp_file):
+        """Append *responses*, serialized as indented JSON, to *outp_file*.
+
+        The target path is echoed to stdout before writing. The file is
+        opened in append mode, so it is created when missing and extended
+        when present; calling this repeatedly on the same path therefore
+        produces several JSON documents back to back in one file.
+
+        Declared as a static method so that both ``DocOps.writeDatasetFile``
+        and ``DocOps().writeDatasetFile`` work with the same two arguments.
+
+        Args:
+            responses: Any object accepted by ``json.dumps``.
+            outp_file: Path of the file to append to.
+        """
+        print(outp_file)
+        output = json.dumps(responses, indent=2)
+        # Append mode already covers both the "exists" and "missing" cases,
+        # so no os.path.exists() check is needed.
+        with open(outp_file, "a") as the_data:
+            the_data.write(output)
+
+class QuestionList:
+    """Question lists loaded from a JSON file, grouped by level and domain.
+
+    The input file must hold a JSON array of objects, each providing the
+    keys ``level``, ``domain`` and ``query``. After construction the queries
+    are available through ``get(level, domain)``.
+    """
+
+    _verbose = 0
+    _doc = ''
+    _fname = ''
+    # Template of the levels/domains that exist before any file is parsed.
+    # It is never mutated: __init__ copies it so instances do not share data.
+    _question_list = {
+        "domainexpert": {
+            "gn":  [],
+            "aging":    [],
+            "diabetes": []
+        },
+        "citizenscientist": {
+            "gn":  [],
+            "aging":    [],
+            "diabetes": []
+        }
+    }
+
+    def __init__(self, the_file, verbose=0):
+        """Load and parse the question file at *the_file*.
+
+        Args:
+            the_file: Path to the JSON question file.
+            verbose: Truthy to print a progress message while parsing.
+        """
+        print('QuestionList has been initialized {0}, verbosity is {1}'.format(the_file, verbose))
+        self._fname = the_file
+        self._verbose = verbose
+        # Per-instance copy of the template; without it every instance would
+        # write into the single class-level dict and overwrite the others.
+        self._question_list = {
+            level: {domain: list(queries) for domain, queries in domains.items()}
+            for level, domains in type(self)._question_list.items()
+        }
+        self.read_document()
+        self.parse_document()
+
+    def read_document(self):
+        """Read the JSON content of the question file into ``self._doc``."""
+        with open(self._fname, "r") as r_file:
+            self._doc = json.load(r_file)
+
+    @staticmethod
+    def reset_responses():
+        """Return a fresh, empty responses record with the four list fields."""
+        return {
+            'question': [],
+            'answer':   [],
+            'contexts':  [],
+            'task_id': []
+        }
+
+    def parse_document(self):
+        """Store each item's query list under its level and domain.
+
+        Raises:
+            KeyError: If an item lacks ``level``, ``domain`` or ``query``.
+        """
+        # A conditional instead of tuple indexing: same output for verbosity
+        # 0 and 1, and no IndexError for verbosity levels above 1.
+        print('\nParse question list' if self._verbose else '')
+        for item in self._doc:
+            level = item['level']
+            domain = item['domain']
+            # setdefault accepts levels that are not part of the template.
+            self._question_list.setdefault(level, {})[domain] = item['query']
+
+    def print_list(self, the_lst):
+        """Print every entry of *the_lst* with a 1-based index."""
+        for ndx, item in enumerate(the_lst, start=1):
+            print('\t[{0}] {1}'.format(ndx, item))
+
+    def _print(self):
+        """Print the whole question structure as indented JSON."""
+        print(json.dumps(self._question_list, indent=2))
+
+    def get(self, level, domain):
+        """Return the query list stored for *level* and *domain*.
+
+        Raises:
+            KeyError: If the level or the domain is unknown.
+        """
+        return self._question_list[level][domain]
+    
+