about | summary | refs | log | tree | commit | diff
path: root/gnqa/paper2_eval/src/document_operations.py
diff options
context:
space:
mode:
author Shelby Solomon Darnell 2024-09-25 16:17:35 +0300
committer Shelby Solomon Darnell 2024-09-25 16:17:35 +0300
commit 11a7a465afcaa875d5fdebe840e60b1e7be5ee76 (patch)
tree 50dd1369f7fa609577f28885e3c67459f9ad8380 /gnqa/paper2_eval/src/document_operations.py
parent 29593ada24d6847c91d643edcbc48607ff3ea9f8 (diff)
download gn-ai-11a7a465afcaa875d5fdebe840e60b1e7be5ee76.tar.gz
Reading question list, processing response and creating output file.
Diffstat (limited to 'gnqa/paper2_eval/src/document_operations.py')
-rw-r--r-- gnqa/paper2_eval/src/document_operations.py 84
1 files changed, 84 insertions, 0 deletions
diff --git a/gnqa/paper2_eval/src/document_operations.py b/gnqa/paper2_eval/src/document_operations.py
new file mode 100644
index 0000000..0c3522f
--- /dev/null
+++ b/gnqa/paper2_eval/src/document_operations.py
@@ -0,0 +1,84 @@
+import os
+import sys
+import json
+import time
+import configparser
+
+from r2r import R2R, Document, GenerationConfig, R2RClient
+
class DocOps:
    """Helper for writing collected responses to a dataset file."""

    # Class-level default; __init__ overwrites it per instance.
    _type = ''

    def __init__(self):
        """Create a DocOps whose ``_type`` is ``'QuestionList'``."""
        self._type = 'QuestionList'

    @staticmethod
    def writeDatasetFile(responses, outp_file):
        """Serialize *responses* as indented JSON and append it to *outp_file*.

        The path is echoed to stdout first. The file is opened in append
        mode, so it is created when missing and extended when it already
        exists; calling this repeatedly on the same path therefore leaves
        several JSON documents back to back with no separator.

        Declared as a static method so that both ``DocOps.writeDatasetFile(...)``
        and ``DocOps().writeDatasetFile(...)`` work.

        Args:
            responses: Any JSON-serializable object.
            outp_file: Path of the file to append to.

        Raises:
            TypeError: If *responses* is not JSON-serializable.
            OSError: If *outp_file* cannot be opened for appending.
        """
        print(outp_file)
        output = json.dumps(responses, indent=2)
        # Append mode covers both the "exists" and the "new file" case, so
        # no os.path.exists() check is needed.
        with open(outp_file, "a") as the_data:
            the_data.write(output)
+
class QuestionList:
    """Load a JSON question file and index its queries by level and domain.

    The input file is expected to hold a JSON array of objects, each with the
    keys ``level``, ``domain`` and ``query``; the ``query`` value is stored
    as-is under ``_question_list[level][domain]``.
    """

    # Class-level defaults; every instance gets its own values in __init__.
    _verbose = 0
    _doc = ''
    _fname = ''
    # Deliberately NOT the shared nested template: a mutable class attribute
    # that parse_document() writes into would be shared by all instances.
    _question_list = {}

    def __init__(self, the_file, verbose=0):
        """Read and parse *the_file* immediately.

        Args:
            the_file: Path to the JSON question list.
            verbose: Truthy to print progress messages while parsing.

        Raises:
            OSError: If *the_file* cannot be opened.
            json.JSONDecodeError: If *the_file* is not valid JSON.
            KeyError: If an item lacks ``level``, ``domain`` or ``query``.
        """
        print('QuestionList has been initialized {0}, verbosity is {1}'.format(the_file, verbose))
        self._fname = the_file
        self._verbose = verbose
        # Fresh per-instance container so instances never share question lists.
        self._question_list = self._empty_question_list()
        self.read_document()
        self.parse_document()

    @staticmethod
    def _empty_question_list():
        """Return a new level -> domain -> [] mapping with the known keys."""
        return {
            "domainexpert": {
                "gn": [],
                "aging": [],
                "diabetes": []
            },
            "citizenscientist": {
                "gn": [],
                "aging": [],
                "diabetes": []
            }
        }

    def read_document(self):
        """Load the JSON content of ``self._fname`` into ``self._doc``."""
        # JSON text is UTF-8 by specification; do not rely on the locale.
        with open(self._fname, "r", encoding="utf-8") as r_file:
            self._doc = json.load(r_file)

    @staticmethod
    def reset_responses():
        """Return a fresh, empty response record.

        Declared static so it can be called on the class or on an instance.
        """
        return {
            'question': [],
            'answer': [],
            'contexts': [],
            'task_id': []
        }

    def parse_document(self):
        """Copy each item's ``query`` into ``_question_list[level][domain]``."""
        # Same output as before for verbosity 0 and 1, but no IndexError for
        # larger verbosity values.
        print('\nParse question list' if self._verbose else '')
        for item in self._doc:
            level = item['level']
            domain = item['domain']
            query_lst = item['query']
            # setdefault tolerates levels that are not in the initial template.
            self._question_list.setdefault(level, {})[domain] = query_lst

    def print_list(self, the_lst):
        """Print every element of *the_lst* on its own line, numbered from 1."""
        for ndx, item in enumerate(the_lst, start=1):
            print('\t[{0}] {1}'.format(ndx, item))

    def _print(self):
        """Dump the whole level/domain mapping to stdout as indented JSON."""
        print(json.dumps(self._question_list, indent=2))

    def get(self, level, domain):
        """Return the stored query list for *level* and *domain*.

        Raises:
            KeyError: If *level* or *domain* is unknown.
        """
        return self._question_list[level][domain]
+
+