about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ShelbySolomonDarnell, 2024-09-25 16:17:35 +0300
committer: ShelbySolomonDarnell, 2024-09-25 16:17:35 +0300
commit: 11a7a465afcaa875d5fdebe840e60b1e7be5ee76 (patch)
tree: 50dd1369f7fa609577f28885e3c67459f9ad8380
parent: 29593ada24d6847c91d643edcbc48607ff3ea9f8 (diff)
download: gn-ai-11a7a465afcaa875d5fdebe840e60b1e7be5ee76.tar.gz
Reading question list, processing response and creating output file.
-rw-r--r-- .gitignore 2
-rw-r--r-- gnqa/paper2_eval/src/document_operations.py 84
-rw-r--r-- gnqa/paper2_eval/src/run_questions.py 49
3 files changed, 107 insertions, 28 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3867aa4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+gnqa/paper2_eval/data/testresp2.json
+gnqa/paper2_eval/src/__pycache__/document_operations.cpython-310.pyc
diff --git a/gnqa/paper2_eval/src/document_operations.py b/gnqa/paper2_eval/src/document_operations.py
new file mode 100644
index 0000000..0c3522f
--- /dev/null
+++ b/gnqa/paper2_eval/src/document_operations.py
@@ -0,0 +1,84 @@
+import os
+import sys
+import json
+import time
+import configparser
+
+from r2r import R2R, Document, GenerationConfig, R2RClient
+
+class DocOps:
+    """Helpers for writing response datasets to disk."""
+    _type = ''
+
+    def __init__(self):
+        self._type = 'QuestionList'
+
+    @staticmethod
+    def writeDatasetFile(responses, outp_file):
+        """Print `outp_file`, then append `responses` to it as indented JSON."""
+        print(outp_file)
+        # Mode "a" creates a missing file, so no existence check is needed.
+        # NOTE: appending to a non-empty file yields concatenated JSON documents.
+        with open(outp_file, "a") as the_data:
+            the_data.write(json.dumps(responses, indent=2))
+
+class QuestionList:
+    """Question lists loaded from a JSON file, indexed by level and domain."""
+    _verbose = 0
+    _doc = ''
+    _fname = ''
+    # Template of the known levels/domains; each instance builds its own copy.
+    _question_list = {
+        "domainexpert": {"gn": [], "aging": [], "diabetes": []},
+        "citizenscientist": {"gn": [], "aging": [], "diabetes": []},
+    }
+
+    def __init__(self, the_file, verbose=0):
+        """Load and index `the_file`; a truthy `verbose` prints progress."""
+        print('QuestionList has been initialized {0}, verbosity is {1}'.format(the_file, verbose))
+        self._fname = the_file
+        self._verbose = verbose
+        # Build the table per instance so parsing never mutates the shared
+        # class-level template (that would leak questions between instances).
+        self._question_list = {
+            level: {domain: [] for domain in domains}
+            for level, domains in type(self)._question_list.items()
+        }
+        self.read_document()
+        self.parse_document()
+
+    def read_document(self):
+        """Load the JSON content of `self._fname` into `self._doc`."""
+        with open(self._fname, "r", encoding="utf-8") as r_file:
+            self._doc = json.load(r_file)
+
+    @staticmethod
+    def reset_responses():
+        """Return an empty response record; the lists are new on every call."""
+        return {'question': [], 'answer': [], 'contexts': [], 'task_id': []}
+
+    def parse_document(self):
+        """Store each item's 'query' list under its 'level' and 'domain'."""
+        # Truthiness test so any verbosity level works; indexing a 2-tuple
+        # with the verbosity fails for values above 1.
+        if self._verbose:
+            print('\nParse question list')
+        for item in self._doc:
+            # setdefault tolerates levels that are missing from the template.
+            by_domain = self._question_list.setdefault(item['level'], {})
+            by_domain[item['domain']] = item['query']
+
+    def print_list(self, the_lst):
+        """Print every entry of `the_lst` on its own line, numbered from 1."""
+        for ndx, item in enumerate(the_lst, start=1):
+            print('\t[{0}] {1}'.format(ndx, item))
+
+    def _print(self):
+        """Print the whole question table as indented JSON."""
+        print(json.dumps(self._question_list, indent=2))
+
+    def get(self, level, domain):
+        """Return the questions for `level`/`domain`; KeyError if unknown."""
+        return self._question_list[level][domain]
+
+
diff --git a/gnqa/paper2_eval/src/run_questions.py b/gnqa/paper2_eval/src/run_questions.py
index 5e0b28b..8805329 100644
--- a/gnqa/paper2_eval/src/run_questions.py
+++ b/gnqa/paper2_eval/src/run_questions.py
@@ -3,37 +3,24 @@ import sys
import os
from r2r import R2R, Document, GenerationConfig, R2RClient
+from document_operations import DocOps, QuestionList
-
-def writeDatasetFile(responses, outp_file):
- print(outp_file)
- output = json.dumps(responses, indent=2)
- if os.path.exists(outp_file):
- with open(outp_file, "a") as the_data:
- the_data.write('' + output)
- else:
- with open(outp_file, "a") as the_data:
- the_data.write(output)
-
-client = R2RClient("http://localhost:8000")
-
-health_resp = client.health()
-
-print("The R2R client's health status is {0}".format(health_resp))
-
-questions = [
- "List as many studies as you can that include rapamycin.",
- "Why is it so difficult to map gene loci that control aging in humans?"
-]
+'''
+*******************************************************************************
+Variables
+*******************************************************************************
+'''
rag_response = {}
+client = R2RClient("http://localhost:8000")
+health_resp = client.health()
-ndx = 1
-for question in questions:
- rag_response[str(ndx)] = client.rag(question)
- ndx = ndx + 1
-
-#print(json.dumps(rag_response, indent=2))
+'''
+*******************************************************************************
+Commands
+*******************************************************************************
+'''
+print("The R2R client's health status is {0}".format(health_resp))
try:
read_file = str(sys.argv[1])
@@ -41,5 +28,11 @@ try:
except:
exit('Example use "python run_questions.py ../data/questions/human/de/aging.json ../data/responses/human/de/aging_resp.json"')
+# Load the questions; the second argument turns on verbose output.
+qLst = QuestionList(read_file, 1)
+# Response keys are 1-based strings ("1", "2", ...) in question order.
+for ndx, question in enumerate(qLst.get("domainexpert", "aging"), start=1):
+    print('Getting response for the following question --> {0}'.format(question))
+    rag_response[str(ndx)] = client.rag(question)
-writeDatasetFile(rag_response, out_file) \ No newline at end of file
+DocOps.writeDatasetFile(rag_response, out_file) \ No newline at end of file