Reading question list, processing response and creating output file.

author: ShelbySolomonDarnell 2024-09-25 16:17:35 +0300
committer: ShelbySolomonDarnell 2024-09-25 16:17:35 +0300
commit: 11a7a465afcaa875d5fdebe840e60b1e7be5ee76 (patch)
tree: 50dd1369f7fa609577f28885e3c67459f9ad8380 /gnqa
parent: 29593ada24d6847c91d643edcbc48607ff3ea9f8 (diff)
download: gn-ai-11a7a465afcaa875d5fdebe840e60b1e7be5ee76.tar.gz
2 files changed, 105 insertions, 28 deletions
diff --git a/gnqa/paper2_eval/src/document_operations.py b/gnqa/paper2_eval/src/document_operations.py
new file mode 100644
index 00000000..0c3522f3
--- /dev/null
+++ b/gnqa/paper2_eval/src/document_operations.py
@@ -0,0 +1,84 @@
+import os
+import sys
+import json
+import time
+import configparser
+
+from r2r import R2R, Document, GenerationConfig, R2RClient
+
+class DocOps:
+    """File helpers for writing question/response datasets."""
+    _type = ''
+
+    def __init__(self):
+        self._type = 'QuestionList'
+
+    @staticmethod
+    def writeDatasetFile(responses, outp_file):
+        """Print outp_file, then append responses to it as indented JSON."""
+        print(outp_file)
+        output = json.dumps(responses, indent=2)
+        # Append mode creates a missing file, so no existence check is needed.
+        with open(outp_file, "a") as the_data:
+            the_data.write(output)
+
+class QuestionList:
+    """Question lists read from a JSON file, indexed by level and then domain."""
+    _verbose = 0
+    _doc = ''
+    _fname = ''
+    # Template of known levels/domains; __init__ copies it for each instance.
+    _question_list = {
+        "domainexpert": {"gn": [], "aging": [], "diabetes": []},
+        "citizenscientist": {"gn": [], "aging": [], "diabetes": []},
+    }
+
+    def __init__(self, the_file, verbose=0):
+        """Read and parse the JSON question file the_file.
+
+        verbose: truthy to print a progress message while parsing.
+        """
+        print('QuestionList has been initialized {0}, verbosity is {1}'.format(the_file, verbose))
+        self._fname = the_file
+        self._verbose = verbose
+        # Per-instance copy: assigning into the class-level dict would leak
+        # one file's questions into every other QuestionList.
+        self._question_list = {
+            level: {domain: list(queries) for domain, queries in domains.items()}
+            for level, domains in type(self)._question_list.items()
+        }
+        self.read_document()
+        self.parse_document()
+
+    def read_document(self):
+        """Load the JSON content of self._fname into self._doc."""
+        with open(self._fname, "r") as r_file:
+            self._doc = json.load(r_file)
+
+    @staticmethod
+    def reset_responses():
+        """Return a fresh, empty response record."""
+        return {'question': [], 'answer': [], 'contexts': [], 'task_id': []}
+
+    def parse_document(self):
+        """Store each item's 'query' list under its 'level' and 'domain'."""
+        # Conditional instead of tuple indexing, which raised IndexError for verbose > 1.
+        print('\nParse question list' if self._verbose else '')
+        for item in self._doc:
+            level, domain = item['level'], item['domain']
+            self._question_list[level][domain] = item['query']
+
+    def print_list(self, the_lst):
+        """Print the items of the_lst, numbered from 1."""
+        for ndx, item in enumerate(the_lst, 1):
+            print('\t[{0}] {1}'.format(ndx, item))
+
+    def _print(self):
+        """Print every stored question list as indented JSON."""
+        print(json.dumps(self._question_list, indent=2))
+
+    def get(self, level, domain):
+        """Return the question list for level and domain (KeyError if unknown)."""
+        return self._question_list[level][domain]
+    
+
diff --git a/gnqa/paper2_eval/src/run_questions.py b/gnqa/paper2_eval/src/run_questions.py
index 5e0b28b2..88053290 100644
--- a/gnqa/paper2_eval/src/run_questions.py
+++ b/gnqa/paper2_eval/src/run_questions.py
@@ -3,37 +3,24 @@ import sys
 import os
 
 from r2r import R2R, Document, GenerationConfig, R2RClient
+from document_operations import DocOps, QuestionList
 
-
-def writeDatasetFile(responses, outp_file):
-  print(outp_file)
-  output = json.dumps(responses, indent=2)
-  if os.path.exists(outp_file):
-    with open(outp_file, "a") as the_data:
-      the_data.write('' + output)
-  else:
-    with open(outp_file, "a") as the_data:
-      the_data.write(output)
-
-client = R2RClient("http://localhost:8000")
-
-health_resp = client.health()
-
-print("The R2R client's health status is {0}".format(health_resp))
-
-questions = [
-    "List as many studies as you can that include rapamycin.",
-    "Why is it so difficult to map gene loci that control aging in humans?"
-]
+'''
+*******************************************************************************
+Variables
+*******************************************************************************
+'''
 rag_response = {}
+client       = R2RClient("http://localhost:8000")
+health_resp  = client.health()
 
-ndx = 1
-for question in questions:
-    rag_response[str(ndx)] = client.rag(question)
-    ndx = ndx + 1
-
-#print(json.dumps(rag_response, indent=2))
+'''
+*******************************************************************************
+Commands
+*******************************************************************************
+'''
 
+print("The R2R client's health status is {0}".format(health_resp))
 
 try:
     read_file = str(sys.argv[1])
@@ -41,5 +28,11 @@ try:
 except:
     exit('Example use "python run_questions.py ../data/questions/human/de/aging.json ../data/responses/human/de/aging_resp.json"')
 
+# Load the questions; the second argument (1) enables verbose output.
+qLst = QuestionList(read_file, 1)
+# Ask each question in order; responses are keyed by 1-based position as a string.
+for ndx, question in enumerate(qLst.get("domainexpert", "aging"), 1):
+    print('Getting response for the following question --> {0}'.format(question))
+    rag_response[str(ndx)] = client.rag(question)
 
-writeDatasetFile(rag_response, out_file)
\ No newline at end of file
+DocOps.writeDatasetFile(rag_response, out_file)
\ No newline at end of file
author	ShelbySolomonDarnell	2024-09-25 16:17:35 +0300
committer	ShelbySolomonDarnell	2024-09-25 16:17:35 +0300
commit	11a7a465afcaa875d5fdebe840e60b1e7be5ee76 (patch)
tree	50dd1369f7fa609577f28885e3c67459f9ad8380 /gnqa
parent	29593ada24d6847c91d643edcbc48607ff3ea9f8 (diff)
download	gn-ai-11a7a465afcaa875d5fdebe840e60b1e7be5ee76.tar.gz