From 11a7a465afcaa875d5fdebe840e60b1e7be5ee76 Mon Sep 17 00:00:00 2001
From: ShelbySolomonDarnell
Date: Wed, 25 Sep 2024 16:17:35 +0300
Subject: Reading question list, processing response and creating output file.

---
 .gitignore                                  |   2 +
 gnqa/paper2_eval/src/document_operations.py | 100 ++++++++++++++++++++++++++++
 gnqa/paper2_eval/src/run_questions.py       |  49 ++++++--------
 3 files changed, 123 insertions(+), 28 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 gnqa/paper2_eval/src/document_operations.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3867aa4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+gnqa/paper2_eval/data/testresp2.json
+gnqa/paper2_eval/src/__pycache__/document_operations.cpython-310.pyc
diff --git a/gnqa/paper2_eval/src/document_operations.py b/gnqa/paper2_eval/src/document_operations.py
new file mode 100644
index 0000000..0c3522f
--- /dev/null
+++ b/gnqa/paper2_eval/src/document_operations.py
@@ -0,0 +1,100 @@
+import os
+import sys
+import json
+import time
+import configparser
+
+from r2r import R2R, Document, GenerationConfig, R2RClient
+
+
+class DocOps:
+    """File-output helpers for RAG responses."""
+
+    _type = ''
+
+    def __init__(self):
+        self._type = 'QuestionList'
+
+    @staticmethod
+    def writeDatasetFile(responses, outp_file):
+        """Append ``responses`` to ``outp_file`` as indented JSON.
+
+        The file is created when it does not exist. Because the file is
+        opened in append mode, writing to an existing file adds another
+        JSON document after the current content.
+        """
+        print(outp_file)
+        output = json.dumps(responses, indent=2)
+        # Mode "a" creates a missing file, so no os.path.exists() branch
+        # is needed.
+        with open(outp_file, "a") as the_data:
+            the_data.write(output)
+
+
+class QuestionList:
+    """Questions read from a JSON file, grouped by level and domain."""
+
+    _verbose = 0
+    _doc = ''
+    _fname = ''
+
+    def __init__(self, the_file, verbose=0):
+        """Read ``the_file`` and index its questions.
+
+        ``the_file`` is the path of a JSON file holding a list of objects
+        with ``level``, ``domain`` and ``query`` keys. A truthy ``verbose``
+        turns on progress messages.
+        """
+        print('QuestionList has been initialized {0}, verbosity is {1}'.format(the_file, verbose))
+        self._fname = the_file
+        self._verbose = verbose
+        # Built per instance: a class-level dict would be shared, so one
+        # question file could overwrite the entries loaded from another.
+        self._question_list = {
+            level: {"gn": [], "aging": [], "diabetes": []}
+            for level in ("domainexpert", "citizenscientist")
+        }
+        self.read_document()
+        self.parse_document()
+
+    def read_document(self):
+        """Load the JSON file named by ``self._fname`` into ``self._doc``."""
+        with open(self._fname, "r") as r_file:
+            self._doc = json.load(r_file)
+
+    @staticmethod
+    def reset_responses():
+        """Return an empty response record with one list per field."""
+        return {
+            'question': [],
+            'answer': [],
+            'contexts': [],
+            'task_id': []
+        }
+
+    def parse_document(self):
+        """Store each entry's query list under its level and domain."""
+        # Any truthy verbosity enables the message; values above 1 are fine.
+        if self._verbose:
+            print('\nParse question list')
+        for item in self._doc:
+            level = item['level']
+            domain = item['domain']
+            # setdefault accepts a level that is not pre-declared.
+            self._question_list.setdefault(level, {})[domain] = item['query']
+
+    def print_list(self, the_lst):
+        """Print every item of ``the_lst`` with a 1-based index."""
+        for ndx, item in enumerate(the_lst, start=1):
+            print('\t[{0}] {1}'.format(ndx, item))
+
+    def _print(self):
+        """Print the whole question table as indented JSON."""
+        print(json.dumps(self._question_list, indent=2))
+
+    def get(self, level, domain):
+        """Return the question list stored for ``level`` and ``domain``.
+
+        Raises KeyError when either key is unknown.
+        """
+        return self._question_list[level][domain]
diff --git a/gnqa/paper2_eval/src/run_questions.py b/gnqa/paper2_eval/src/run_questions.py
index 5e0b28b..8805329 100644
--- a/gnqa/paper2_eval/src/run_questions.py
+++ b/gnqa/paper2_eval/src/run_questions.py
@@ -3,37 +3,24 @@ import sys
 import os
 
 from r2r import R2R, Document, GenerationConfig, R2RClient
+from document_operations import DocOps, QuestionList
 
-
-def writeDatasetFile(responses, outp_file):
-    print(outp_file)
-    output = json.dumps(responses, indent=2)
-    if os.path.exists(outp_file):
-        with open(outp_file, "a") as the_data:
-            the_data.write('' + output)
-    else:
-        with open(outp_file, "a") as the_data:
-            the_data.write(output)
-
-client = R2RClient("http://localhost:8000")
-
-health_resp = client.health()
-
-print("The R2R client's health status is {0}".format(health_resp))
-
-questions = [
-    "List as many studies as you can that include rapamycin.",
-    "Why is it so difficult to map gene loci that control aging in humans?"
-]
+'''
+*******************************************************************************
+Variables
+*******************************************************************************
+'''
 
 rag_response = {}
+client = R2RClient("http://localhost:8000")
+health_resp = client.health()
 
-ndx = 1
-for question in questions:
-    rag_response[str(ndx)] = client.rag(question)
-    ndx = ndx + 1
-
-#print(json.dumps(rag_response, indent=2))
+'''
+*******************************************************************************
+Commands
+*******************************************************************************
+'''
+print("The R2R client's health status is {0}".format(health_resp))
 
 try:
     read_file = str(sys.argv[1])
@@ -41,5 +28,11 @@ try:
 except:
     exit('Example use "python run_questions.py ../data/questions/human/de/aging.json ../data/responses/human/de/aging_resp.json"')
 
+qLst = QuestionList(read_file, 1) # second parameter is for verbose output
+ndx = 1
+for question in qLst.get("domainexpert","aging"):
+    print('Getting response for the following question --> {0}'.format(question))
+    rag_response[str(ndx)] = client.rag(question)
+    ndx += 1
 
-writeDatasetFile(rag_response, out_file)
\ No newline at end of file
+DocOps.writeDatasetFile(rag_response, out_file)
\ No newline at end of file
-- 
cgit v1.2.3