diff options
Diffstat (limited to 'gnqa/src/study2/parse_r2r_result.py')
-rw-r--r-- | gnqa/src/study2/parse_r2r_result.py | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/gnqa/src/study2/parse_r2r_result.py b/gnqa/src/study2/parse_r2r_result.py new file mode 100644 index 0000000..5cba6d3 --- /dev/null +++ b/gnqa/src/study2/parse_r2r_result.py @@ -0,0 +1,63 @@ +import json +import sys +from study2.document_operations import DocOps, QuestionList + +verbose = 0 +#read_file = '/home/shebes/Coding/gn-ai/gnqa/paper2_eval/data/testresp2.json' +read_file = '/home/shebes/Coding/gn-ai/gnqa/paper2_eval/data/responses/human/cs_diabetes_responses.json' +out_file = '../data/dataset/human/intermediate_files/human_cs_diabetes_' + +values_key = { + "text" : {"name": "contexts", "append": 1}, + "associatedQuery": {"name": "question", "append": 0}, + "id": {"name": "id", "append": 1}, + "title": {"name": "titles", "append": 1}, + "document_id": {"name": "document_id", "append": 1}, + "extraction_id": {"name": "extraction_id", "append": 1}, + "content": {"name": "answer", "append": 0} +} + +def get_ragas_out_dict(): + return { "titles": [], + "extraction_id": [], + "document_id": [], + "id": [], + "contexts": [], + "answer": "", + "question": ""} + +def extract_response(obj, values_key, thedict): + if isinstance(obj, dict): + for key, val in obj.items(): + if (key in values_key.keys()): + if (values_key[key]["append"]): + thedict[values_key[key]["name"]].append(val.replace("\n", " ").strip()) + else: + thedict[values_key[key]["name"]] = val.replace("\n", " ").strip() + print(("", "Key -> {0}\tValue -> {1}".format(key,val)) [verbose]) + else: + if (len(obj.items()) == 1 ): + print(key, " --> ", val) + extract_response(val, values_key, thedict) + elif isinstance(obj, list): + for item in obj: + extract_response(item, values_key, thedict) + +# this should be a json file with a list of input files and an output file +with open(read_file, "r") as r_file: + result_file = json.load(r_file) + +ragas_output = { + "titles": [], + "extraction_id": [], + "document_id": [], + "id": [], + "contexts": [], + "answer": "", + "question": ""} + +print('There are {0} keys in the result file'.format(result_file.keys())) +for key in result_file.keys(): + eval_dataset_dict = get_ragas_out_dict() + extract_response(result_file[key], values_key, eval_dataset_dict) + DocOps.writeDatasetFile(eval_dataset_dict, '{0}{1}'.format(out_file, key))
\ No newline at end of file |