"""Flatten a nested JSON response into a flat dictionary of selected fields.

The script loads the JSON document at ``read_file``, walks it recursively and
copies every value whose key is listed in ``values_key`` into a flat output
dictionary, which is then printed as indented JSON.
"""

import json
import sys

# Truthy -> echo every extracted key/value pair while walking the input.
verbose = 1

read_file = '/data/code/gn-ai/gnqa/paper2_eval/data/rag_out_1.json'

# Maps a key found in the input JSON to the output field that receives its
# value.  "append" truthy -> the value is appended to a list field;
# "append" falsy -> the value overwrites a scalar field.
values_key = {
    "text": {"name": "contexts", "append": 1},
    "associatedQuery": {"name": "question", "append": 0},
    "id": {"name": "id", "append": 1},
    "title": {"name": "titles", "append": 1},
    "document_id": {"name": "document_id", "append": 1},
    "extraction_id": {"name": "extraction_id", "append": 1},
    "content": {"name": "answer", "append": 0},
}


def get_ragas_out_dict():
    """Return a fresh, empty output dictionary.

    It contains one entry for every output field named in ``values_key``:
    an empty list for the appended fields and an empty string for the
    overwritten ones.  A new dictionary (with new lists) is built on every
    call, so results never share state.
    """
    return {
        "titles": [],
        "extraction_id": [],
        "document_id": [],
        "id": [],
        "contexts": [],
        "answer": "",
        "question": "",
    }


def _clean(text):
    """Replace newlines in *text* with spaces and strip surrounding whitespace."""
    return text.replace("\n", " ").strip()


def extract_response(obj, values_key, thedict):
    """Recursively copy the values selected by *values_key* from *obj* into *thedict*.

    Args:
        obj: Parsed JSON data.  Dicts and lists are walked recursively; any
            other top-level value is ignored.
        values_key: Mapping ``source key -> {"name": target field,
            "append": flag}``.  A truthy flag appends the value to the list
            stored under the target field; a falsy flag overwrites the field.
        thedict: Output dictionary, mutated in place.  A list field that is
            missing is created on first use.

    Returns:
        None.  All results are written into *thedict*.

    String values have their newlines replaced by spaces and are stripped.
    For a mapped key whose value is not a string: a dict or list is searched
    recursively, ``None`` is skipped, and any other scalar is stored unchanged.
    """
    if isinstance(obj, dict):
        for key, val in obj.items():
            spec = values_key.get(key)

            if spec is None or isinstance(val, (dict, list)):
                # Either an unmapped key, or a mapped key that holds a
                # container instead of text: keep searching inside it.
                if spec is None and len(obj) == 1:
                    print(key, " --> ", val)
                extract_response(val, values_key, thedict)
                continue

            if val is None:
                # Nothing to record; do not overwrite a field with None.
                continue

            # Only strings can be normalised; other scalars are kept as-is
            # rather than crashing on a missing .replace().
            cleaned = _clean(val) if isinstance(val, str) else val

            if spec["append"]:
                # setdefault keeps callers that pass a partial dict working.
                thedict.setdefault(spec["name"], []).append(cleaned)
            else:
                thedict[spec["name"]] = cleaned

            if verbose:
                print("Key -> {0}\tValue -> {1}".format(key, val))
    elif isinstance(obj, list):
        for item in obj:
            extract_response(item, values_key, thedict)


def main():
    """Load ``read_file``, extract the mapped fields and print them as JSON."""
    # this should be a json file with a list of input files and an output file
    with open(read_file, "r", encoding="utf-8") as r_file:
        result_file = json.load(r_file)

    # Start from the full template so every mapped list field exists.
    ragas_output = get_ragas_out_dict()
    extract_response(result_file, values_key, ragas_output)
    print(json.dumps(ragas_output, indent=2))


if __name__ == "__main__":
    main()