"""Flatten the vector-search section of a RAG result file into a flat record.

Run as a script, this loads a JSON result file, walks its
``"vector_search_results"`` entry and prints the collected record as
indented JSON.  The input path defaults to ``read_file`` and may be
overridden by the first command-line argument.
"""
import json
import sys

# Default input used when no path is given on the command line.
# TODO: this should be a json file with a list of input files and an output file
read_file = '/data/code/gn-ai/gnqa/paper2_eval/data/rag_out_1.json'

# Keys whose values are only echoed to stdout and never stored.
_PRINT_ONLY_KEYS = frozenset({"id", "title", "document_id"})


def _show(key, val):
    """Print one key/value pair in the tab-separated debug format."""
    print("Key -> {0}\tValue -> {1}".format(key, val))


def _flatten(text):
    """Return *text* with newlines replaced by spaces and the ends stripped."""
    return text.replace("\n", " ").strip()


def iterate_json(obj, thedict):
    """Recursively walk *obj* and collect selected fields into *thedict*.

    Lists are walked element by element; dicts are walked key by key;
    any other value is ignored.  For dict keys:

    * ``"text"``: the flattened value is appended to ``thedict["contexts"]``.
    * ``"metadata"``: the value is stored unchanged in ``thedict["answer"]``
      and is not traversed further.
    * ``"associatedQuery"``: the flattened value is stored in
      ``thedict["question"]``.
    * ``"id"``, ``"title"``, ``"document_id"``: printed only.
    * anything else: the value is traversed recursively.

    Args:
        obj: Parsed JSON data (dict, list or scalar).
        thedict: Mutable mapping updated in place.  It must already hold a
            list under ``"contexts"``.

    Raises:
        AttributeError: If a ``"text"`` or ``"associatedQuery"`` value has no
            ``replace`` method (i.e. is not a string).
    """
    if isinstance(obj, list):
        for item in obj:
            iterate_json(item, thedict)
        return
    if not isinstance(obj, dict):
        return

    for key, val in obj.items():
        if key == "text":
            thedict["contexts"].append(_flatten(val))
            _show(key, val)
        elif key == "metadata":
            # Stored verbatim: the value is not flattened here, so it may be
            # any JSON type, not only a string.
            thedict["answer"] = val
            _show(key, val)
        elif key == "associatedQuery":
            thedict["question"] = _flatten(val)
            _show(key, val)
        elif key in _PRINT_ONLY_KEYS:
            # NOTE(review): "title" values are only printed, so the "titles"
            # list of the output record stays empty - confirm whether they
            # should be collected.
            _show(key, val)
        else:
            # Announce single-key wrapper dicts before descending into them.
            if len(obj) == 1:
                print(key, " --> ", val)
            iterate_json(val, thedict)


def main(argv=None):
    """Load a result file, flatten its search results and print them as JSON.

    Args:
        argv: Command-line arguments without the program name.  Defaults to
            ``sys.argv[1:]``.  The first argument, when present, is the input
            path; otherwise ``read_file`` is used.

    Returns:
        The collected record (also printed to stdout as indented JSON).

    Raises:
        OSError: If the input file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the document has no ``"vector_search_results"`` entry.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    path = args[0] if args else read_file

    with open(path, "r", encoding="utf-8") as r_file:
        result_file = json.load(r_file)

    ragas_output = {
        "contexts": [],
        "titles": [],
        "answer": "",
        "question": "",
    }
    vector_search_results = result_file["vector_search_results"]
    iterate_json(vector_search_results, ragas_output)
    print(json.dumps(ragas_output, indent=2))
    return ragas_output


if __name__ == "__main__":
    main()