diff options
Diffstat (limited to 'gnqa/paper2_eval/src/parse_r2r_result.py')
-rw-r--r-- | gnqa/paper2_eval/src/parse_r2r_result.py | 51 |
1 files changed, 31 insertions, 20 deletions
diff --git a/gnqa/paper2_eval/src/parse_r2r_result.py b/gnqa/paper2_eval/src/parse_r2r_result.py index b30f2e7..a958629 100644 --- a/gnqa/paper2_eval/src/parse_r2r_result.py +++ b/gnqa/paper2_eval/src/parse_r2r_result.py @@ -1,33 +1,45 @@ import json import sys +verbose = 1 + read_file = '/data/code/gn-ai/gnqa/paper2_eval/data/rag_out_1.json' -def iterate_json(obj, thedict): +values_key = { + "text" : {"name": "contexts", "append": 1}, + "associatedQuery": {"name": "question", "append": 0}, + "id": {"name": "id", "append": 1}, + "title": {"name": "titles", "append": 1}, + "document_id": {"name": "document_id", "append": 1}, + "extraction_id": {"name": "extraction_id", "append": 1}, + "content": {"name": "answer", "append": 0} +} + +def get_ragas_out_dict(): + return { "titles": [], + "extraction_id": [], + "document_id": [], + "id": [], + "contexts": [], + "answer": "", + "question": ""} + +def extract_response(obj, values_key, thedict): if isinstance(obj, dict): for key, val in obj.items(): - if (key == "text"): - thedict["contexts"].append(val.replace("\n", " ").strip()) - print("Key -> {0}\tValue -> {1}".format(key,val)) - elif (key == "metadata"): - thedict["answer"] = val#.replace("\n", " ").strip() - print("Key -> {0}\tValue -> {1}".format(key,val)) - elif (key == "id"): - print("Key -> {0}\tValue -> {1}".format(key,val)) - elif (key == "associatedQuery"): - thedict["question"] = val.replace("\n", " ").strip() - print("Key -> {0}\tValue -> {1}".format(key,val)) - elif (key == "title"): - print("Key -> {0}\tValue -> {1}".format(key,val)) - elif (key == "document_id"): - print("Key -> {0}\tValue -> {1}".format(key,val)) + if (key in values_key.keys()): + if (values_key[key]["append"]): + thedict[values_key[key]["name"]].append(val.replace("\n", " ").strip()) + else: + thedict[values_key[key]["name"]] = val.replace("\n", " ").strip() + print(("", "Key -> {0}\tValue -> {1}".format(key,val)) [verbose]) else: if (len(obj.items()) == 1 ): print(key, " --> ", val) - iterate_json(val, thedict) + extract_response(val, values_key, thedict) elif isinstance(obj, list): for item in obj: - iterate_json(item, thedict) + extract_response(item, values_key, thedict) # this should be a json file with a list of input files and an output file with open(read_file, "r") as r_file: @@ -38,7 +50,6 @@ ragas_output = { "titles": [], "answer": "", "question": ""} -vector_search_results = result_file["vector_search_results"] -iterate_json(vector_search_results, ragas_output) +extract_response(result_file, values_key, ragas_output) print(json.dumps(ragas_output, indent=2))
\ No newline at end of file |