"""Collect question/answer/context strings from JSON response files.

Origin: gnqa/paper2_eval/src/parsejson.py, created by commit
50f0ed1d717d6877cb0562b1f2d54f0f242312d9 ("added paper2_eval",
ShelbySolomonDarnell, Fri, 16 Aug 2024 17:26:14 +0300).

Usage:
    python parsejson.py <file_list_tag> <file_list.json> <output_file>

``file_list.json`` (e.g. ``doc_list.json``) must hold a JSON object that maps
a tag to a list of file names; each file name is resolved inside the data
directory (``./data/`` by default).  The combined result is *appended* to
``<output_file>``.
"""

import json
import os
import sys

# Directory that input file names are resolved against unless a caller
# passes a different ``data_dir``.
DEFAULT_DATA_DIR = "./data/"


def _one_line(text):
    """Return *text* with newlines turned into spaces and the ends stripped."""
    return text.replace("\n", " ").strip()


def iterate_json(obj, thedict):
    """Walk a decoded JSON value and copy the interesting strings to *thedict*.

    Args:
        obj: Any decoded JSON value (dict, list or scalar).
        thedict: Mutable mapping with a list under ``"contexts"``; its
            ``"answer"`` and ``"question"`` entries are overwritten.

    Every string stored under a ``"text"`` key is appended to
    ``thedict["contexts"]``.  A string under ``"answer"`` or ``"question"``
    replaces the corresponding entry, so the last occurrence visited wins.
    All other values are searched recursively; scalars are ignored.
    """
    if isinstance(obj, dict):
        for key, val in obj.items():
            # Only strings can be cleaned up; a non-string under one of the
            # special keys is searched like any other nested value instead of
            # crashing on ``.replace``.
            if isinstance(val, str) and key == "text":
                thedict["contexts"].append(_one_line(val))
            elif isinstance(val, str) and key in ("answer", "question"):
                thedict[key] = _one_line(val)
            else:
                # Trace output kept from the original script: single-key
                # wrapper objects are echoed to stdout before descending.
                if len(obj) == 1:
                    print(key, " --> ", val)
                iterate_json(val, thedict)
    elif isinstance(obj, list):
        for item in obj:
            iterate_json(item, thedict)


def create_resultset_from_file(file_name, data_dir=DEFAULT_DATA_DIR):
    """Load one JSON file and return its question, answer and contexts.

    Args:
        file_name: Name of the file, relative to *data_dir*.
        data_dir: Directory that contains *file_name*.

    Returns:
        A dict with the keys ``"contexts"`` (list of str), ``"answer"`` (str)
        and ``"question"`` (str), filled in by :func:`iterate_json`.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    ragas_output = {"contexts": [], "answer": "", "question": ""}
    with open(os.path.join(data_dir, file_name), "r", encoding="utf-8") as r_file:
        data_file = json.load(r_file)
    iterate_json(data_file, ragas_output)
    return ragas_output


def create_dataset_from_files(tag, file_name, rag_out, data_dir=DEFAULT_DATA_DIR):
    """Append the record of every file listed under *tag* to *rag_out*.

    Args:
        tag: Key selecting which list of files to process.
        file_name: Mapping from tag to a list of file names (despite the
            parameter name, this is the decoded file-list object).
        rag_out: Mapping with lists under ``"question"``, ``"answer"`` and
            ``"contexts"``; one element is appended to each per input file.
        data_dir: Directory that contains the listed files.

    Raises:
        KeyError: If *tag* is not present in *file_name*.
    """
    for the_file in file_name[tag]:
        record = create_resultset_from_file(the_file, data_dir)
        rag_out["answer"].append(record["answer"])
        rag_out["question"].append(record["question"])
        rag_out["contexts"].append(record["contexts"])


def main(argv=None):
    """Run the command-line tool and return a process exit status.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.  Arguments beyond the third are ignored.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 3:
        print(
            "usage: parsejson.py <file_list_tag> <file_list.json> <output_file>",
            file=sys.stderr,
        )
        return 2
    file_list_tag, read_file, outp_file = args[:3]

    rag_out = {"question": [], "answer": [], "contexts": []}

    # The file list is a JSON object mapping each tag to its input files.
    with open(read_file, "r", encoding="utf-8") as r_file:
        file_lst = json.load(r_file)

    create_dataset_from_files(file_list_tag, file_lst, rag_out)

    # NOTE: the output is opened in append mode and every run first writes
    # ",\n".  The file therefore holds comma-prefixed JSON objects and is not
    # a valid JSON document by itself -- presumably it is wrapped or edited
    # afterwards; confirm with the consumer before changing this format.
    with open(outp_file, "a", encoding="utf-8") as the_data:
        the_data.write(",\n")
        the_data.write(json.dumps(rag_out, indent=2))
    return 0


if __name__ == "__main__":
    sys.exit(main())