aboutsummaryrefslogtreecommitdiff
path: root/gnqa/src/study2/parse_r2r_result.py
diff options
context:
space:
mode:
Diffstat (limited to 'gnqa/src/study2/parse_r2r_result.py')
-rw-r--r--gnqa/src/study2/parse_r2r_result.py63
1 files changed, 63 insertions, 0 deletions
diff --git a/gnqa/src/study2/parse_r2r_result.py b/gnqa/src/study2/parse_r2r_result.py
new file mode 100644
index 0000000..5cba6d3
--- /dev/null
+++ b/gnqa/src/study2/parse_r2r_result.py
@@ -0,0 +1,63 @@
+import json
+import sys
+from study2.document_operations import DocOps, QuestionList
+
+verbose = 0
+#read_file = '/home/shebes/Coding/gn-ai/gnqa/paper2_eval/data/testresp2.json'
+read_file = '/home/shebes/Coding/gn-ai/gnqa/paper2_eval/data/responses/human/cs_diabetes_responses.json'
+out_file = '../data/dataset/human/intermediate_files/human_cs_diabetes_'
+
+values_key = {
+ "text" : {"name": "contexts", "append": 1},
+ "associatedQuery": {"name": "question", "append": 0},
+ "id": {"name": "id", "append": 1},
+ "title": {"name": "titles", "append": 1},
+ "document_id": {"name": "document_id", "append": 1},
+ "extraction_id": {"name": "extraction_id", "append": 1},
+ "content": {"name": "answer", "append": 0}
+}
+
+def get_ragas_out_dict():
+ return { "titles": [],
+ "extraction_id": [],
+ "document_id": [],
+ "id": [],
+ "contexts": [],
+ "answer": "",
+ "question": ""}
+
+def extract_response(obj, values_key, thedict):
+ if isinstance(obj, dict):
+ for key, val in obj.items():
+ if (key in values_key.keys()):
+ if (values_key[key]["append"]):
+ thedict[values_key[key]["name"]].append(val.replace("\n", " ").strip())
+ else:
+ thedict[values_key[key]["name"]] = val.replace("\n", " ").strip()
+ print(("", "Key -> {0}\tValue -> {1}".format(key,val)) [verbose])
+ else:
+ if (len(obj.items()) == 1 ):
+ print(key, " --> ", val)
+ extract_response(val, values_key, thedict)
+ elif isinstance(obj, list):
+ for item in obj:
+ extract_response(item, values_key, thedict)
+
+# this should be a json file with a list of input files and an output file
+with open(read_file, "r") as r_file:
+ result_file = json.load(r_file)
+
+ragas_output = {
+ "titles": [],
+ "extraction_id": [],
+ "document_id": [],
+ "id": [],
+ "contexts": [],
+ "answer": "",
+ "question": ""}
+
+print('There are {0} keys in the result file'.format(result_file.keys()))
+for key in result_file.keys():
+ eval_dataset_dict = get_ragas_out_dict()
+ extract_response(result_file[key], values_key, eval_dataset_dict)
+ DocOps.writeDatasetFile(eval_dataset_dict, '{0}{1}'.format(out_file, key)) \ No newline at end of file