about summary refs log tree commit diff
path: root/gnqa/paper2_eval/src/parse_r2r_result.py
diff options
context:
space:
mode:
author: ShelbySolomonDarnell, 2024-09-24 23:40:11 +0300
committer: ShelbySolomonDarnell, 2024-09-24 23:40:11 +0300
commit: cb28feac47dc1f6147260f1bc057970c54b314f3 (patch)
tree: 6c45082bc4d723468609f774170339c718576873 /gnqa/paper2_eval/src/parse_r2r_result.py
parent: ea942f68346abcd6e51d1cc96b0c90361c3cdfa2 (diff)
download: gn-ai-cb28feac47dc1f6147260f1bc057970c54b314f3.tar.gz
Human questions in json format, code for formatting r2r response
Diffstat (limited to 'gnqa/paper2_eval/src/parse_r2r_result.py')
-rw-r--r-- gnqa/paper2_eval/src/parse_r2r_result.py 44
1 files changed, 44 insertions, 0 deletions
diff --git a/gnqa/paper2_eval/src/parse_r2r_result.py b/gnqa/paper2_eval/src/parse_r2r_result.py
new file mode 100644
index 00000000..b30f2e76
--- /dev/null
+++ b/gnqa/paper2_eval/src/parse_r2r_result.py
@@ -0,0 +1,44 @@
+import json
+import sys
+
+# Hard-coded path of the JSON input that the script section below opens and parses.
+read_file = '/data/code/gn-ai/gnqa/paper2_eval/data/rag_out_1.json'
+
+def iterate_json(obj, thedict):
+    """Recursively walk parsed JSON and collect selected fields into thedict.
+
+    Every dict and list reachable from obj is visited depth-first. Keys are
+    handled as follows:
+
+    * "text": string values are appended to thedict["contexts"] with newlines
+      replaced by spaces and surrounding whitespace stripped.
+    * "associatedQuery": a string value is stored, normalised the same way,
+      in thedict["question"].
+    * "title": the value is appended to thedict["titles"].
+    * "metadata": the raw value is stored in thedict["answer"].
+
+    "question" and "answer" are overwritten on every match, so the last
+    occurrence wins. Matched keys (plus "id" and "document_id") are echoed to
+    stdout, as is the only key of any other single-entry dict.
+
+    Args:
+        obj: Parsed JSON value; anything other than a dict or list is ignored.
+        thedict: Accumulator mutated in place. Must already hold a "contexts"
+            list; "titles" is created on demand.
+
+    Returns:
+        None.
+    """
+    if isinstance(obj, list):
+        for item in obj:
+            iterate_json(item, thedict)
+        return
+    if not isinstance(obj, dict):
+        return
+
+    echoed_keys = (
+        "text", "metadata", "id", "associatedQuery", "title", "document_id",
+    )
+    for key, val in obj.items():
+        if key in echoed_keys:
+            print("Key -> {0}\tValue -> {1}".format(key, val))
+        elif len(obj) == 1:
+            print(key, " --> ", val)
+
+        if key == "text":
+            # Only strings can be normalised; a nested container stored under
+            # "text" is still reached by the recursive call below.
+            if isinstance(val, str):
+                thedict["contexts"].append(val.replace("\n", " ").strip())
+        elif key == "associatedQuery":
+            if isinstance(val, str):
+                thedict["question"] = val.replace("\n", " ").strip()
+        elif key == "title":
+            # Titles were previously printed but never stored, leaving the
+            # "titles" list of the accumulator permanently empty.
+            thedict.setdefault("titles", []).append(val)
+        elif key == "metadata":
+            # NOTE(review): this stores the whole metadata value as the
+            # "answer", not a cleaned string -- confirm this is intended.
+            thedict["answer"] = val
+
+        iterate_json(val, thedict)
+
+# TODO: this should be driven by a JSON file listing the input files and an
+# output file, instead of the single hard-coded read_file path.
+# JSON text is UTF-8, so decode explicitly instead of relying on the
+# platform's default encoding.
+with open(read_file, "r", encoding="utf-8") as r_file:
+    result_file = json.load(r_file)
+
+# Accumulator that iterate_json mutates in place while walking the results.
+ragas_output = {
+    "contexts": [],
+    "titles": [],
+    "answer": "",
+    "question": ""}
+# Raises KeyError if the input has no top-level "vector_search_results" entry.
+vector_search_results = result_file["vector_search_results"]
+iterate_json(vector_search_results, ragas_output)
+
+# Write the collected record to stdout as indented JSON.
+print(json.dumps(ragas_output, indent=2))
\ No newline at end of file