From 50f0ed1d717d6877cb0562b1f2d54f0f242312d9 Mon Sep 17 00:00:00 2001
From: ShelbySolomonDarnell
Date: Fri, 16 Aug 2024 17:26:14 +0300
Subject: added paper2_eval

---
 gnqa/paper2_eval/src/parsejson.py | 63 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 gnqa/paper2_eval/src/parsejson.py

(limited to 'gnqa/paper2_eval/src/parsejson.py')

diff --git a/gnqa/paper2_eval/src/parsejson.py b/gnqa/paper2_eval/src/parsejson.py
new file mode 100644
index 00000000..b49a898a
--- /dev/null
+++ b/gnqa/paper2_eval/src/parsejson.py
@@ -0,0 +1,63 @@
+import json
+import sys
+
+
+def iterate_json(obj, thedict):
+    """Walk parsed JSON depth-first: "text" values are appended to thedict["contexts"],
+    "answer"/"question" overwrite thedict[key]; newlines become spaces, ends are stripped."""
+    if isinstance(obj, list):
+        for element in obj:
+            iterate_json(element, thedict)
+    elif isinstance(obj, dict):
+        lone_key = len(obj) == 1
+        for name, value in obj.items():
+            if name == "text":
+                thedict["contexts"].append(value.replace("\n", " ").strip())
+            elif name in ("answer", "question"):
+                thedict[name] = value.replace("\n", " ").strip()
+            elif lone_key:
+                print(name, " --> ", value)  # echo the entry of an unrecognised single-key dict
+            iterate_json(value, thedict)
+
+def create_dataset_from_files(tag, file_name, rag_out, data_dir="./data/"):
+    """Append one row to rag_out for every file listed under file_name[tag].
+
+    Each name is appended to data_dir verbatim, read as UTF-8 JSON (not the
+    locale's default encoding) and flattened by iterate_json; its answer,
+    question and contexts list go onto rag_out's lists of the same names.
+    """
+    for the_file in file_name[tag]:
+        ragas_output = {"contexts": [], "answer": "", "question": ""}
+        with open(data_dir + the_file, "r", encoding="utf-8") as r_file:
+            iterate_json(json.load(r_file), ragas_output)
+        for field in ("answer", "question", "contexts"):
+            rag_out[field].append(ragas_output[field])
+
+def create_resultset_from_file(file_name):
+    """Parse one file into a fresh contexts/answer/question dict and return it.
+
+    file_name is appended to "./data/" and read as UTF-8 JSON; the dict that
+    iterate_json fills in from that document is the return value.
+    """
+    ragas_output = {"contexts": [], "answer": "", "question": ""}
+    with open("./data/" + file_name, "r", encoding="utf-8") as r_file:
+        iterate_json(json.load(r_file), ragas_output)
+    return ragas_output
+
+
+# Usage: parsejson.py <tag> <list file, e.g. doc_list.json> <output file>
+file_list_tag = sys.argv[1]
+read_file = sys.argv[2]
+outp_file = sys.argv[3]
+
+rag_out = {"question": [], "answer": [], "contexts": []}
+cntxt_lst = []
+
+# read_file: JSON whose file_list_tag entry lists the input file names (read from ./data/)
+with open(read_file, "r", encoding="utf-8") as r_file:
+    file_lst = json.load(r_file)
+create_dataset_from_files(file_list_tag, file_lst, rag_out)
+
+# Append mode: every run adds ",\n" followed by this run's JSON object to outp_file.
+with open(outp_file, "a") as the_data:
+    the_data.write(",\n" + json.dumps(rag_out, indent=2))
-- 
cgit 1.4.1