1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
import json
import sys
def iterate_json(obj, thedict):
    """Recursively walk parsed JSON and collect question/answer/context text.

    Every dict found anywhere inside ``obj`` is inspected key by key:

    * ``"text"``     -> the cleaned value is appended to ``thedict["contexts"]``
    * ``"answer"``   -> the cleaned value overwrites ``thedict["answer"]``
    * ``"question"`` -> the cleaned value overwrites ``thedict["question"]``

    "Cleaned" means newlines are replaced by spaces and surrounding
    whitespace is stripped. Every other value is descended into, so nested
    dicts and lists are searched as well; scalars are ignored.

    Args:
        obj: A value as produced by ``json.load`` (dict, list or scalar).
        thedict: Accumulator that is mutated in place. It must already hold
            a list under ``"contexts"``.

    Returns:
        None; results are delivered through ``thedict``.
    """
    if isinstance(obj, dict):
        for key, val in obj.items():
            # Only string payloads can be cleaned. A non-string value stored
            # under one of the special keys (null, nested object, list) is
            # treated like any other value and searched recursively instead
            # of crashing on ``.replace``.
            is_text = isinstance(val, str)
            if is_text and key == "text":
                thedict["contexts"].append(val.replace("\n", " ").strip())
            elif is_text and key == "answer":
                thedict["answer"] = val.replace("\n", " ").strip()
            elif is_text and key == "question":
                thedict["question"] = val.replace("\n", " ").strip()
            else:
                # Diagnostic trace: show the content of single-key dicts
                # whose key is not handled above.
                if len(obj) == 1:
                    print(key, " --> ", val)
                iterate_json(val, thedict)
    elif isinstance(obj, list):
        for item in obj:
            iterate_json(item, thedict)
def create_dataset_from_files(tag, file_name, rag_out):
    """Extract one question/answer/contexts record per input file.

    For each file listed under ``file_name[tag]`` the JSON document at
    ``"./data/" + <name>`` is loaded, scanned with ``iterate_json`` and the
    resulting answer, question and list of contexts are appended to the
    matching lists in ``rag_out``. The three lists therefore stay aligned:
    index ``i`` of each belongs to the ``i``-th file.

    Args:
        tag: Key selecting which list of files to process.
        file_name: Despite the name, a mapping from tag to a list of file
            names (relative to ``./data/``).
        rag_out: Accumulator with list values under ``"answer"``,
            ``"question"`` and ``"contexts"``; mutated in place.

    Raises:
        OSError: If an input file cannot be opened.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    for the_file in file_name[tag]:
        # Fresh per-file accumulator so contexts of different files never mix.
        ragas_output = {
            "contexts": [],
            "answer": "",
            "question": "",
        }
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open("./data/" + the_file, "r", encoding="utf-8") as r_file:
            data_file = json.load(r_file)
        # The document is fully parsed, so the handle is no longer needed
        # while the tree is walked.
        iterate_json(data_file, ragas_output)
        rag_out["answer"].append(ragas_output["answer"])
        rag_out["question"].append(ragas_output["question"])
        rag_out["contexts"].append(ragas_output["contexts"])
def create_resultset_from_file(file_name):
    """Extract the question/answer/contexts record of a single file.

    Loads the JSON document at ``"./data/" + file_name`` and scans it with
    ``iterate_json``.

    Args:
        file_name: Name of the input file, relative to ``./data/``.

    Returns:
        A dict with the keys ``"contexts"`` (list of strings), ``"answer"``
        and ``"question"`` (strings, empty when not found in the file).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # The accumulator is local and the path is built from the parameter, so
    # the function does not depend on names defined elsewhere.
    ragas_output = {
        "contexts": [],
        "answer": "",
        "question": "",
    }
    with open("./data/" + file_name, "r", encoding="utf-8") as r_file:
        data_file = json.load(r_file)
    iterate_json(data_file, ragas_output)
    return ragas_output
if __name__ == "__main__":
    # Command line: <file_list_tag> <file_list.json> <output_file>
    if len(sys.argv) < 4:
        # Exit with a readable usage message instead of a bare IndexError.
        sys.exit(
            "usage: {} <file_list_tag> <file_list.json> <output_file>".format(
                sys.argv[0]
            )
        )

    file_list_tag = str(sys.argv[1])
    read_file = str(sys.argv[2])  # e.g. doc_list.json
    outp_file = str(sys.argv[3])

    # Column-oriented result: index i of every list belongs to the i-th file.
    rag_out = {
        "question": [],
        "answer": [],
        "contexts": [],
    }
    cntxt_lst = []  # NOTE(review): not used anywhere in the visible code

    # read_file is a JSON document that maps each tag to a list of input
    # file names (looked up relative to ./data/).
    with open(read_file, "r", encoding="utf-8") as r_file:
        file_lst = json.load(r_file)

    create_dataset_from_files(file_list_tag, file_lst, rag_out)

    # Append mode plus a leading ",\n": each run adds one more JSON object to
    # the output file, separated from earlier content by a comma. The file is
    # therefore not a complete JSON document by itself — presumably it is
    # wrapped or post-processed by the consumer; confirm before changing.
    with open(outp_file, "a", encoding="utf-8") as the_data:
        the_data.write(",\n")
        the_data.write(json.dumps(rag_out, indent=2))
|