1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
import json
import sys
from study2.document_operations import DocOps, QuestionList
verbose = 0
#read_file = '/home/shebes/Coding/gn-ai/gnqa/paper2_eval/data/testresp2.json'
read_file = '/home/shebes/Coding/gn-ai/gnqa/paper2_eval/data/responses/human/cs_diabetes_responses.json'
out_file = '../data/dataset/human/intermediate_files/human_cs_diabetes_'
values_key = {
"text" : {"name": "contexts", "append": 1},
"associatedQuery": {"name": "question", "append": 0},
"id": {"name": "id", "append": 1},
"title": {"name": "titles", "append": 1},
"document_id": {"name": "document_id", "append": 1},
"extraction_id": {"name": "extraction_id", "append": 1},
"content": {"name": "answer", "append": 0}
}
def get_ragas_out_dict():
return { "titles": [],
"extraction_id": [],
"document_id": [],
"id": [],
"contexts": [],
"answer": "",
"question": ""}
def extract_response(obj, values_key, thedict):
if isinstance(obj, dict):
for key, val in obj.items():
if (key in values_key.keys()):
if (values_key[key]["append"]):
thedict[values_key[key]["name"]].append(val.replace("\n", " ").strip())
else:
thedict[values_key[key]["name"]] = val.replace("\n", " ").strip()
print(("", "Key -> {0}\tValue -> {1}".format(key,val)) [verbose])
else:
if (len(obj.items()) == 1 ):
print(key, " --> ", val)
extract_response(val, values_key, thedict)
elif isinstance(obj, list):
for item in obj:
extract_response(item, values_key, thedict)
# this should be a json file with a list of input files and an output file
with open(read_file, "r") as r_file:
result_file = json.load(r_file)
ragas_output = {
"titles": [],
"extraction_id": [],
"document_id": [],
"id": [],
"contexts": [],
"answer": "",
"question": ""}
print('There are {0} keys in the result file'.format(result_file.keys()))
for key in result_file.keys():
eval_dataset_dict = get_ragas_out_dict()
extract_response(result_file[key], values_key, eval_dataset_dict)
DocOps.writeDatasetFile(eval_dataset_dict, '{0}{1}'.format(out_file, key))
|