aboutsummaryrefslogtreecommitdiff
path: root/gnqa/paper2_eval/src/parse_r2r_result.py
blob: a958629dbe9384ebb01bb6fd40adfac281837693 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import json
import sys

# Controls the per-key trace printed by extract_response (1 = print details).
verbose = 1

# Default input: a JSON file holding a RAG result to be parsed.
read_file = '/data/code/gn-ai/gnqa/paper2_eval/data/rag_out_1.json'

# Maps a key found in the input JSON to the output field that receives its
# value.  "append" = 1 collects every occurrence into a list; "append" = 0
# keeps a single value (each new occurrence overwrites the previous one).
values_key = {
    source_key: {"name": output_field, "append": collect}
    for source_key, output_field, collect in (
        ("text", "contexts", 1),
        ("associatedQuery", "question", 0),
        ("id", "id", 1),
        ("title", "titles", 1),
        ("document_id", "document_id", 1),
        ("extraction_id", "extraction_id", 1),
        ("content", "answer", 0),
    )
}

def get_ragas_out_dict():
    """Return a new, empty output record.

    The record has one empty list for each multi-valued field ("titles",
    "extraction_id", "document_id", "id", "contexts") and an empty string
    for each single-valued field ("answer", "question").  Every call builds
    fresh list objects, so records never share state.
    """
    record = {
        field: []
        for field in ("titles", "extraction_id", "document_id", "id", "contexts")
    }
    for field in ("answer", "question"):
        record[field] = ""
    return record

def extract_response(obj, values_key, thedict, *, echo=None):
    """Recursively collect selected fields from a decoded JSON structure.

    Nested dicts and lists inside ``obj`` are walked depth-first.  Whenever
    a dict key is listed in ``values_key`` and its value is a string, that
    string is normalised (newlines replaced by spaces, surrounding
    whitespace stripped) and stored in ``thedict`` under the mapped name.

    Args:
        obj: The decoded JSON value to scan.  Anything that is neither a
            dict nor a list is ignored.
        values_key: Mapping of input key -> ``{"name": ..., "append": ...}``.
            A truthy ``"append"`` appends the value to the list stored at
            ``thedict[name]``; a falsy one assigns it, so the last
            occurrence wins.
        thedict: Output dict, updated in place.  A list field that is
            missing is created on first use.
        echo: Whether to print trace lines.  ``None`` (the default) falls
            back to the module-level ``verbose`` setting.

    Returns:
        None.  Results are delivered through ``thedict``.
    """
    if echo is None:
        echo = verbose

    if isinstance(obj, dict):
        for key, val in obj.items():
            if key in values_key:
                # Only strings can be normalised and recorded.  A matched
                # key may also hold a number, null or a nested container;
                # those are not recorded, but containers are still
                # searched by the recursive call below.
                if isinstance(val, str):
                    spec = values_key[key]
                    cleaned = val.replace("\n", " ").strip()
                    if spec["append"]:
                        # setdefault keeps this working when the caller's
                        # dict was created without this list field.
                        thedict.setdefault(spec["name"], []).append(cleaned)
                    else:
                        thedict[spec["name"]] = cleaned
                    if echo:
                        print("Key -> {0}\tValue -> {1}".format(key, val))
            elif echo and len(obj) == 1:
                # Diagnostic: show unrecognised keys of single-entry dicts.
                print(key, " --> ", val)
            # Always descend: a value may itself contain wanted keys.
            extract_response(val, values_key, thedict, echo=echo)
    elif isinstance(obj, list):
        for item in obj:
            extract_response(item, values_key, thedict, echo=echo)

# TODO: this should be a json file with a list of input files and an output file
# JSON text is UTF-8, so decode explicitly instead of relying on the locale.
with open(read_file, "r", encoding="utf-8") as r_file:
    result_file = json.load(r_file)

# Start from the full template so that every list field named in values_key
# ("id", "document_id", "extraction_id", ...) exists before extraction; a
# hand-written subset raises KeyError on the first such key in the input.
ragas_output = get_ragas_out_dict()
extract_response(result_file, values_key, ragas_output)

print(json.dumps(ragas_output, indent=2))