aboutsummaryrefslogtreecommitdiff
path: root/gnqa/paper1_eval/src/parsejson_ratings.py
diff options
context:
space:
mode:
Diffstat (limited to 'gnqa/paper1_eval/src/parsejson_ratings.py')
-rw-r--r--gnqa/paper1_eval/src/parsejson_ratings.py106
1 files changed, 106 insertions, 0 deletions
diff --git a/gnqa/paper1_eval/src/parsejson_ratings.py b/gnqa/paper1_eval/src/parsejson_ratings.py
new file mode 100644
index 0000000..bd20417
--- /dev/null
+++ b/gnqa/paper1_eval/src/parsejson_ratings.py
@@ -0,0 +1,106 @@
+import json
+import sys
+
+"""
+This file converts the JSON report from GNQA into a list of individual users
+and their interactions with the system. At the moment we are getting their
+questions, the system's answers, and their ratings of the overall system response.
+Unfortunately, the context is not saved with the answer.
+"""
+# report_data, ratings_dict
def reorg_json_report(obj, resp_lst, ratings):
    """Fold a parsed GNQA report into per-user rating lists.

    A list is walked element by element; every element is processed with a
    fresh accumulator obtained from ``reset_ratings()``.  A dict is treated
    as a single record: its ``query``, ``answer``, ``weight`` and ``task_id``
    values are appended to ``ratings``, which is then merged into
    ``resp_lst`` under the record's ``user_id`` by ``update_ratings()``.
    Values of any other type are ignored.

    Side effects: updates the module-level ``query_dict`` (query -> number
    of times seen) and ``taskquery_dict`` (task_id -> query, recorded the
    first time a query is seen), and prints progress messages.

    Args:
        obj: Parsed JSON value, either a list of records or one record dict.
        resp_lst: List of ``{user_id: ratings}`` dicts, modified in place.
        ratings: Dict with list values under the four field names.  Used as
            the accumulator when ``obj`` is a dict; not used for list input.
    """
    if isinstance(obj, list):
        for item in obj:
            reorg_json_report(item, resp_lst, reset_ratings())
        return
    if not isinstance(obj, dict):
        return

    user_id = ''
    for key, val in obj.items():
        if key == "user_id":
            user_id = val
            # isKeyInList() returns a (flag, entry) tuple, so the flag has to
            # be tested explicitly; comparing the tuple itself to 0 is never
            # true.  New users are registered by update_ratings() below, so
            # nothing is appended here (doing both would double-count).
            if isKeyInList(resp_lst, val)[0]:
                print("\nKey {0} is already present".format(val))
        elif key in ("query", "answer", "weight", "task_id"):
            ratings[key].append(val)

    print('The ratings before being pushed to user_responses -> {0}'.format(ratings))

    if not ratings["query"]:
        # Without a query there is nothing to count or attribute to the user.
        print("Skipping record without a query")
        return

    # Count how often each query text has been seen so far.
    query = ratings["query"][0]
    qcount = query_dict.get(query, 0)
    query_dict[query] = qcount + 1

    update_ratings(resp_lst, user_id, ratings)

    # Remember the task a query was first seen under; repeats are not re-added.
    if qcount == 0 and ratings["task_id"]:
        taskquery_dict.setdefault(ratings["task_id"][0], query)
+
+
+
def create_resultset_from_file(resp_lst, file_name, output):
    """Load a JSON report from disk and fold it into ``resp_lst``.

    Args:
        resp_lst: List of ``{user_id: ratings}`` dicts, modified in place.
        file_name: Path of the JSON report to read.
        output: Ratings accumulator handed to ``reorg_json_report()``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the locale default,
    # which would garble non-ASCII text on some platforms.
    with open(file_name, "r", encoding="utf-8") as r_file:
        the_data = json.load(r_file)
    reorg_json_report(the_data, resp_lst, output)
+
def isKeyInList(the_lst, the_key):
    """Look for ``the_key`` among the containers in ``the_lst``.

    Returns:
        A ``(flag, entry)`` tuple.  ``flag`` is 1 when at least one element
        of ``the_lst`` contains ``the_key`` and 0 otherwise.  ``entry`` is
        the last such element, or an empty dict when there is none.
    """
    found, match = 0, {}
    for entry in the_lst:
        if the_key not in entry:
            continue
        # Keep scanning: a later match replaces an earlier one.
        found, match = 1, entry
    return found, match
+
def update_ratings(ratings, user_id, input_dict):
    """Merge one record's fields into the entry stored for ``user_id``.

    If no element of ``ratings`` has ``user_id`` as a key, a new
    ``{user_id: input_dict}`` element is appended (``input_dict`` itself is
    stored, not a copy).  Otherwise each field of ``input_dict`` is appended
    to the matching list of the stored entry: for a list value only its
    first element is taken, any other value is appended as is.

    Args:
        ratings: List of ``{user_id: {field: [values, ...]}}`` dicts,
            modified in place.
        user_id: Key identifying the user.
        input_dict: Mapping of field name to a value or list of values.
    """
    key_ndx, ratings_dict = isKeyInList(ratings, user_id)
    if key_ndx == 0:
        ratings.append({user_id: input_dict})
        return

    stored = ratings_dict[user_id]
    for key, val in input_dict.items():
        # Tolerate fields the stored entry has not seen yet.
        bucket = stored.setdefault(key, [])
        if isinstance(val, list):
            # An empty list means the record had no value for this field;
            # there is nothing to take the first element of.
            if val:
                bucket.append(val[0])
        else:
            bucket.append(val)
+
def reset_ratings():
    """Return a new ratings accumulator: one empty list per tracked field."""
    fields = ("task_id", "weight", "answer", "query")
    return {name: [] for name in fields}
+
# Module-level state; query_dict and taskquery_dict are written by
# reorg_json_report().
user_responses = []  # one {user_id: ratings} element per user
ratings_out = {
    "task_id": [],
    "weight": [],
    "answer": [],
    "query": [],
}

query_dict = {}  # query text -> number of times it was seen
taskquery_dict = {}  # task_id -> query text, recorded on a query's first sighting

try:
    read_file = str(sys.argv[1])  # e.g. doc_list.json
    outp_file = str(sys.argv[2])
except IndexError:
    # Only a missing argument is expected here; a bare except would also
    # swallow KeyboardInterrupt and SystemExit.
    sys.exit('Example use "python3 parsejson_ratings.py data/ratings/2024_06_25-gnqa_responses.json data/ratings/[date]-out.json"')

create_resultset_from_file(user_responses, read_file, ratings_out)
print('The number of users is {0}'.format(len(user_responses)))

# The result is appended after a ",\n" separator, so the output file on its
# own is not a standalone JSON document.
# NOTE(review): presumably meant to be concatenated into a larger list - confirm.
with open(outp_file, "a") as the_data:
    the_data.write(",\n")
    the_data.write(json.dumps(user_responses, indent=2))

print("Greetings shabes!")
# NOTE(review): this counts task ids, which matches the number of unique
# queries only if every new query arrives with its own task id - confirm.
print('There are {0} unique queries.'.format(len(taskquery_dict)))
print(json.dumps(taskquery_dict, indent=2))
+#get number of users
+# get number of questions asked per user
+# get average ratings
+