aboutsummaryrefslogtreecommitdiff
path: root/gnqa/paper1_eval/src/data/results/gpt4o
diff options
context:
space:
mode:
authorSoloDShelby2024-07-19 14:41:40 +0300
committerSoloDShelby2024-07-19 14:41:40 +0300
commit3fa31b50af2861382fbe2c76406f5a04c3fefc93 (patch)
tree34d581648b0e0d3fc8dbe6577752a4fd433a3258 /gnqa/paper1_eval/src/data/results/gpt4o
parent74616897e30c7daafe5e74d34073466464921316 (diff)
downloadgn-ai-3fa31b50af2861382fbe2c76406f5a04c3fefc93.tar.gz
Evaluation code for paper 1
Diffstat (limited to 'gnqa/paper1_eval/src/data/results/gpt4o')
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_aging_1.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_aging_2.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_aging_3.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_aging_4.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_diabetes_1.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_diabetes_2.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_diabetes_3.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_diabetes_4.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_gn_1.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_gn_3.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_gn_4.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_aging_1.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_aging_2.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_aging_3.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_aging_4.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_diabetes_1.json20
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_diabetes_2.json20
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_diabetes_3.json20
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_diabetes_4.json20
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_gn_3.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/gpt4o/scores_cs_diabetes.json37
21 files changed, 421 insertions, 0 deletions
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_aging_1.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_aging_1.json
new file mode 100644
index 0000000..017d467
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_aging_1.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.9,
+ "context_utilization": 0.9070781944697044,
+ "context_relevancy": 0.2509564217695168,
+ "answer_relevancy": 0.9766358986013376
+},
+{
+ "faithfulness": 0.9,
+ "context_utilization": 0.9070781944697044,
+ "context_relevancy": 0.39381356462665973,
+ "answer_relevancy": 0.9825656372129992
+},
+{
+ "faithfulness": 0.9,
+ "context_utilization": 0.9104451978368653,
+ "context_relevancy": 0.39381356462665973,
+ "answer_relevancy": 0.973147869814394
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_aging_2.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_aging_2.json
new file mode 100644
index 0000000..16e0754
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_aging_2.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.96,
+ "context_utilization": 0.999999999991935,
+ "context_relevancy": 0.135272921108742,
+ "answer_relevancy": 0.9479744529828181
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.999999999991935,
+ "context_relevancy": 0.135272921108742,
+ "answer_relevancy": 0.951711024285933
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.999999999991935,
+ "context_relevancy": 0.14987988628287136,
+ "answer_relevancy": 0.9541549710773409
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_aging_3.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_aging_3.json
new file mode 100644
index 0000000..566613d
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_aging_3.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.97675568021047,
+ "context_relevancy": 0.2259505726726024,
+ "answer_relevancy": 0.9448278057931704
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.97675568021047,
+ "context_relevancy": 0.21568920951760603,
+ "answer_relevancy": 0.9444115188658463
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.97675568021047,
+ "context_relevancy": 0.22922926119719259,
+ "answer_relevancy": 0.9444470134072755
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_aging_4.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_aging_4.json
new file mode 100644
index 0000000..61632cf
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_aging_4.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.9375,
+ "context_utilization": 0.9456511261659628,
+ "context_relevancy": 0.19499540357020145,
+ "answer_relevancy": 0.9422926379891006
+},
+{
+ "faithfulness": 0.9375,
+ "context_utilization": 0.9213036834852352,
+ "context_relevancy": 0.18966624996518577,
+ "answer_relevancy": 0.9493955674020345
+},
+{
+ "faithfulness": 0.9375,
+ "context_utilization": 0.9213036834852352,
+ "context_relevancy": 0.19896857554658115,
+ "answer_relevancy": 0.9454532501945042
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_diabetes_1.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_diabetes_1.json
new file mode 100644
index 0000000..63646cf
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_diabetes_1.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.8533333333333333,
+ "context_utilization": 0.9438491717704647,
+ "context_relevancy": 0.20436440992383947,
+ "answer_relevancy": 0.957861571692806
+},
+{
+ "faithfulness": 0.8355555555555556,
+ "context_utilization": 0.9438491717704647,
+ "context_relevancy": 0.2012874868469164,
+ "answer_relevancy": 0.9533191002746577
+},
+{
+ "faithfulness": 0.8533333333333333,
+ "context_utilization": 0.9438491717704647,
+ "context_relevancy": 0.18389618249909034,
+ "answer_relevancy": 0.9498105973186146
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_diabetes_2.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_diabetes_2.json
new file mode 100644
index 0000000..02fe10f
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_diabetes_2.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.9583333333333334,
+ "context_utilization": 0.7194444444356269,
+ "context_relevancy": 0.45524315840105317,
+ "answer_relevancy": 0.9496830965502638
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.7220833333238528,
+ "context_relevancy": 0.3970421001999949,
+ "answer_relevancy": 0.947827635665291
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.7194444444356269,
+ "context_relevancy": 0.3941849573428521,
+ "answer_relevancy": 0.9388702679644993
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_diabetes_3.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_diabetes_3.json
new file mode 100644
index 0000000..6566e51
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_diabetes_3.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9237332568786083,
+ "context_relevancy": 0.2418398640689662,
+ "answer_relevancy": 0.9914901338443677
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9237332568786083,
+ "context_relevancy": 0.2352516287748486,
+ "answer_relevancy": 0.9926324858517163
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9295047961859101,
+ "context_relevancy": 0.2352516287748486,
+ "answer_relevancy": 0.9942151664950669
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_diabetes_4.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_diabetes_4.json
new file mode 100644
index 0000000..29e72c0
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_diabetes_4.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.96,
+ "context_utilization": 0.8382274392203959,
+ "context_relevancy": 0.21850226437090842,
+ "answer_relevancy": 0.9268774561175513
+},
+{
+ "faithfulness": 0.96,
+ "context_utilization": 0.8289482840320825,
+ "context_relevancy": 0.21792356066720475,
+ "answer_relevancy": 0.9264507966486306
+},
+{
+ "faithfulness": 0.96,
+ "context_utilization": 0.8382274392203959,
+ "context_relevancy": 0.22104856066720474,
+ "answer_relevancy": 0.9306530537050953
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_gn_1.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_gn_1.json
new file mode 100644
index 0000000..25a71b0
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_gn_1.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.19999999999900003,
+ "context_relevancy": 0.05,
+ "answer_relevancy": 0.1823656883581401
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.19999999999900003,
+ "context_relevancy": 0.05,
+ "answer_relevancy": 0.1823656883581401
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.19999999999900003,
+ "context_relevancy": 0.05,
+ "answer_relevancy": 0.1823656883581401
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_gn_3.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_gn_3.json
new file mode 100644
index 0000000..580e854
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_gn_3.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.5999999999959664,
+ "context_relevancy": 0.22450090744101633,
+ "answer_relevancy": 0.562411241022707
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.5999999999959664,
+ "context_relevancy": 0.1687443284936479,
+ "answer_relevancy": 0.5643801560995779
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.5999999999959664,
+ "context_relevancy": 0.1687443284936479,
+ "answer_relevancy": 0.5617108358354678
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_gn_4.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_gn_4.json
new file mode 100644
index 0000000..bcfc652
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_cs_gn_4.json
@@ -0,0 +1,19 @@
+[
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.19999999999882354,
+ "context_relevancy": 0.065625,
+ "answer_relevancy": 0.1834019127645967
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.1999999999988889,
+ "context_relevancy": 0.065625,
+ "answer_relevancy": 0.18443207660654864
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.19999999999882354,
+ "context_relevancy": 0.065625,
+ "answer_relevancy": 0.18442316533105405
+}]
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_aging_1.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_aging_1.json
new file mode 100644
index 0000000..f719092
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_aging_1.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.96,
+ "context_utilization": 0.9479350312277262,
+ "context_relevancy": 0.21303541253345637,
+ "answer_relevancy": 0.9224404704070004
+},
+{
+ "faithfulness": 0.96,
+ "context_utilization": 0.9479350312277262,
+ "context_relevancy": 0.21303541253345637,
+ "answer_relevancy": 0.9204895776596349
+},
+{
+ "faithfulness": 0.975,
+ "context_utilization": 0.9479350312277262,
+ "context_relevancy": 0.21303541253345637,
+ "answer_relevancy": 0.9233177482569399
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_aging_2.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_aging_2.json
new file mode 100644
index 0000000..6539d02
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_aging_2.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9999999999917659,
+ "context_relevancy": 0.12455653962641092,
+ "answer_relevancy": 0.9215002061256425
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9999999999917659,
+ "context_relevancy": 0.11027082534069661,
+ "answer_relevancy": 0.9238905660966263
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9999999999917659,
+ "context_relevancy": 0.10345264352251479,
+ "answer_relevancy": 0.9236938936685843
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_aging_3.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_aging_3.json
new file mode 100644
index 0000000..13c967f
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_aging_3.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9017950700460371,
+ "context_relevancy": 0.15025391166567637,
+ "answer_relevancy": 0.9080233205044008
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9017950700460371,
+ "context_relevancy": 0.1521235888294712,
+ "answer_relevancy": 0.9183172871520828
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9017950700460371,
+ "context_relevancy": 0.14271182412358882,
+ "answer_relevancy": 0.914051539296523
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_aging_4.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_aging_4.json
new file mode 100644
index 0000000..b40e032
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_aging_4.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.873908075621365,
+ "context_relevancy": 0.13236286714496703,
+ "answer_relevancy": 0.9379656935564172
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.873908075621365,
+ "context_relevancy": 0.13236286714496703,
+ "answer_relevancy": 0.9291571366744364
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.873908075621365,
+ "context_relevancy": 0.13236286714496703,
+ "answer_relevancy": 0.9374908833538264
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_diabetes_1.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_diabetes_1.json
new file mode 100644
index 0000000..d06530b
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_diabetes_1.json
@@ -0,0 +1,20 @@
+[
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9898660740877201,
+ "context_relevancy": 0.31265901349702185,
+ "answer_relevancy": 0.9236030246314068
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9898660740877201,
+ "context_relevancy": 0.14113303947104788,
+ "answer_relevancy": 0.9150252742414604
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9728819471034,
+ "context_relevancy": 0.13863303947104788,
+ "answer_relevancy": 0.9148789006153158
+}
+]
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_diabetes_2.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_diabetes_2.json
new file mode 100644
index 0000000..e9fee86
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_diabetes_2.json
@@ -0,0 +1,20 @@
+[
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.7124087573371619,
+ "context_relevancy": 0.22621316914080075,
+ "answer_relevancy": 0.9046933431898141
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.7004998969667501,
+ "context_relevancy": 0.23871316914080074,
+ "answer_relevancy": 0.9058328551471282
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.7124087573371619,
+ "context_relevancy": 0.24675410481331536,
+ "answer_relevancy": 0.9079384840142384
+}
+]
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_diabetes_3.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_diabetes_3.json
new file mode 100644
index 0000000..e39107d
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_diabetes_3.json
@@ -0,0 +1,20 @@
+[
+{
+ "faithfulness": 0.96,
+ "context_utilization": 0.7479011200345999,
+ "context_relevancy": 0.2814642730385713,
+ "answer_relevancy": 0.8930647394153285
+},
+{
+ "faithfulness": 0.9099999999999999,
+ "context_utilization": 0.7479011200345999,
+ "context_relevancy": 0.2814642730385713,
+ "answer_relevancy": 0.896847471293901
+},
+{
+ "faithfulness": 0.9099999999999999,
+ "context_utilization": 0.7479011200345999,
+ "context_relevancy": 0.2814642730385713,
+ "answer_relevancy": 0.8912330225043821
+}
+]
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_diabetes_4.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_diabetes_4.json
new file mode 100644
index 0000000..2be82a9
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_diabetes_4.json
@@ -0,0 +1,20 @@
+[
+{
+ "faithfulness": 0.9333333333333332,
+ "context_utilization": 0.7297725885164278,
+ "context_relevancy": 0.17196237023200656,
+ "answer_relevancy": 0.8650648136737542
+},
+{
+ "faithfulness": 0.9333333333333332,
+ "context_utilization": 0.7297725885164278,
+ "context_relevancy": 0.19056702139479725,
+ "answer_relevancy": 0.877389474552466
+},
+{
+ "faithfulness": 0.9333333333333332,
+ "context_utilization": 0.7297725885164278,
+ "context_relevancy": 0.12413628327548483,
+ "answer_relevancy": 0.8783898419790906
+}
+]
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_gn_3.json b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_gn_3.json
new file mode 100644
index 0000000..8f33b47
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/gpt4o_eval_de_gn_3.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.3914232592779822,
+ "context_relevancy": 0.05517979452054794,
+ "answer_relevancy": 0.39015395726757396
+},
+{
+ "faithfulness": 0.6666666666666666,
+ "context_utilization": 0.3914232592779822,
+ "context_relevancy": 0.05517979452054794,
+ "answer_relevancy": 0.3864361192318465
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.3914232592779822,
+ "context_relevancy": 0.05517979452054794,
+ "answer_relevancy": 0.3901540653386376
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/gpt4o/scores_cs_diabetes.json b/gnqa/paper1_eval/src/data/results/gpt4o/scores_cs_diabetes.json
new file mode 100644
index 0000000..ef8c661
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/gpt4o/scores_cs_diabetes.json
@@ -0,0 +1,37 @@
+,
+{
+ "faithfulness": 0.8836363636363636,
+ "context_utilization": 0.9533674463200074,
+ "context_relevancy": 0.1906017620560349,
+ "answer_relevancy": 0.9629314894517702
+},
+{
+ "faithfulness": 0.8436363636363637,
+ "context_utilization": 0.9533674463200074,
+ "context_relevancy": 0.20364480596864404,
+ "answer_relevancy": 0.9495337378736439
+},
+{
+ "faithfulness": 0.9292861989650555,
+ "context_utilization": 0.9651063978998563,
+ "context_relevancy": 0.7109415961877185,
+ "answer_relevancy": 0.6638464088279047
+},
+{
+ "faithfulness": 0.4690747444442785,
+ "context_utilization": 0.7745118439410044,
+ "context_relevancy": 0.7140014395170777,
+ "answer_relevancy": 0.9322560108422944
+},
+{
+ "faithfulness": 0.7745118439410044,
+ "context_utilization": 0.3333333333333333,
+ "context_relevancy": 0.3538011695906433,
+ "answer_relevancy": 0.5456168066603103
+},
+{
+ "faithfulness": 0.5657894736779605,
+ "context_utilization": 1.0,
+ "context_relevancy": 0.22142857142857142,
+ "answer_relevancy": 0.7181594110215056
+} \ No newline at end of file