aboutsummaryrefslogtreecommitdiff
path: root/gnqa/paper1_eval/src/data/results/human
diff options
context:
space:
mode:
authorSoloDShelby2024-07-19 14:41:40 +0300
committerSoloDShelby2024-07-19 14:41:40 +0300
commit3fa31b50af2861382fbe2c76406f5a04c3fefc93 (patch)
tree34d581648b0e0d3fc8dbe6577752a4fd433a3258 /gnqa/paper1_eval/src/data/results/human
parent74616897e30c7daafe5e74d34073466464921316 (diff)
downloadgn-ai-3fa31b50af2861382fbe2c76406f5a04c3fefc93.tar.gz
Evaluation code for paper 1
Diffstat (limited to 'gnqa/paper1_eval/src/data/results/human')
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_cs_aging_1.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_cs_aging_2.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_cs_aging_3.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_1.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_2.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_3.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_4.json39
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_cs_gn_1.json14
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_cs_gn_2.json20
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_cs_gn_3.json25
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_cs_gn_4.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_cs_gn_5.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_cs_gn_6.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_cs_gn_7.json18
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_de_aging_1.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_de_aging_2.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_1.1.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_1.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_2.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_de_gn_1.1.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_de_gn_1.json18
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_de_gn_2.json18
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_de_gn_3.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_de_gn_4.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_de_gn_5.json19
-rw-r--r--gnqa/paper1_eval/src/data/results/human/scores_de_gn_6.json19
26 files changed, 513 insertions, 0 deletions
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_1.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_1.json
new file mode 100644
index 0000000..f37296e
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_1.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.9428571428571428,
+ "context_utilization": 0.9352808378906239,
+ "context_relevancy": 0.07125660926343383,
+ "answer_relevancy": 0.9523107847972947
+},
+{
+ "faithfulness": 0.9428571428571428,
+ "context_utilization": 0.9355754170487147,
+ "context_relevancy": 0.07125660926343383,
+ "answer_relevancy": 0.9549674105661919
+},
+{
+ "faithfulness": 0.9428571428571428,
+ "context_utilization": 0.9211814776549062,
+ "context_relevancy": 0.07125660926343383,
+ "answer_relevancy": 0.9499741000488516
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_2.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_2.json
new file mode 100644
index 0000000..f7dae45
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_2.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.7742138364779875,
+ "context_utilization": 0.9894163077459343,
+ "context_relevancy": 0.04506568948673187,
+ "answer_relevancy": 0.9408685212116719
+},
+{
+ "faithfulness": 0.7742138364779875,
+ "context_utilization": 0.9894163077459343,
+ "context_relevancy": 0.04506568948673187,
+ "answer_relevancy": 0.9443348131121218
+},
+{
+ "faithfulness": 0.7742138364779875,
+ "context_utilization": 0.9894163077459343,
+ "context_relevancy": 0.04506568948673187,
+ "answer_relevancy": 0.9373602976132769
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_3.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_3.json
new file mode 100644
index 0000000..b844e70
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_3.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.5714285714285715,
+ "context_utilization": 0.8007295763340471,
+ "context_relevancy": 0.17757604714126454,
+ "answer_relevancy": 0.9624406549445811
+},
+{
+ "faithfulness": 0.5714285714285715,
+ "context_utilization": 0.8256406991618427,
+ "context_relevancy": 0.17757604714126454,
+ "answer_relevancy": 0.9624295953235836
+},
+{
+ "faithfulness": 0.5714285714285715,
+ "context_utilization": 0.8256406991618427,
+ "context_relevancy": 0.17757604714126454,
+ "answer_relevancy": 0.9622154472101722
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_1.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_1.json
new file mode 100644
index 0000000..8316988
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_1.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.875,
+ "context_utilization": 0.6983276538190184,
+ "context_relevancy": 0.12429532403609515,
+ "answer_relevancy": 0.9112620728936985
+},
+{
+ "faithfulness": 0.875,
+ "context_utilization": 0.6983276538190184,
+ "context_relevancy": 0.09929532403609516,
+ "answer_relevancy": 0.9153897050102227
+},
+{
+ "faithfulness": 0.875,
+ "context_utilization": 0.6983276538190184,
+ "context_relevancy": 0.10864315012305167,
+ "answer_relevancy": 0.917767867097622
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_2.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_2.json
new file mode 100644
index 0000000..7020070
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_2.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.96,
+ "context_utilization": 0.9677256242806254,
+ "context_relevancy": 0.21125490196078428,
+ "answer_relevancy": 0.96903893567995
+},
+{
+ "faithfulness": 0.96,
+ "context_utilization": 0.9769465411060386,
+ "context_relevancy": 0.2143799019607843,
+ "answer_relevancy": 0.9657737286038965
+},
+{
+ "faithfulness": 0.96,
+ "context_utilization": 0.9769465411060386,
+ "context_relevancy": 0.2143799019607843,
+ "answer_relevancy": 0.9662487631948171
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_3.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_3.json
new file mode 100644
index 0000000..1b57ac7
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_3.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.8400000000000001,
+ "context_utilization": 0.9538081741417747,
+ "context_relevancy": 0.11497132693854006,
+ "answer_relevancy": 0.9169018406443659
+},
+{
+ "faithfulness": 0.8400000000000001,
+ "context_utilization": 0.9538081741417747,
+ "context_relevancy": 0.2016379936052067,
+ "answer_relevancy": 0.9187380038134432
+},
+{
+ "faithfulness": 0.8400000000000001,
+ "context_utilization": 0.9434457191364413,
+ "context_relevancy": 0.11497132693854006,
+ "answer_relevancy": 0.9169054522175759
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_4.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_4.json
new file mode 100644
index 0000000..e54895e
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_4.json
@@ -0,0 +1,39 @@
+[
+{
+ "faithfulness": 0.75,
+ "context_utilization": 0.49586940836114385,
+ "context_relevancy": 0.4489795918367347,
+ "answer_relevancy": 0.9050522628722737
+},
+{
+ "faithfulness": 0.75,
+ "context_utilization": 0.5332560296769832,
+ "context_relevancy": 0.4489795918367347,
+ "answer_relevancy": 0.9274337314167257
+},
+{
+ "faithfulness": 0.75,
+ "context_utilization": 0.49586940836114385,
+ "context_relevancy": 0.4489795918367347,
+ "answer_relevancy": 0.9274337314167257
+}
+]
+,
+{
+ "faithfulness": 0.75,
+ "context_utilization": 0.49586940836114385,
+ "context_relevancy": 0.2857142857142857,
+ "answer_relevancy": 0.9050522628722737
+},
+{
+ "faithfulness": 0.75,
+ "context_utilization": 0.49586940836114385,
+ "context_relevancy": 0.4489795918367347,
+ "answer_relevancy": 0.9050692102679129
+},
+{
+ "faithfulness": 0.75,
+ "context_utilization": 0.49586940836114385,
+ "context_relevancy": 0.4489795918367347,
+ "answer_relevancy": 0.9050522628722737
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_1.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_1.json
new file mode 100644
index 0000000..4481bdb
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_1.json
@@ -0,0 +1,14 @@
+[
+{
+ "faithfulness": 0.9099999999999999,
+ "context_utilization": 0.7636817432217684,
+ "context_relevancy": 0.1880278568582262,
+ "answer_relevancy": 0.9423280729066063
+},
+{
+ "faithfulness": 0.9099999999999999,
+ "context_utilization": 0.7357044805156637,
+ "context_relevancy": 0.15469452352489288,
+ "answer_relevancy": 0.9486310766041234
+}
+]
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_2.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_2.json
new file mode 100644
index 0000000..f0733da
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_2.json
@@ -0,0 +1,20 @@
+[
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.6326643990778912,
+ "context_relevancy": 0.1347400263302517,
+ "answer_relevancy": 0.8746783013952267
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.6683786847884866,
+ "context_relevancy": 0.1508690585883162,
+ "answer_relevancy": 0.8703116371547157
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.6326643990778912,
+ "context_relevancy": 0.1332248748151002,
+ "answer_relevancy": 0.8689393391315343
+}
+]
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_3.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_3.json
new file mode 100644
index 0000000..7258a04
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_3.json
@@ -0,0 +1,25 @@
+,
+{
+ "faithfulness": 0.5677966101694916,
+ "context_utilization": 0.4561270844811867,
+ "context_relevancy": 0.5560185185148071,
+ "answer_relevancy": 0.5052295687739448
+},
+{
+ "faithfulness": 0.75,
+ "context_utilization": 0.5643129043087701,
+ "context_relevancy": 0.05599820060366845,
+ "answer_relevancy": 0.7414497144046052
+},
+{
+ "faithfulness": 0.75,
+ "context_utilization": 0.5729415276879585,
+ "context_relevancy": 0.05599820060366845,
+ "answer_relevancy": 0.5544292034718707
+},
+{
+ "faithfulness": 0.75,
+ "context_utilization": 0.5643129043087701,
+ "context_relevancy": 0.05599820060366845,
+ "answer_relevancy": 0.5571557447633533
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_4.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_4.json
new file mode 100644
index 0000000..15b1eb4
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_4.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.9428571428571428,
+ "context_utilization": 0.789441709521905,
+ "context_relevancy": 0.136784410468621,
+ "answer_relevancy": 0.8500389108331188
+},
+{
+ "faithfulness": 0.9142857142857143,
+ "context_utilization": 0.7921665772467545,
+ "context_relevancy": 0.15115688010424852,
+ "answer_relevancy": 0.8317623611813637
+},
+{
+ "faithfulness": 0.9142857142857143,
+ "context_utilization": 0.789441709521905,
+ "context_relevancy": 0.1713997950840056,
+ "answer_relevancy": 0.8295033051724321
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_5.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_5.json
new file mode 100644
index 0000000..03713c2
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_5.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.9333333333333332,
+ "context_utilization": 0.6801836614504664,
+ "context_relevancy": 0.06454107195486505,
+ "answer_relevancy": 0.7372449377189451
+},
+{
+ "faithfulness": 0.888888888888889,
+ "context_utilization": 0.6582554717950728,
+ "context_relevancy": 0.06454107195486505,
+ "answer_relevancy": 0.7372493726798736
+},
+{
+ "faithfulness": 0.8761904761904763,
+ "context_utilization": 0.6582554717950728,
+ "context_relevancy": 0.06454107195486505,
+ "answer_relevancy": 0.7372449377189451
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_6.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_6.json
new file mode 100644
index 0000000..0d67e80
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_6.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.75,
+ "context_utilization": 0.45564199508207504,
+ "context_relevancy": 0.06005275024001898,
+ "answer_relevancy": 0.8915679391851077
+},
+{
+ "faithfulness": 0.75,
+ "context_utilization": 0.45564199508207504,
+ "context_relevancy": 0.05215801339791372,
+ "answer_relevancy": 0.7064299254450507
+},
+{
+ "faithfulness": 0.75,
+ "context_utilization": 0.45564199508207504,
+ "context_relevancy": 0.0707670359543047,
+ "answer_relevancy": 0.705077643467664
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_7.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_7.json
new file mode 100644
index 0000000..a30782a
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_7.json
@@ -0,0 +1,18 @@
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9178474303338136,
+ "context_relevancy": 0.09082338152105594,
+ "answer_relevancy": 0.9524284122181226
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9178474303338136,
+ "context_relevancy": 0.09082338152105594,
+ "answer_relevancy": 0.9492709094955006
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9178474303338136,
+ "context_relevancy": 0.09082338152105594,
+ "answer_relevancy": 0.9524270517859097
+}
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_aging_1.json b/gnqa/paper1_eval/src/data/results/human/scores_de_aging_1.json
new file mode 100644
index 0000000..0700cc3
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_de_aging_1.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.7428571428571429,
+ "context_utilization": 0.811213861888054,
+ "context_relevancy": 0.2314977832798794,
+ "answer_relevancy": 0.9433409234117335
+},
+{
+ "faithfulness": 0.7428571428571429,
+ "context_utilization": 0.7983208584270672,
+ "context_relevancy": 0.24114933391503665,
+ "answer_relevancy": 0.9213466964486724
+},
+{
+ "faithfulness": 0.7142857142857142,
+ "context_utilization": 0.7928499698879043,
+ "context_relevancy": 0.25367860791972047,
+ "answer_relevancy": 0.9318615626710995
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_aging_2.json b/gnqa/paper1_eval/src/data/results/human/scores_de_aging_2.json
new file mode 100644
index 0000000..b7f8cc0
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_de_aging_2.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9999999999923077,
+ "context_relevancy": 1.0,
+ "answer_relevancy": 0.8836732547434365
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9999999999923077,
+ "context_relevancy": 1.0,
+ "answer_relevancy": 0.8836732547434365
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9999999999923077,
+ "context_relevancy": 1.0,
+ "answer_relevancy": 0.8836732547434365
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_1.1.json b/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_1.1.json
new file mode 100644
index 0000000..0e46a7f
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_1.1.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.7777403152338384,
+ "context_relevancy": 0.06084656084656084,
+ "answer_relevancy": 0.9645121106959694
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.7777403152338384,
+ "context_relevancy": 0.06084656084656084,
+ "answer_relevancy": 0.9545089573441493
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.7719252969185456,
+ "context_relevancy": 0.05026455026455026,
+ "answer_relevancy": 0.9327156331092903
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_1.json b/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_1.json
new file mode 100644
index 0000000..0b621e2
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_1.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.9166666666666667,
+ "context_utilization": 0.7671392748688641,
+ "context_relevancy": 0.33561602418745273,
+ "answer_relevancy": 0.90324232280188
+},
+{
+ "faithfulness": 0.9166666666666667,
+ "context_utilization": 0.8555804271901495,
+ "context_relevancy": 0.2314914450628736,
+ "answer_relevancy": 0.7214993293693964
+},
+{
+ "faithfulness": 0.9666666666666668,
+ "context_utilization": 0.8080409996869443,
+ "context_relevancy": 0.2837641723356009,
+ "answer_relevancy": 0.9014349074286775
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_2.json b/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_2.json
new file mode 100644
index 0000000..bd6159a
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_2.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.96,
+ "context_utilization": 0.9407265478802447,
+ "context_relevancy": 0.36922494182022314,
+ "answer_relevancy": 0.9364702737085768
+},
+{
+ "faithfulness": 0.96,
+ "context_utilization": 0.9344763371477345,
+ "context_relevancy": 0.386466321130568,
+ "answer_relevancy": 0.944903559928554
+},
+{
+ "faithfulness": 0.96,
+ "context_utilization": 0.9344763371477345,
+ "context_relevancy": 0.36922494182022314,
+ "answer_relevancy": 0.9355512181399582
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_gn_1.1.json b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_1.1.json
new file mode 100644
index 0000000..d47c31f
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_1.1.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.9609375,
+ "context_utilization": 0.6937871661149843,
+ "context_relevancy": 0.13637360626722328,
+ "answer_relevancy": 0.7491735530216923
+},
+{
+ "faithfulness": 0.9609375,
+ "context_utilization": 0.6937871661149843,
+ "context_relevancy": 0.13637360626722328,
+ "answer_relevancy": 0.8902254519253692
+},
+{
+ "faithfulness": 0.9296875,
+ "context_utilization": 0.6937871661149843,
+ "context_relevancy": 0.13637360626722328,
+ "answer_relevancy": 0.7491716987687886
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_gn_1.json b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_1.json
new file mode 100644
index 0000000..9b8aea1
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_1.json
@@ -0,0 +1,18 @@
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9596645021564207,
+ "context_relevancy": 0.1634286630390054,
+ "answer_relevancy": 0.8973761639776056
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9596645021564207,
+ "context_relevancy": 0.1634286630390054,
+ "answer_relevancy": 0.9038434542970721
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9561079845997444,
+ "context_relevancy": 0.1634286630390054,
+ "answer_relevancy": 0.8983469111948426
+}
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_gn_2.json b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_2.json
new file mode 100644
index 0000000..30be099
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_2.json
@@ -0,0 +1,18 @@
+{
+ "faithfulness": 0.8,
+ "context_utilization": 0.7266600180799679,
+ "context_relevancy": 0.12599664343008876,
+ "answer_relevancy": 0.7320068044307713
+},
+{
+ "faithfulness": 0.8,
+ "context_utilization": 0.7266600180799679,
+ "context_relevancy": 0.13234584977929512,
+ "answer_relevancy": 0.7198147208663943
+},
+{
+ "faithfulness": 0.8,
+ "context_utilization": 0.7266600180799679,
+ "context_relevancy": 0.12849969593314126,
+ "answer_relevancy": 0.7325464661134955
+}
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_gn_3.json b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_3.json
new file mode 100644
index 0000000..33a94ff
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_3.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.8666666666666666,
+ "context_utilization": 0.6480859663109396,
+ "context_relevancy": 0.1510877797535341,
+ "answer_relevancy": 0.915240518467451
+},
+{
+ "faithfulness": 0.8666666666666666,
+ "context_utilization": 0.6480859663109396,
+ "context_relevancy": 0.11387847742795269,
+ "answer_relevancy": 0.9124757388808369
+},
+{
+ "faithfulness": 0.9333333333333332,
+ "context_utilization": 0.6480859663109396,
+ "context_relevancy": 0.1510877797535341,
+ "answer_relevancy": 0.9141762748312928
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_gn_4.json b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_4.json
new file mode 100644
index 0000000..345f566
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_4.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.65,
+ "context_utilization": 0.354120538187183,
+ "context_relevancy": 0.1120026888642334,
+ "answer_relevancy": 0.7376780691990237
+},
+{
+ "faithfulness": 0.5333333333333333,
+ "context_utilization": 0.34712053818788413,
+ "context_relevancy": 0.1120026888642334,
+ "answer_relevancy": 0.7455570356847625
+},
+{
+ "faithfulness": 0.65,
+ "context_utilization": 0.34712053818788413,
+ "context_relevancy": 0.0993042761658207,
+ "answer_relevancy": 0.7376780609996703
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_gn_5.json b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_5.json
new file mode 100644
index 0000000..5148d68
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_5.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.8007395937295169,
+ "context_relevancy": 0.049944862903025335,
+ "answer_relevancy": 0.8599243307705603
+},
+{
+ "faithfulness": 0.8,
+ "context_utilization": 0.806603791260579,
+ "context_relevancy": 0.049944862903025335,
+ "answer_relevancy": 0.6986715526356269
+},
+{
+ "faithfulness": 0.9,
+ "context_utilization": 0.806603791260579,
+ "context_relevancy": 0.049944862903025335,
+ "answer_relevancy": 0.8579006890252776
+} \ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_gn_6.json b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_6.json
new file mode 100644
index 0000000..25d04cf
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_6.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9999999999919545,
+ "context_relevancy": 0.20662768031189083,
+ "answer_relevancy": 0.9302858689849556
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9999999999919545,
+ "context_relevancy": 0.2584795321637427,
+ "answer_relevancy": 0.9258655139523131
+},
+{
+ "faithfulness": 1.0,
+ "context_utilization": 0.9999999999919545,
+ "context_relevancy": 0.1992202729044834,
+ "answer_relevancy": 0.9219977486705678
+} \ No newline at end of file