diff options
author | SoloDShelby | 2024-07-19 14:41:40 +0300 |
---|---|---|
committer | SoloDShelby | 2024-07-19 14:41:40 +0300 |
commit | 3fa31b50af2861382fbe2c76406f5a04c3fefc93 (patch) | |
tree | 34d581648b0e0d3fc8dbe6577752a4fd433a3258 /gnqa/paper1_eval/src/data/results/human | |
parent | 74616897e30c7daafe5e74d34073466464921316 (diff) | |
download | gn-ai-3fa31b50af2861382fbe2c76406f5a04c3fefc93.tar.gz |
Evaluation code for paper 1
Diffstat (limited to 'gnqa/paper1_eval/src/data/results/human')
26 files changed, 513 insertions, 0 deletions
diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_1.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_1.json new file mode 100644 index 0000000..f37296e --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_1.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.9428571428571428, + "context_utilization": 0.9352808378906239, + "context_relevancy": 0.07125660926343383, + "answer_relevancy": 0.9523107847972947 +}, +{ + "faithfulness": 0.9428571428571428, + "context_utilization": 0.9355754170487147, + "context_relevancy": 0.07125660926343383, + "answer_relevancy": 0.9549674105661919 +}, +{ + "faithfulness": 0.9428571428571428, + "context_utilization": 0.9211814776549062, + "context_relevancy": 0.07125660926343383, + "answer_relevancy": 0.9499741000488516 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_2.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_2.json new file mode 100644 index 0000000..f7dae45 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_2.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.7742138364779875, + "context_utilization": 0.9894163077459343, + "context_relevancy": 0.04506568948673187, + "answer_relevancy": 0.9408685212116719 +}, +{ + "faithfulness": 0.7742138364779875, + "context_utilization": 0.9894163077459343, + "context_relevancy": 0.04506568948673187, + "answer_relevancy": 0.9443348131121218 +}, +{ + "faithfulness": 0.7742138364779875, + "context_utilization": 0.9894163077459343, + "context_relevancy": 0.04506568948673187, + "answer_relevancy": 0.9373602976132769 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_3.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_3.json new file mode 100644 index 0000000..b844e70 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_aging_3.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.5714285714285715, + "context_utilization": 0.8007295763340471, + "context_relevancy": 0.17757604714126454, + "answer_relevancy": 0.9624406549445811 +}, +{ + "faithfulness": 0.5714285714285715, + "context_utilization": 0.8256406991618427, + "context_relevancy": 0.17757604714126454, + "answer_relevancy": 0.9624295953235836 +}, +{ + "faithfulness": 0.5714285714285715, + "context_utilization": 0.8256406991618427, + "context_relevancy": 0.17757604714126454, + "answer_relevancy": 0.9622154472101722 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_1.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_1.json new file mode 100644 index 0000000..8316988 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_1.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.875, + "context_utilization": 0.6983276538190184, + "context_relevancy": 0.12429532403609515, + "answer_relevancy": 0.9112620728936985 +}, +{ + "faithfulness": 0.875, + "context_utilization": 0.6983276538190184, + "context_relevancy": 0.09929532403609516, + "answer_relevancy": 0.9153897050102227 +}, +{ + "faithfulness": 0.875, + "context_utilization": 0.6983276538190184, + "context_relevancy": 0.10864315012305167, + "answer_relevancy": 0.917767867097622 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_2.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_2.json new file mode 100644 index 0000000..7020070 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_2.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.96, + "context_utilization": 0.9677256242806254, + "context_relevancy": 0.21125490196078428, + "answer_relevancy": 0.96903893567995 +}, +{ + "faithfulness": 0.96, + "context_utilization": 0.9769465411060386, + "context_relevancy": 0.2143799019607843, + "answer_relevancy": 0.9657737286038965 +}, +{ + "faithfulness": 0.96, + "context_utilization": 0.9769465411060386, + "context_relevancy": 0.2143799019607843, + "answer_relevancy": 0.9662487631948171 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_3.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_3.json new file mode 100644 index 0000000..1b57ac7 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_3.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.8400000000000001, + "context_utilization": 0.9538081741417747, + "context_relevancy": 0.11497132693854006, + "answer_relevancy": 0.9169018406443659 +}, +{ + "faithfulness": 0.8400000000000001, + "context_utilization": 0.9538081741417747, + "context_relevancy": 0.2016379936052067, + "answer_relevancy": 0.9187380038134432 +}, +{ + "faithfulness": 0.8400000000000001, + "context_utilization": 0.9434457191364413, + "context_relevancy": 0.11497132693854006, + "answer_relevancy": 0.9169054522175759 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_4.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_4.json new file mode 100644 index 0000000..e54895e --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_diabetes_4.json @@ -0,0 +1,39 @@ +[ +{ + "faithfulness": 0.75, + "context_utilization": 0.49586940836114385, + "context_relevancy": 0.4489795918367347, + "answer_relevancy": 0.9050522628722737 +}, +{ + "faithfulness": 0.75, + "context_utilization": 0.5332560296769832, + "context_relevancy": 0.4489795918367347, + "answer_relevancy": 0.9274337314167257 +}, +{ + "faithfulness": 0.75, + "context_utilization": 0.49586940836114385, + "context_relevancy": 0.4489795918367347, + "answer_relevancy": 0.9274337314167257 +} +] +, +{ + "faithfulness": 0.75, + "context_utilization": 0.49586940836114385, + "context_relevancy": 0.2857142857142857, + "answer_relevancy": 0.9050522628722737 +}, +{ + "faithfulness": 0.75, + "context_utilization": 0.49586940836114385, + "context_relevancy": 0.4489795918367347, + "answer_relevancy": 0.9050692102679129 +}, +{ + "faithfulness": 0.75, + "context_utilization": 0.49586940836114385, + "context_relevancy": 0.4489795918367347, + "answer_relevancy": 0.9050522628722737 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_1.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_1.json new file mode 100644 index 0000000..4481bdb --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_1.json @@ -0,0 +1,14 @@ +[ +{ + "faithfulness": 0.9099999999999999, + "context_utilization": 0.7636817432217684, + "context_relevancy": 0.1880278568582262, + "answer_relevancy": 0.9423280729066063 +}, +{ + "faithfulness": 0.9099999999999999, + "context_utilization": 0.7357044805156637, + "context_relevancy": 0.15469452352489288, + "answer_relevancy": 0.9486310766041234 +} +] diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_2.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_2.json new file mode 100644 index 0000000..f0733da --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_2.json @@ -0,0 +1,20 @@ +[ +{ + "faithfulness": 1.0, + "context_utilization": 0.6326643990778912, + "context_relevancy": 0.1347400263302517, + "answer_relevancy": 0.8746783013952267 +}, +{ + "faithfulness": 1.0, + "context_utilization": 0.6683786847884866, + "context_relevancy": 0.1508690585883162, + "answer_relevancy": 0.8703116371547157 +}, +{ + "faithfulness": 1.0, + "context_utilization": 0.6326643990778912, + "context_relevancy": 0.1332248748151002, + "answer_relevancy": 0.8689393391315343 +} +] diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_3.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_3.json new file mode 100644 index 0000000..7258a04 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_3.json @@ -0,0 +1,25 @@ +, +{ + "faithfulness": 0.5677966101694916, + "context_utilization": 0.4561270844811867, + "context_relevancy": 0.5560185185148071, + "answer_relevancy": 0.5052295687739448 +}, +{ + "faithfulness": 0.75, + "context_utilization": 0.5643129043087701, + "context_relevancy": 0.05599820060366845, + "answer_relevancy": 0.7414497144046052 +}, +{ + "faithfulness": 0.75, + "context_utilization": 0.5729415276879585, + "context_relevancy": 0.05599820060366845, + "answer_relevancy": 0.5544292034718707 +}, +{ + "faithfulness": 0.75, + "context_utilization": 0.5643129043087701, + "context_relevancy": 0.05599820060366845, + "answer_relevancy": 0.5571557447633533 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_4.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_4.json new file mode 100644 index 0000000..15b1eb4 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_4.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.9428571428571428, + "context_utilization": 0.789441709521905, + "context_relevancy": 0.136784410468621, + "answer_relevancy": 0.8500389108331188 +}, +{ + "faithfulness": 0.9142857142857143, + "context_utilization": 0.7921665772467545, + "context_relevancy": 0.15115688010424852, + "answer_relevancy": 0.8317623611813637 +}, +{ + "faithfulness": 0.9142857142857143, + "context_utilization": 0.789441709521905, + "context_relevancy": 0.1713997950840056, + "answer_relevancy": 0.8295033051724321 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_5.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_5.json new file mode 100644 index 0000000..03713c2 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_5.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.9333333333333332, + "context_utilization": 0.6801836614504664, + "context_relevancy": 0.06454107195486505, + "answer_relevancy": 0.7372449377189451 +}, +{ + "faithfulness": 0.888888888888889, + "context_utilization": 0.6582554717950728, + "context_relevancy": 0.06454107195486505, + "answer_relevancy": 0.7372493726798736 +}, +{ + "faithfulness": 0.8761904761904763, + "context_utilization": 0.6582554717950728, + "context_relevancy": 0.06454107195486505, + "answer_relevancy": 0.7372449377189451 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_6.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_6.json new file mode 100644 index 0000000..0d67e80 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_6.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.75, + "context_utilization": 0.45564199508207504, + "context_relevancy": 0.06005275024001898, + "answer_relevancy": 0.8915679391851077 +}, +{ + "faithfulness": 0.75, + "context_utilization": 0.45564199508207504, + "context_relevancy": 0.05215801339791372, + "answer_relevancy": 0.7064299254450507 +}, +{ + "faithfulness": 0.75, + "context_utilization": 0.45564199508207504, + "context_relevancy": 0.0707670359543047, + "answer_relevancy": 0.705077643467664 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_7.json b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_7.json new file mode 100644 index 0000000..a30782a --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_cs_gn_7.json @@ -0,0 +1,18 @@ +{ + "faithfulness": 1.0, + "context_utilization": 0.9178474303338136, + "context_relevancy": 0.09082338152105594, + "answer_relevancy": 0.9524284122181226 +}, +{ + "faithfulness": 1.0, + "context_utilization": 0.9178474303338136, + "context_relevancy": 0.09082338152105594, + "answer_relevancy": 0.9492709094955006 +}, +{ + "faithfulness": 1.0, + "context_utilization": 0.9178474303338136, + "context_relevancy": 0.09082338152105594, + "answer_relevancy": 0.9524270517859097 +} diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_aging_1.json b/gnqa/paper1_eval/src/data/results/human/scores_de_aging_1.json new file mode 100644 index 0000000..0700cc3 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_de_aging_1.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.7428571428571429, + "context_utilization": 0.811213861888054, + "context_relevancy": 0.2314977832798794, + "answer_relevancy": 0.9433409234117335 +}, +{ + "faithfulness": 0.7428571428571429, + "context_utilization": 0.7983208584270672, + "context_relevancy": 0.24114933391503665, + "answer_relevancy": 0.9213466964486724 +}, +{ + "faithfulness": 0.7142857142857142, + "context_utilization": 0.7928499698879043, + "context_relevancy": 0.25367860791972047, + "answer_relevancy": 0.9318615626710995 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_aging_2.json b/gnqa/paper1_eval/src/data/results/human/scores_de_aging_2.json new file mode 100644 index 0000000..b7f8cc0 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_de_aging_2.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 1.0, + "context_utilization": 0.9999999999923077, + "context_relevancy": 1.0, + "answer_relevancy": 0.8836732547434365 +}, +{ + "faithfulness": 1.0, + "context_utilization": 0.9999999999923077, + "context_relevancy": 1.0, + "answer_relevancy": 0.8836732547434365 +}, +{ + "faithfulness": 1.0, + "context_utilization": 0.9999999999923077, + "context_relevancy": 1.0, + "answer_relevancy": 0.8836732547434365 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_1.1.json b/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_1.1.json new file mode 100644 index 0000000..0e46a7f --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_1.1.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 1.0, + "context_utilization": 0.7777403152338384, + "context_relevancy": 0.06084656084656084, + "answer_relevancy": 0.9645121106959694 +}, +{ + "faithfulness": 1.0, + "context_utilization": 0.7777403152338384, + "context_relevancy": 0.06084656084656084, + "answer_relevancy": 0.9545089573441493 +}, +{ + "faithfulness": 1.0, + "context_utilization": 0.7719252969185456, + "context_relevancy": 0.05026455026455026, + "answer_relevancy": 0.9327156331092903 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_1.json b/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_1.json new file mode 100644 index 0000000..0b621e2 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_1.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.9166666666666667, + "context_utilization": 0.7671392748688641, + "context_relevancy": 0.33561602418745273, + "answer_relevancy": 0.90324232280188 +}, +{ + "faithfulness": 0.9166666666666667, + "context_utilization": 0.8555804271901495, + "context_relevancy": 0.2314914450628736, + "answer_relevancy": 0.7214993293693964 +}, +{ + "faithfulness": 0.9666666666666668, + "context_utilization": 0.8080409996869443, + "context_relevancy": 0.2837641723356009, + "answer_relevancy": 0.9014349074286775 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_2.json b/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_2.json new file mode 100644 index 0000000..bd6159a --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_de_diabetes_2.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.96, + "context_utilization": 0.9407265478802447, + "context_relevancy": 0.36922494182022314, + "answer_relevancy": 0.9364702737085768 +}, +{ + "faithfulness": 0.96, + "context_utilization": 0.9344763371477345, + "context_relevancy": 0.386466321130568, + "answer_relevancy": 0.944903559928554 +}, +{ + "faithfulness": 0.96, + "context_utilization": 0.9344763371477345, + "context_relevancy": 0.36922494182022314, + "answer_relevancy": 0.9355512181399582 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_gn_1.1.json b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_1.1.json new file mode 100644 index 0000000..d47c31f --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_1.1.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.9609375, + "context_utilization": 0.6937871661149843, + "context_relevancy": 0.13637360626722328, + "answer_relevancy": 0.7491735530216923 +}, +{ + "faithfulness": 0.9609375, + "context_utilization": 0.6937871661149843, + "context_relevancy": 0.13637360626722328, + "answer_relevancy": 0.8902254519253692 +}, +{ + "faithfulness": 0.9296875, + "context_utilization": 0.6937871661149843, + "context_relevancy": 0.13637360626722328, + "answer_relevancy": 0.7491716987687886 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_gn_1.json b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_1.json new file mode 100644 index 0000000..9b8aea1 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_1.json @@ -0,0 +1,18 @@ +{ + "faithfulness": 1.0, + "context_utilization": 0.9596645021564207, + "context_relevancy": 0.1634286630390054, + "answer_relevancy": 0.8973761639776056 +}, +{ + "faithfulness": 1.0, + "context_utilization": 0.9596645021564207, + "context_relevancy": 0.1634286630390054, + "answer_relevancy": 0.9038434542970721 +}, +{ + "faithfulness": 1.0, + "context_utilization": 0.9561079845997444, + "context_relevancy": 0.1634286630390054, + "answer_relevancy": 0.8983469111948426 +} diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_gn_2.json b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_2.json new file mode 100644 index 0000000..30be099 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_2.json @@ -0,0 +1,18 @@ +{ + "faithfulness": 0.8, + "context_utilization": 0.7266600180799679, + "context_relevancy": 0.12599664343008876, + "answer_relevancy": 0.7320068044307713 +}, +{ + "faithfulness": 0.8, + "context_utilization": 0.7266600180799679, + "context_relevancy": 0.13234584977929512, + "answer_relevancy": 0.7198147208663943 +}, +{ + "faithfulness": 0.8, + "context_utilization": 0.7266600180799679, + "context_relevancy": 0.12849969593314126, + "answer_relevancy": 0.7325464661134955 +} diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_gn_3.json b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_3.json new file mode 100644 index 0000000..33a94ff --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_3.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.8666666666666666, + "context_utilization": 0.6480859663109396, + "context_relevancy": 0.1510877797535341, + "answer_relevancy": 0.915240518467451 +}, +{ + "faithfulness": 0.8666666666666666, + "context_utilization": 0.6480859663109396, + "context_relevancy": 0.11387847742795269, + "answer_relevancy": 0.9124757388808369 +}, +{ + "faithfulness": 0.9333333333333332, + "context_utilization": 0.6480859663109396, + "context_relevancy": 0.1510877797535341, + "answer_relevancy": 0.9141762748312928 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_gn_4.json b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_4.json new file mode 100644 index 0000000..345f566 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_4.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.65, + "context_utilization": 0.354120538187183, + "context_relevancy": 0.1120026888642334, + "answer_relevancy": 0.7376780691990237 +}, +{ + "faithfulness": 0.5333333333333333, + "context_utilization": 0.34712053818788413, + "context_relevancy": 0.1120026888642334, + "answer_relevancy": 0.7455570356847625 +}, +{ + "faithfulness": 0.65, + "context_utilization": 0.34712053818788413, + "context_relevancy": 0.0993042761658207, + "answer_relevancy": 0.7376780609996703 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_gn_5.json b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_5.json new file mode 100644 index 0000000..5148d68 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_5.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 1.0, + "context_utilization": 0.8007395937295169, + "context_relevancy": 0.049944862903025335, + "answer_relevancy": 0.8599243307705603 +}, +{ + "faithfulness": 0.8, + "context_utilization": 0.806603791260579, + "context_relevancy": 0.049944862903025335, + "answer_relevancy": 0.6986715526356269 +}, +{ + "faithfulness": 0.9, + "context_utilization": 0.806603791260579, + "context_relevancy": 0.049944862903025335, + "answer_relevancy": 0.8579006890252776 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/human/scores_de_gn_6.json b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_6.json new file mode 100644 index 0000000..25d04cf --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/human/scores_de_gn_6.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 1.0, + "context_utilization": 0.9999999999919545, + "context_relevancy": 0.20662768031189083, + "answer_relevancy": 0.9302858689849556 +}, +{ + "faithfulness": 1.0, + "context_utilization": 0.9999999999919545, + "context_relevancy": 0.2584795321637427, + "answer_relevancy": 0.9258655139523131 +}, +{ + "faithfulness": 1.0, + "context_utilization": 0.9999999999919545, + "context_relevancy": 0.1992202729044834, + "answer_relevancy": 0.9219977486705678 +}
\ No newline at end of file |