diff options
Diffstat (limited to 'gnqa/paper2_eval/data/scores')
13 files changed, 247 insertions, 0 deletions
diff --git a/gnqa/paper2_eval/data/scores/de_aging_2.json b/gnqa/paper2_eval/data/scores/de_aging_2.json new file mode 100644 index 0000000..5c3b9b7 --- /dev/null +++ b/gnqa/paper2_eval/data/scores/de_aging_2.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.8571428571428571, + "answer_relevancy": 0.9949956999959797, + "context_relevancy": 0.016129032258064516, + "context_utilization": 0.9888888888779013 +}, +{ + "faithfulness": 1.0, + "answer_relevancy": 0.9927347208847189, + "context_relevancy": 0.016129032258064516, + "context_utilization": 0.9888888888779013 +}, +{ + "faithfulness": 1.0, + "answer_relevancy": 0.9904737417734579, + "context_relevancy": 0.016129032258064516, + "context_utilization": 0.8227678571325725 +}
\ No newline at end of file diff --git a/gnqa/paper2_eval/data/scores/gpt4o_cs_aging_score.json b/gnqa/paper2_eval/data/scores/gpt4o_cs_aging_score.json new file mode 100644 index 0000000..054bd65 --- /dev/null +++ b/gnqa/paper2_eval/data/scores/gpt4o_cs_aging_score.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.8435247647089751, + "answer_relevancy": 0.9881089261370415, + "context_relevancy": 0.0626087109471981, + "context_utilization": 0.9999999999899998 +}, +{ + "faithfulness": 0.8861904761904761, + "answer_relevancy": 0.9896018313744044, + "context_relevancy": 0.06451347285195999, + "context_utilization": 0.9999999999899998 +}, +{ + "faithfulness": 0.8207883344725448, + "answer_relevancy": 0.9881842770232447, + "context_relevancy": 0.06265981914307604, + "context_utilization": 0.9999999999899998 +}
\ No newline at end of file diff --git a/gnqa/paper2_eval/data/scores/gpt4o_cs_diabetes_score.json b/gnqa/paper2_eval/data/scores/gpt4o_cs_diabetes_score.json new file mode 100644 index 0000000..0226f01 --- /dev/null +++ b/gnqa/paper2_eval/data/scores/gpt4o_cs_diabetes_score.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.9207323232323231, + "answer_relevancy": 0.9753090544756589, + "context_relevancy": 0.058441912911640505, + "context_utilization": 0.9860918997127556 +}, +{ + "faithfulness": 0.9451515151515151, + "answer_relevancy": 0.9763192509534061, + "context_relevancy": 0.055226068472391796, + "context_utilization": 0.983296406515531 +}, +{ + "faithfulness": 0.9375396825396827, + "answer_relevancy": 0.9753865446862534, + "context_relevancy": 0.05655290829923163, + "context_utilization": 0.9761685090602639 +}
\ No newline at end of file diff --git a/gnqa/paper2_eval/data/scores/gpt4o_cs_gn_score.json b/gnqa/paper2_eval/data/scores/gpt4o_cs_gn_score.json new file mode 100644 index 0000000..0584c79 --- /dev/null +++ b/gnqa/paper2_eval/data/scores/gpt4o_cs_gn_score.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.8976182844932845, + "answer_relevancy": 0.8718703112058132, + "context_relevancy": 0.061916430658313815, + "context_utilization": 0.8942353237499372 +}, +{ + "faithfulness": 0.8984067321567322, + "answer_relevancy": 0.8693138098899877, + "context_relevancy": 0.06339331584209555, + "context_utilization": 0.8957234189880159 +}, +{ + "faithfulness": 0.9011500305250306, + "answer_relevancy": 0.8734160966030811, + "context_relevancy": 0.06326778200966515, + "context_utilization": 0.8973390652465808 +}
\ No newline at end of file diff --git a/gnqa/paper2_eval/data/scores/gpt4o_de_aging_score.json b/gnqa/paper2_eval/data/scores/gpt4o_de_aging_score.json new file mode 100644 index 0000000..752716c --- /dev/null +++ b/gnqa/paper2_eval/data/scores/gpt4o_de_aging_score.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.8916292041292042, + "answer_relevancy": 0.9622909267407932, + "context_relevancy": 0.056009119488898904, + "context_utilization": 0.9949374448752393 +}, +{ + "faithfulness": 0.9239045676545677, + "answer_relevancy": 0.9602574645814024, + "context_relevancy": 0.056009119488898904, + "context_utilization": 0.9999999999899443 +}, +{ + "faithfulness": 0.9022054334554334, + "answer_relevancy": 0.9621900912593574, + "context_relevancy": 0.05556803279781221, + "context_utilization": 0.9964131393197346 +}
\ No newline at end of file diff --git a/gnqa/paper2_eval/data/scores/gpt4o_de_diabetes_score.json b/gnqa/paper2_eval/data/scores/gpt4o_de_diabetes_score.json new file mode 100644 index 0000000..18c3e33 --- /dev/null +++ b/gnqa/paper2_eval/data/scores/gpt4o_de_diabetes_score.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.8694443056943058, + "answer_relevancy": 0.9143466126793479, + "context_relevancy": 0.03506694580871902, + "context_utilization": 0.9838784170972874 +}, +{ + "faithfulness": 0.8246933621933621, + "answer_relevancy": 0.915552384671478, + "context_relevancy": 0.0329103695083071, + "context_utilization": 0.9903549382614113 +}, +{ + "faithfulness": 0.8755350899100899, + "answer_relevancy": 0.9637196237550363, + "context_relevancy": 0.0343820143018697, + "context_utilization": 0.9894689704483846 +}
\ No newline at end of file diff --git a/gnqa/paper2_eval/data/scores/gpt4o_de_gn_score.json b/gnqa/paper2_eval/data/scores/gpt4o_de_gn_score.json new file mode 100644 index 0000000..2cc53f9 --- /dev/null +++ b/gnqa/paper2_eval/data/scores/gpt4o_de_gn_score.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.8297535103785105, + "answer_relevancy": 0.872067854405554, + "context_relevancy": 0.03650042049847416, + "context_utilization": 0.9308504975963435 +}, +{ + "faithfulness": 0.7872641941391942, + "answer_relevancy": 0.8733484807158058, + "context_relevancy": 0.03650042049847416, + "context_utilization": 0.9272795414361721 +}, +{ + "faithfulness": 0.7596802503052503, + "answer_relevancy": 0.8726434115697865, + "context_relevancy": 0.03650042049847416, + "context_utilization": 0.926490378548729 +}
\ No newline at end of file diff --git a/gnqa/paper2_eval/data/scores/human_cs_aging_score.json b/gnqa/paper2_eval/data/scores/human_cs_aging_score.json new file mode 100644 index 0000000..dfd7853 --- /dev/null +++ b/gnqa/paper2_eval/data/scores/human_cs_aging_score.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.8425019425019424, + "answer_relevancy": 0.952484152627895, + "context_relevancy": 0.04961625911070337, + "context_utilization": 0.9981956315188991 +}, +{ + "faithfulness": 0.871989121989122, + "answer_relevancy": 0.9531096223056006, + "context_relevancy": 0.05031277271044277, + "context_utilization": 0.9953228869794345 +}, +{ + "faithfulness": 0.8605672105672105, + "answer_relevancy": 0.9564885313193343, + "context_relevancy": 0.0482337706314407, + "context_utilization": 0.9981956315188991 +}
\ No newline at end of file diff --git a/gnqa/paper2_eval/data/scores/human_cs_diabetes_score.json b/gnqa/paper2_eval/data/scores/human_cs_diabetes_score.json new file mode 100644 index 0000000..8974d29 --- /dev/null +++ b/gnqa/paper2_eval/data/scores/human_cs_diabetes_score.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.719520757020757, + "answer_relevancy": 0.9516275767101735, + "context_relevancy": 0.0490614785763392, + "context_utilization": 0.9841827876884541 +}, +{ + "faithfulness": 0.7050892857142858, + "answer_relevancy": 0.957680303141668, + "context_relevancy": 0.04485272436758499, + "context_utilization": 0.9900766093374835 +}, +{ + "faithfulness": 0.7229868742368742, + "answer_relevancy": 0.9564961457687489, + "context_relevancy": 0.04876279279378244, + "context_utilization": 0.9841827876884541 +}
\ No newline at end of file diff --git a/gnqa/paper2_eval/data/scores/human_cs_gn_score.json b/gnqa/paper2_eval/data/scores/human_cs_gn_score.json new file mode 100644 index 0000000..2f9f47a --- /dev/null +++ b/gnqa/paper2_eval/data/scores/human_cs_gn_score.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.8040619953533557, + "answer_relevancy": 0.933496097114236, + "context_relevancy": 0.04967998901244118, + "context_utilization": 0.9402004942497864 +}, +{ + "faithfulness": 0.8140579688144026, + "answer_relevancy": 0.9362523112695514, + "context_relevancy": 0.04934854961850179, + "context_utilization": 0.9462251639555944 +}, +{ + "faithfulness": 0.8346159741927046, + "answer_relevancy": 0.9378502646867282, + "context_relevancy": 0.050870465202917374, + "context_utilization": 0.9566437990021893 +}
\ No newline at end of file diff --git a/gnqa/paper2_eval/data/scores/human_de_aging_score.json b/gnqa/paper2_eval/data/scores/human_de_aging_score.json new file mode 100644 index 0000000..8a9abf3 --- /dev/null +++ b/gnqa/paper2_eval/data/scores/human_de_aging_score.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.6266788766788767, + "answer_relevancy": 0.9706128992746693, + "context_relevancy": 0.045458016797918395, + "context_utilization": 0.9861919459039513 +}, +{ + "faithfulness": 0.7455128205128205, + "answer_relevancy": 0.9737920787712793, + "context_relevancy": 0.040249683464585066, + "context_utilization": 0.9861919459039513 +}, +{ + "faithfulness": 0.6762223283962414, + "answer_relevancy": 0.9763265441772212, + "context_relevancy": 0.045458016797918395, + "context_utilization": 0.9496353247987496 +}
\ No newline at end of file diff --git a/gnqa/paper2_eval/data/scores/human_de_diabetes_score.json b/gnqa/paper2_eval/data/scores/human_de_diabetes_score.json new file mode 100644 index 0000000..8f86e84 --- /dev/null +++ b/gnqa/paper2_eval/data/scores/human_de_diabetes_score.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.8657178469678469, + "answer_relevancy": 0.9698773165539708, + "context_relevancy": 0.03165020943114643, + "context_utilization": 0.980117182190157 +}, +{ + "faithfulness": 0.8578238001314925, + "answer_relevancy": 0.9720084862982596, + "context_relevancy": 0.03536219511987631, + "context_utilization": 0.9831616300264557 +}, +{ + "faithfulness": 0.7997557997557998, + "answer_relevancy": 0.9715015597017967, + "context_relevancy": 0.033439118196799386, + "context_utilization": 0.9790216727612565 +}
\ No newline at end of file diff --git a/gnqa/paper2_eval/data/scores/human_de_gn_score.json b/gnqa/paper2_eval/data/scores/human_de_gn_score.json new file mode 100644 index 0000000..0155ee1 --- /dev/null +++ b/gnqa/paper2_eval/data/scores/human_de_gn_score.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.9060218485218485, + "answer_relevancy": 0.9180706452518813, + "context_relevancy": 0.04851549372256422, + "context_utilization": 0.9846595919399526 +}, +{ + "faithfulness": 0.9164201118746573, + "answer_relevancy": 0.9191154003593898, + "context_relevancy": 0.04842257317509822, + "context_utilization": 0.978182619837846 +}, +{ + "faithfulness": 0.9114957337449501, + "answer_relevancy": 0.9201106080009104, + "context_relevancy": 0.04557179296455012, + "context_utilization": 0.9875496031612268 +}
\ No newline at end of file |