From 835e229909e9bdb6e084c5112672065886517adb Mon Sep 17 00:00:00 2001
From: Nyeusi D. Shebes
Date: Thu, 27 Feb 2025 22:04:47 -0600
Subject: refactoring codebase

---
 gnqa/data/study1/results/eval2_general1.json       |  7 ++++
 gnqa/data/study1/results/eval2_general2.json       | 13 ++++++++
 gnqa/data/study1/results/eval_aging1.json          | 19 +++++++++++
 gnqa/data/study1/results/eval_aging2.json          | 19 +++++++++++
 gnqa/data/study1/results/eval_experts_aging1.json  | 18 ++++++++++
 gnqa/data/study1/results/eval_experts_aging2.json  | 18 ++++++++++
 .../data/study1/results/eval_experts_general1.json | 19 +++++++++++
 .../data/study1/results/eval_experts_general2.json | 19 +++++++++++
 gnqa/data/study1/results/eval_experts_suga1.json   | 18 ++++++++++
 gnqa/data/study1/results/eval_general1.json        | 18 ++++++++++
 gnqa/data/study1/results/eval_general2.json        | 18 ++++++++++
 gnqa/data/study1/results/eval_suga1.json           | 19 +++++++++++
 gnqa/data/study1/results/eval_suga2.json           | 19 +++++++++++
 gnqa/data/study1/results/eval_sugaA.json           |  7 ++++
 gnqa/data/study1/results/gemma_eval_general1.json  |  7 ++++
 gnqa/data/study1/results/gemma_eval_general2.json  |  7 ++++
 .../results/gpt4o/gpt4o_eval_cs_aging_1.json       | 19 +++++++++++
 .../results/gpt4o/gpt4o_eval_cs_aging_2.json       | 19 +++++++++++
 .../results/gpt4o/gpt4o_eval_cs_aging_3.json       | 19 +++++++++++
 .../results/gpt4o/gpt4o_eval_cs_aging_4.json       | 19 +++++++++++
 .../results/gpt4o/gpt4o_eval_cs_diabetes_1.json    | 19 +++++++++++
 .../results/gpt4o/gpt4o_eval_cs_diabetes_2.json    | 19 +++++++++++
 .../results/gpt4o/gpt4o_eval_cs_diabetes_3.json    | 19 +++++++++++
 .../results/gpt4o/gpt4o_eval_cs_diabetes_4.json    | 19 +++++++++++
 .../study1/results/gpt4o/gpt4o_eval_cs_gn_1.json   | 19 +++++++++++
 .../study1/results/gpt4o/gpt4o_eval_cs_gn_3.json   | 19 +++++++++++
 .../study1/results/gpt4o/gpt4o_eval_cs_gn_4.json   | 19 +++++++++++
 .../results/gpt4o/gpt4o_eval_de_aging_1.json       | 19 +++++++++++
 .../results/gpt4o/gpt4o_eval_de_aging_2.json       | 19 +++++++++++
 .../results/gpt4o/gpt4o_eval_de_aging_3.json       | 19 +++++++++++
 .../results/gpt4o/gpt4o_eval_de_aging_4.json       | 19 +++++++++++
 .../results/gpt4o/gpt4o_eval_de_diabetes_1.json    | 20 +++++++++++
 .../results/gpt4o/gpt4o_eval_de_diabetes_2.json    | 20 +++++++++++
 .../results/gpt4o/gpt4o_eval_de_diabetes_3.json    | 20 +++++++++++
 .../results/gpt4o/gpt4o_eval_de_diabetes_4.json    | 20 +++++++++++
 .../study1/results/gpt4o/gpt4o_eval_de_gn_3.json   | 19 +++++++++++
 .../study1/results/gpt4o/scores_cs_diabetes.json   | 37 ++++++++++++++++++++
 .../study1/results/human/scores_cs_aging_1.json    | 19 +++++++++++
 .../study1/results/human/scores_cs_aging_2.json    | 19 +++++++++++
 .../study1/results/human/scores_cs_aging_3.json    | 19 +++++++++++
 .../study1/results/human/scores_cs_diabetes_1.json | 19 +++++++++++
 .../study1/results/human/scores_cs_diabetes_2.json | 19 +++++++++++
 .../study1/results/human/scores_cs_diabetes_3.json | 19 +++++++++++
 .../study1/results/human/scores_cs_diabetes_4.json | 39 ++++++++++++++++++++++
 gnqa/data/study1/results/human/scores_cs_gn_1.json | 14 ++++++++
 gnqa/data/study1/results/human/scores_cs_gn_2.json | 20 +++++++++++
 gnqa/data/study1/results/human/scores_cs_gn_3.json | 25 ++++++++++++++
 gnqa/data/study1/results/human/scores_cs_gn_4.json | 19 +++++++++++
 gnqa/data/study1/results/human/scores_cs_gn_5.json | 19 +++++++++++
 gnqa/data/study1/results/human/scores_cs_gn_6.json | 19 +++++++++++
 gnqa/data/study1/results/human/scores_cs_gn_7.json | 18 ++++++++++
 .../study1/results/human/scores_de_aging_1.json    | 19 +++++++++++
 .../study1/results/human/scores_de_aging_2.json    | 19 +++++++++++
 .../results/human/scores_de_diabetes_1.1.json      | 19 +++++++++++
 .../study1/results/human/scores_de_diabetes_1.json | 19 +++++++++++
 .../study1/results/human/scores_de_diabetes_2.json | 19 +++++++++++
 .../study1/results/human/scores_de_gn_1.1.json     | 19 +++++++++++
 gnqa/data/study1/results/human/scores_de_gn_1.json | 18 ++++++++++
 gnqa/data/study1/results/human/scores_de_gn_2.json | 18 ++++++++++
 gnqa/data/study1/results/human/scores_de_gn_3.json | 19 +++++++++++
 gnqa/data/study1/results/human/scores_de_gn_4.json | 19 +++++++++++
 gnqa/data/study1/results/human/scores_de_gn_5.json | 19 +++++++++++
 gnqa/data/study1/results/human/scores_de_gn_6.json | 19 +++++++++++
 gnqa/data/study1/results/llamaeval_general1.json   | 13 ++++++++
 gnqa/data/study1/results/results.json              | 20 +++++++++++
 gnqa/data/study1/results/results_aging.json        | 19 +++++++++++
 gnqa/data/study1/results/test.json                 | 19 +++++++++++
 gnqa/data/study1/results/test2.json                | 19 +++++++++++
 68 files changed, 1269 insertions(+)
 create mode 100644 gnqa/data/study1/results/eval2_general1.json
 create mode 100644 gnqa/data/study1/results/eval2_general2.json
 create mode 100644 gnqa/data/study1/results/eval_aging1.json
 create mode 100644 gnqa/data/study1/results/eval_aging2.json
 create mode 100644 gnqa/data/study1/results/eval_experts_aging1.json
 create mode 100644 gnqa/data/study1/results/eval_experts_aging2.json
 create mode 100644 gnqa/data/study1/results/eval_experts_general1.json
 create mode 100644 gnqa/data/study1/results/eval_experts_general2.json
 create mode 100644 gnqa/data/study1/results/eval_experts_suga1.json
 create mode 100644 gnqa/data/study1/results/eval_general1.json
 create mode 100644 gnqa/data/study1/results/eval_general2.json
 create mode 100644 gnqa/data/study1/results/eval_suga1.json
 create mode 100644 gnqa/data/study1/results/eval_suga2.json
 create mode 100644 gnqa/data/study1/results/eval_sugaA.json
 create mode 100644 gnqa/data/study1/results/gemma_eval_general1.json
 create mode 100644 gnqa/data/study1/results/gemma_eval_general2.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_aging_1.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_aging_2.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_aging_3.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_aging_4.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_diabetes_1.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_diabetes_2.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_diabetes_3.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_diabetes_4.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_gn_1.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_gn_3.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_gn_4.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_de_aging_1.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_de_aging_2.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_de_aging_3.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_de_aging_4.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_de_diabetes_1.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_de_diabetes_2.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_de_diabetes_3.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_de_diabetes_4.json
 create mode 100644 gnqa/data/study1/results/gpt4o/gpt4o_eval_de_gn_3.json
 create mode 100644 gnqa/data/study1/results/gpt4o/scores_cs_diabetes.json
 create mode 100644 gnqa/data/study1/results/human/scores_cs_aging_1.json
 create mode 100644 gnqa/data/study1/results/human/scores_cs_aging_2.json
 create mode 100644 gnqa/data/study1/results/human/scores_cs_aging_3.json
 create mode 100644 gnqa/data/study1/results/human/scores_cs_diabetes_1.json
 create mode 100644 gnqa/data/study1/results/human/scores_cs_diabetes_2.json
 create mode 100644 gnqa/data/study1/results/human/scores_cs_diabetes_3.json
 create mode 100644 gnqa/data/study1/results/human/scores_cs_diabetes_4.json
 create mode 100644 gnqa/data/study1/results/human/scores_cs_gn_1.json
 create mode 100644 gnqa/data/study1/results/human/scores_cs_gn_2.json
 create mode 100644 gnqa/data/study1/results/human/scores_cs_gn_3.json
 create mode 100644 gnqa/data/study1/results/human/scores_cs_gn_4.json
 create mode 100644 gnqa/data/study1/results/human/scores_cs_gn_5.json
 create mode 100644 gnqa/data/study1/results/human/scores_cs_gn_6.json
 create mode 100644 gnqa/data/study1/results/human/scores_cs_gn_7.json
 create mode 100644 gnqa/data/study1/results/human/scores_de_aging_1.json
 create mode 100644 gnqa/data/study1/results/human/scores_de_aging_2.json
 create mode 100644 gnqa/data/study1/results/human/scores_de_diabetes_1.1.json
 create mode 100644 gnqa/data/study1/results/human/scores_de_diabetes_1.json
 create mode 100644 gnqa/data/study1/results/human/scores_de_diabetes_2.json
 create mode 100644 gnqa/data/study1/results/human/scores_de_gn_1.1.json
 create mode 100644 gnqa/data/study1/results/human/scores_de_gn_1.json
 create mode 100644 gnqa/data/study1/results/human/scores_de_gn_2.json
 create mode 100644 gnqa/data/study1/results/human/scores_de_gn_3.json
 create mode 100644 gnqa/data/study1/results/human/scores_de_gn_4.json
 create mode 100644 gnqa/data/study1/results/human/scores_de_gn_5.json
 create mode 100644 gnqa/data/study1/results/human/scores_de_gn_6.json
 create mode 100644 gnqa/data/study1/results/llamaeval_general1.json
 create mode 100644 gnqa/data/study1/results/results.json
 create mode 100644 gnqa/data/study1/results/results_aging.json
 create mode 100644 gnqa/data/study1/results/test.json
 create mode 100644 gnqa/data/study1/results/test2.json

(limited to 'gnqa/data/study1/results')

diff --git a/gnqa/data/study1/results/eval2_general1.json b/gnqa/data/study1/results/eval2_general1.json
new file mode 100644
index 0000000..9c8dd91
--- /dev/null
+++ b/gnqa/data/study1/results/eval2_general1.json
@@ -0,0 +1,7 @@
+,
+{
+  "faithfulness": 0.7428571428571429,
+  "answer_relevancy": 0.9780678036268498,
+  "context_relevancy": 0.09343441716165339,
+  "context_utilization": 0.816596788224676
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/eval2_general2.json b/gnqa/data/study1/results/eval2_general2.json
new file mode 100644
index 0000000..face395
--- /dev/null
+++ b/gnqa/data/study1/results/eval2_general2.json
@@ -0,0 +1,13 @@
+,
+{
+  "faithfulness": NaN,
+  "answer_relevancy": NaN,
+  "context_relevancy": 0.10210226586398571,
+  "context_utilization": NaN
+},
+{
+  "faithfulness": 0.85,
+  "answer_relevancy": 0.6948351748903157,
+  "context_relevancy": 0.09669216181532704,
+  "context_utilization": 0.7730960707226785
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/eval_aging1.json b/gnqa/data/study1/results/eval_aging1.json
new file mode 100644
index 0000000..7f020f8
--- /dev/null
+++ b/gnqa/data/study1/results/eval_aging1.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "answer_relevancy": 0.90332619492291,
+  "context_relevancy": 0.16311053327554975,
+  "context_utilization": 0.9695800984320362
+},
+{
+  "faithfulness": 0.9777777777777779,
+  "answer_relevancy": 0.9152650172290191,
+  "context_relevancy": 0.17545621228789543,
+  "context_utilization": 0.9695800984320362
+},
+{
+  "faithfulness": 1.0,
+  "answer_relevancy": 0.9207411197703179,
+  "context_relevancy": 0.19377271060439374,
+  "context_utilization": 0.9695800984320362
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/eval_aging2.json b/gnqa/data/study1/results/eval_aging2.json
new file mode 100644
index 0000000..5cf1f31
--- /dev/null
+++ b/gnqa/data/study1/results/eval_aging2.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "answer_relevancy": 0.9131945711490829,
+  "context_relevancy": 0.0843248379163872,
+  "context_utilization": 0.8269904041235476
+},
+{
+  "faithfulness": 1.0,
+  "answer_relevancy": 0.9073113293523962,
+  "context_relevancy": 0.0843248379163872,
+  "context_utilization": 0.833091604265284
+},
+{
+  "faithfulness": 1.0,
+  "answer_relevancy": 0.909257413921701,
+  "context_relevancy": 0.0843248379163872,
+  "context_utilization": 0.833091604265284
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/eval_experts_aging1.json b/gnqa/data/study1/results/eval_experts_aging1.json
new file mode 100644
index 0000000..19bfc90
--- /dev/null
+++ b/gnqa/data/study1/results/eval_experts_aging1.json
@@ -0,0 +1,18 @@
+{
+  "faithfulness": 0.8742857142857143,
+  "answer_relevancy": 0.9678684040431473,
+  "context_relevancy": 0.2085018446737963,
+  "context_utilization": 0.9272852892960846
+},
+{
+  "faithfulness": 0.8742857142857143,
+  "answer_relevancy": 0.9685606717668597,
+  "context_relevancy": 0.20135898753093917,
+  "context_utilization": 0.9272852892960846
+},
+{
+  "faithfulness": 0.8742857142857143,
+  "answer_relevancy": 0.9690321094868484,
+  "context_relevancy": 0.20135898753093917,
+  "context_utilization": 0.9260832100237781
+}
diff --git a/gnqa/data/study1/results/eval_experts_aging2.json b/gnqa/data/study1/results/eval_experts_aging2.json
new file mode 100644
index 0000000..02c1939
--- /dev/null
+++ b/gnqa/data/study1/results/eval_experts_aging2.json
@@ -0,0 +1,18 @@
+{
+  "faithfulness": 0.9714285714285715,
+  "answer_relevancy": 0.9655810278750667,
+  "context_relevancy": 0.22941000299490866,
+  "context_utilization": 0.9589677983113123
+},
+{
+  "faithfulness": 0.9560439560439562,
+  "answer_relevancy": 0.9751092927895293,
+  "context_relevancy": 0.22941000299490866,
+  "context_utilization": 0.9589677983113123
+},
+{
+  "faithfulness": 0.9560439560439562,
+  "answer_relevancy": 0.9751092927895293,
+  "context_relevancy": 0.23207666966157534,
+  "context_utilization": 0.9516178189920771
+}
diff --git a/gnqa/data/study1/results/eval_experts_general1.json b/gnqa/data/study1/results/eval_experts_general1.json
new file mode 100644
index 0000000..1bba1d5
--- /dev/null
+++ b/gnqa/data/study1/results/eval_experts_general1.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "answer_relevancy": 0.9053928340589652,
+  "context_relevancy": 0.2827950558213716,
+  "context_utilization": 0.7705234648910072
+},
+{
+  "faithfulness": 1.0,
+  "answer_relevancy": 0.9157326745735066,
+  "context_relevancy": 0.2652511961722488,
+  "context_utilization": 0.7705234648910072
+},
+{
+  "faithfulness": 1.0,
+  "answer_relevancy": 0.9096674856564787,
+  "context_relevancy": 0.25472488038277513,
+  "context_utilization": 0.7705234648910072
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/eval_experts_general2.json b/gnqa/data/study1/results/eval_experts_general2.json
new file mode 100644
index 0000000..00aea70
--- /dev/null
+++ b/gnqa/data/study1/results/eval_experts_general2.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.8,
+  "answer_relevancy": 0.903335063636181,
+  "context_relevancy": 0.056258225526498694,
+  "context_utilization": 0.46176446463288745
+},
+{
+  "faithfulness": 0.7666666666666667,
+  "answer_relevancy": 0.904390101613252,
+  "context_relevancy": 0.08775428851862468,
+  "context_utilization": 0.4464446356339682
+},
+{
+  "faithfulness": 0.8,
+  "answer_relevancy": 0.9086449278497206,
+  "context_relevancy": 0.056258225526498694,
+  "context_utilization": 0.46176446463288745
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/eval_experts_suga1.json b/gnqa/data/study1/results/eval_experts_suga1.json
new file mode 100644
index 0000000..cfabf1a
--- /dev/null
+++ b/gnqa/data/study1/results/eval_experts_suga1.json
@@ -0,0 +1,18 @@
+{
+  "faithfulness": 0.9612,
+  "answer_relevancy": 0.9295,
+  "context_relevancy": 0.1995,
+  "context_utilization": 0.842090248282362
+},
+{
+  "faithfulness": 0.9612403100775193,
+  "answer_relevancy": 0.9266841312155393,
+  "context_relevancy": 0.21207858802198423,
+  "context_utilization": 0.842090248282362
+},
+{
+  "faithfulness": 0.9612403100775193,
+  "answer_relevancy": 0.9284770424352974,
+  "context_relevancy": 0.2014315773749736,
+  "context_utilization": 0.842090248282362
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/eval_general1.json b/gnqa/data/study1/results/eval_general1.json
new file mode 100644
index 0000000..80dbfc5
--- /dev/null
+++ b/gnqa/data/study1/results/eval_general1.json
@@ -0,0 +1,18 @@
+{
+  "faithfulness": 0.6,
+  "answer_relevancy": 0.9801126654000318,
+  "context_relevancy": 0.09178152459966993,
+  "context_utilization": 0.8517819734097796
+},
+{
+  "faithfulness": 0.6,
+  "answer_relevancy": 0.9825744284107565,
+  "context_relevancy": 0.09178152459966993,
+  "context_utilization": 0.816596788224676
+},
+{
+  "faithfulness": 0.6,
+  "answer_relevancy": 0.9804185355149768,
+  "context_relevancy": 0.09065663938387562,
+  "context_utilization": 0.8517819734097796
+}
diff --git a/gnqa/data/study1/results/eval_general2.json b/gnqa/data/study1/results/eval_general2.json
new file mode 100644
index 0000000..51665e3
--- /dev/null
+++ b/gnqa/data/study1/results/eval_general2.json
@@ -0,0 +1,18 @@
+{
+  "faithfulness": 0.85,
+  "answer_relevancy": 0.6941347949549538,
+  "context_relevancy": 0.09669216181532704,
+  "context_utilization": 0.7730960707226785
+},
+{
+  "faithfulness": 0.85,
+  "answer_relevancy": 0.6934750290194251,
+  "context_relevancy": 0.13879742497322178,
+  "context_utilization": 0.7730960707226785
+},
+{
+  "faithfulness": 0.85,
+  "answer_relevancy": 0.6943081762253429,
+  "context_relevancy": 0.09669216181532704,
+  "context_utilization": 0.7730960707226785
+}
diff --git a/gnqa/data/study1/results/eval_suga1.json b/gnqa/data/study1/results/eval_suga1.json
new file mode 100644
index 0000000..3e162d0
--- /dev/null
+++ b/gnqa/data/study1/results/eval_suga1.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "answer_relevancy": 0.9363046208472652,
+  "context_relevancy": 0.10308941188546791,
+  "context_utilization": 0.938356611481667
+},
+{
+  "faithfulness": 1.0,
+  "answer_relevancy": 0.9387937731939724,
+  "context_relevancy": 0.10308941188546791,
+  "context_utilization": 0.9662574794748956
+},
+{
+  "faithfulness": 1.0,
+  "answer_relevancy": 0.9372333468729981,
+  "context_relevancy": 0.10308941188546791,
+  "context_utilization": 0.9421623086941493
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/eval_suga2.json b/gnqa/data/study1/results/eval_suga2.json
new file mode 100644
index 0000000..4ea2aa2
--- /dev/null
+++ b/gnqa/data/study1/results/eval_suga2.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "answer_relevancy": 0.9318400456917242,
+  "context_relevancy": 0.12194071444495894,
+  "context_utilization": 0.9657545215065534
+},
+{
+  "faithfulness": 1.0,
+  "answer_relevancy": 0.9269052398452946,
+  "context_relevancy": 0.12194071444495894,
+  "context_utilization": 0.9657545215065534
+},
+{
+  "faithfulness": 1.0,
+  "answer_relevancy": 0.9326698973133014,
+  "context_relevancy": 0.11492317058530979,
+  "context_utilization": 0.9717723548657957
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/eval_sugaA.json b/gnqa/data/study1/results/eval_sugaA.json
new file mode 100644
index 0000000..fda4de7
--- /dev/null
+++ b/gnqa/data/study1/results/eval_sugaA.json
@@ -0,0 +1,7 @@
+,
+{
+  "faithfulness": 1.0,
+  "answer_relevancy": 0.9332465603795168,
+  "context_relevancy": 0.17527404777829225,
+  "context_utilization": 0.9832121070042665
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gemma_eval_general1.json b/gnqa/data/study1/results/gemma_eval_general1.json
new file mode 100644
index 0000000..6b13c83
--- /dev/null
+++ b/gnqa/data/study1/results/gemma_eval_general1.json
@@ -0,0 +1,7 @@
+,
+{
+  "faithfulness": NaN,
+  "answer_relevancy": NaN,
+  "context_relevancy": 0.017839778759088275,
+  "context_utilization": NaN
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gemma_eval_general2.json b/gnqa/data/study1/results/gemma_eval_general2.json
new file mode 100644
index 0000000..f2d4c5f
--- /dev/null
+++ b/gnqa/data/study1/results/gemma_eval_general2.json
@@ -0,0 +1,7 @@
+,
+{
+  "faithfulness": NaN,
+  "answer_relevancy": NaN,
+  "context_relevancy": 0.10522726586398572,
+  "context_utilization": NaN
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_aging_1.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_aging_1.json
new file mode 100644
index 0000000..017d467
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_aging_1.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.9,
+  "context_utilization": 0.9070781944697044,
+  "context_relevancy": 0.2509564217695168,
+  "answer_relevancy": 0.9766358986013376
+},
+{
+  "faithfulness": 0.9,
+  "context_utilization": 0.9070781944697044,
+  "context_relevancy": 0.39381356462665973,
+  "answer_relevancy": 0.9825656372129992
+},
+{
+  "faithfulness": 0.9,
+  "context_utilization": 0.9104451978368653,
+  "context_relevancy": 0.39381356462665973,
+  "answer_relevancy": 0.973147869814394
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_aging_2.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_aging_2.json
new file mode 100644
index 0000000..16e0754
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_aging_2.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.96,
+  "context_utilization": 0.999999999991935,
+  "context_relevancy": 0.135272921108742,
+  "answer_relevancy": 0.9479744529828181
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.999999999991935,
+  "context_relevancy": 0.135272921108742,
+  "answer_relevancy": 0.951711024285933
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.999999999991935,
+  "context_relevancy": 0.14987988628287136,
+  "answer_relevancy": 0.9541549710773409
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_aging_3.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_aging_3.json
new file mode 100644
index 0000000..566613d
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_aging_3.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.97675568021047,
+  "context_relevancy": 0.2259505726726024,
+  "answer_relevancy": 0.9448278057931704
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.97675568021047,
+  "context_relevancy": 0.21568920951760603,
+  "answer_relevancy": 0.9444115188658463
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.97675568021047,
+  "context_relevancy": 0.22922926119719259,
+  "answer_relevancy": 0.9444470134072755
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_aging_4.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_aging_4.json
new file mode 100644
index 0000000..61632cf
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_aging_4.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.9375,
+  "context_utilization": 0.9456511261659628,
+  "context_relevancy": 0.19499540357020145,
+  "answer_relevancy": 0.9422926379891006
+},
+{
+  "faithfulness": 0.9375,
+  "context_utilization": 0.9213036834852352,
+  "context_relevancy": 0.18966624996518577,
+  "answer_relevancy": 0.9493955674020345
+},
+{
+  "faithfulness": 0.9375,
+  "context_utilization": 0.9213036834852352,
+  "context_relevancy": 0.19896857554658115,
+  "answer_relevancy": 0.9454532501945042
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_diabetes_1.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_diabetes_1.json
new file mode 100644
index 0000000..63646cf
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_diabetes_1.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.8533333333333333,
+  "context_utilization": 0.9438491717704647,
+  "context_relevancy": 0.20436440992383947,
+  "answer_relevancy": 0.957861571692806
+},
+{
+  "faithfulness": 0.8355555555555556,
+  "context_utilization": 0.9438491717704647,
+  "context_relevancy": 0.2012874868469164,
+  "answer_relevancy": 0.9533191002746577
+},
+{
+  "faithfulness": 0.8533333333333333,
+  "context_utilization": 0.9438491717704647,
+  "context_relevancy": 0.18389618249909034,
+  "answer_relevancy": 0.9498105973186146
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_diabetes_2.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_diabetes_2.json
new file mode 100644
index 0000000..02fe10f
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_diabetes_2.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.9583333333333334,
+  "context_utilization": 0.7194444444356269,
+  "context_relevancy": 0.45524315840105317,
+  "answer_relevancy": 0.9496830965502638
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.7220833333238528,
+  "context_relevancy": 0.3970421001999949,
+  "answer_relevancy": 0.947827635665291
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.7194444444356269,
+  "context_relevancy": 0.3941849573428521,
+  "answer_relevancy": 0.9388702679644993
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_diabetes_3.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_diabetes_3.json
new file mode 100644
index 0000000..6566e51
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_diabetes_3.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9237332568786083,
+  "context_relevancy": 0.2418398640689662,
+  "answer_relevancy": 0.9914901338443677
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9237332568786083,
+  "context_relevancy": 0.2352516287748486,
+  "answer_relevancy": 0.9926324858517163
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9295047961859101,
+  "context_relevancy": 0.2352516287748486,
+  "answer_relevancy": 0.9942151664950669
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_diabetes_4.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_diabetes_4.json
new file mode 100644
index 0000000..29e72c0
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_diabetes_4.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.96,
+  "context_utilization": 0.8382274392203959,
+  "context_relevancy": 0.21850226437090842,
+  "answer_relevancy": 0.9268774561175513
+},
+{
+  "faithfulness": 0.96,
+  "context_utilization": 0.8289482840320825,
+  "context_relevancy": 0.21792356066720475,
+  "answer_relevancy": 0.9264507966486306
+},
+{
+  "faithfulness": 0.96,
+  "context_utilization": 0.8382274392203959,
+  "context_relevancy": 0.22104856066720474,
+  "answer_relevancy": 0.9306530537050953
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_gn_1.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_gn_1.json
new file mode 100644
index 0000000..25a71b0
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_gn_1.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.19999999999900003,
+  "context_relevancy": 0.05,
+  "answer_relevancy": 0.1823656883581401
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.19999999999900003,
+  "context_relevancy": 0.05,
+  "answer_relevancy": 0.1823656883581401
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.19999999999900003,
+  "context_relevancy": 0.05,
+  "answer_relevancy": 0.1823656883581401
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_gn_3.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_gn_3.json
new file mode 100644
index 0000000..580e854
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_gn_3.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.5999999999959664,
+  "context_relevancy": 0.22450090744101633,
+  "answer_relevancy": 0.562411241022707
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.5999999999959664,
+  "context_relevancy": 0.1687443284936479,
+  "answer_relevancy": 0.5643801560995779
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.5999999999959664,
+  "context_relevancy": 0.1687443284936479,
+  "answer_relevancy": 0.5617108358354678
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_gn_4.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_gn_4.json
new file mode 100644
index 0000000..bcfc652
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_cs_gn_4.json
@@ -0,0 +1,19 @@
+[
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.19999999999882354,
+  "context_relevancy": 0.065625,
+  "answer_relevancy": 0.1834019127645967
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.1999999999988889,
+  "context_relevancy": 0.065625,
+  "answer_relevancy": 0.18443207660654864
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.19999999999882354,
+  "context_relevancy": 0.065625,
+  "answer_relevancy": 0.18442316533105405
+}]
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_aging_1.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_aging_1.json
new file mode 100644
index 0000000..f719092
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_aging_1.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.96,
+  "context_utilization": 0.9479350312277262,
+  "context_relevancy": 0.21303541253345637,
+  "answer_relevancy": 0.9224404704070004
+},
+{
+  "faithfulness": 0.96,
+  "context_utilization": 0.9479350312277262,
+  "context_relevancy": 0.21303541253345637,
+  "answer_relevancy": 0.9204895776596349
+},
+{
+  "faithfulness": 0.975,
+  "context_utilization": 0.9479350312277262,
+  "context_relevancy": 0.21303541253345637,
+  "answer_relevancy": 0.9233177482569399
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_aging_2.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_aging_2.json
new file mode 100644
index 0000000..6539d02
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_aging_2.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9999999999917659,
+  "context_relevancy": 0.12455653962641092,
+  "answer_relevancy": 0.9215002061256425
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9999999999917659,
+  "context_relevancy": 0.11027082534069661,
+  "answer_relevancy": 0.9238905660966263
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9999999999917659,
+  "context_relevancy": 0.10345264352251479,
+  "answer_relevancy": 0.9236938936685843
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_aging_3.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_aging_3.json
new file mode 100644
index 0000000..13c967f
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_aging_3.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9017950700460371,
+  "context_relevancy": 0.15025391166567637,
+  "answer_relevancy": 0.9080233205044008
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9017950700460371,
+  "context_relevancy": 0.1521235888294712,
+  "answer_relevancy": 0.9183172871520828
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9017950700460371,
+  "context_relevancy": 0.14271182412358882,
+  "answer_relevancy": 0.914051539296523
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_aging_4.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_aging_4.json
new file mode 100644
index 0000000..b40e032
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_aging_4.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.873908075621365,
+  "context_relevancy": 0.13236286714496703,
+  "answer_relevancy": 0.9379656935564172
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.873908075621365,
+  "context_relevancy": 0.13236286714496703,
+  "answer_relevancy": 0.9291571366744364
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.873908075621365,
+  "context_relevancy": 0.13236286714496703,
+  "answer_relevancy": 0.9374908833538264
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_diabetes_1.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_diabetes_1.json
new file mode 100644
index 0000000..d06530b
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_diabetes_1.json
@@ -0,0 +1,20 @@
+[
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9898660740877201,
+  "context_relevancy": 0.31265901349702185,
+  "answer_relevancy": 0.9236030246314068
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9898660740877201,
+  "context_relevancy": 0.14113303947104788,
+  "answer_relevancy": 0.9150252742414604
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9728819471034,
+  "context_relevancy": 0.13863303947104788,
+  "answer_relevancy": 0.9148789006153158
+}
+]
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_diabetes_2.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_diabetes_2.json
new file mode 100644
index 0000000..e9fee86
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_diabetes_2.json
@@ -0,0 +1,20 @@
+[
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.7124087573371619,
+  "context_relevancy": 0.22621316914080075,
+  "answer_relevancy": 0.9046933431898141
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.7004998969667501,
+  "context_relevancy": 0.23871316914080074,
+  "answer_relevancy": 0.9058328551471282
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.7124087573371619,
+  "context_relevancy": 0.24675410481331536,
+  "answer_relevancy": 0.9079384840142384
+}
+]
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_diabetes_3.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_diabetes_3.json
new file mode 100644
index 0000000..e39107d
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_diabetes_3.json
@@ -0,0 +1,20 @@
+[
+{
+  "faithfulness": 0.96,
+  "context_utilization": 0.7479011200345999,
+  "context_relevancy": 0.2814642730385713,
+  "answer_relevancy": 0.8930647394153285
+},
+{
+  "faithfulness": 0.9099999999999999,
+  "context_utilization": 0.7479011200345999,
+  "context_relevancy": 0.2814642730385713,
+  "answer_relevancy": 0.896847471293901
+},
+{
+  "faithfulness": 0.9099999999999999,
+  "context_utilization": 0.7479011200345999,
+  "context_relevancy": 0.2814642730385713,
+  "answer_relevancy": 0.8912330225043821
+}
+]
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_diabetes_4.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_diabetes_4.json
new file mode 100644
index 0000000..2be82a9
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_diabetes_4.json
@@ -0,0 +1,20 @@
+[
+{
+  "faithfulness": 0.9333333333333332,
+  "context_utilization": 0.7297725885164278,
+  "context_relevancy": 0.17196237023200656,
+  "answer_relevancy": 0.8650648136737542
+},
+{
+  "faithfulness": 0.9333333333333332,
+  "context_utilization": 0.7297725885164278,
+  "context_relevancy": 0.19056702139479725,
+  "answer_relevancy": 0.877389474552466
+},
+{
+  "faithfulness": 0.9333333333333332,
+  "context_utilization": 0.7297725885164278,
+  "context_relevancy": 0.12413628327548483,
+  "answer_relevancy": 0.8783898419790906
+}
+]
diff --git a/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_gn_3.json b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_gn_3.json
new file mode 100644
index 0000000..8f33b47
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/gpt4o_eval_de_gn_3.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.3914232592779822,
+  "context_relevancy": 0.05517979452054794,
+  "answer_relevancy": 0.39015395726757396
+},
+{
+  "faithfulness": 0.6666666666666666,
+  "context_utilization": 0.3914232592779822,
+  "context_relevancy": 0.05517979452054794,
+  "answer_relevancy": 0.3864361192318465
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.3914232592779822,
+  "context_relevancy": 0.05517979452054794,
+  "answer_relevancy": 0.3901540653386376
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/gpt4o/scores_cs_diabetes.json b/gnqa/data/study1/results/gpt4o/scores_cs_diabetes.json
new file mode 100644
index 0000000..ef8c661
--- /dev/null
+++ b/gnqa/data/study1/results/gpt4o/scores_cs_diabetes.json
@@ -0,0 +1,37 @@
+,
+{
+  "faithfulness": 0.8836363636363636,
+  "context_utilization": 0.9533674463200074,
+  "context_relevancy": 0.1906017620560349,
+  "answer_relevancy": 0.9629314894517702
+},
+{
+  "faithfulness": 0.8436363636363637,
+  "context_utilization": 0.9533674463200074,
+  "context_relevancy": 0.20364480596864404,
+  "answer_relevancy": 0.9495337378736439
+},
+{
+  "faithfulness": 0.9292861989650555,
+  "context_utilization": 0.9651063978998563,
+  "context_relevancy": 0.7109415961877185,
+  "answer_relevancy": 0.6638464088279047
+},
+{
+  "faithfulness": 0.4690747444442785,
+  "context_utilization": 0.7745118439410044,
+  "context_relevancy": 0.7140014395170777,
+  "answer_relevancy": 0.9322560108422944
+},
+{
+  "faithfulness": 0.7745118439410044,
+  "context_utilization": 0.3333333333333333,
+  "context_relevancy": 0.3538011695906433,
+  "answer_relevancy": 0.5456168066603103
+},
+{
+  "faithfulness": 0.5657894736779605,
+  "context_utilization": 1.0,
+  "context_relevancy": 0.22142857142857142,
+  "answer_relevancy": 0.7181594110215056
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_cs_aging_1.json b/gnqa/data/study1/results/human/scores_cs_aging_1.json
new file mode 100644
index 0000000..f37296e
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_cs_aging_1.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.9428571428571428,
+  "context_utilization": 0.9352808378906239,
+  "context_relevancy": 0.07125660926343383,
+  "answer_relevancy": 0.9523107847972947
+},
+{
+  "faithfulness": 0.9428571428571428,
+  "context_utilization": 0.9355754170487147,
+  "context_relevancy": 0.07125660926343383,
+  "answer_relevancy": 0.9549674105661919
+},
+{
+  "faithfulness": 0.9428571428571428,
+  "context_utilization": 0.9211814776549062,
+  "context_relevancy": 0.07125660926343383,
+  "answer_relevancy": 0.9499741000488516
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_cs_aging_2.json b/gnqa/data/study1/results/human/scores_cs_aging_2.json
new file mode 100644
index 0000000..f7dae45
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_cs_aging_2.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.7742138364779875,
+  "context_utilization": 0.9894163077459343,
+  "context_relevancy": 0.04506568948673187,
+  "answer_relevancy": 0.9408685212116719
+},
+{
+  "faithfulness": 0.7742138364779875,
+  "context_utilization": 0.9894163077459343,
+  "context_relevancy": 0.04506568948673187,
+  "answer_relevancy": 0.9443348131121218
+},
+{
+  "faithfulness": 0.7742138364779875,
+  "context_utilization": 0.9894163077459343,
+  "context_relevancy": 0.04506568948673187,
+  "answer_relevancy": 0.9373602976132769
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_cs_aging_3.json b/gnqa/data/study1/results/human/scores_cs_aging_3.json
new file mode 100644
index 0000000..b844e70
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_cs_aging_3.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.5714285714285715,
+  "context_utilization": 0.8007295763340471,
+  "context_relevancy": 0.17757604714126454,
+  "answer_relevancy": 0.9624406549445811
+},
+{
+  "faithfulness": 0.5714285714285715,
+  "context_utilization": 0.8256406991618427,
+  "context_relevancy": 0.17757604714126454,
+  "answer_relevancy": 0.9624295953235836
+},
+{
+  "faithfulness": 0.5714285714285715,
+  "context_utilization": 0.8256406991618427,
+  "context_relevancy": 0.17757604714126454,
+  "answer_relevancy": 0.9622154472101722
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_cs_diabetes_1.json b/gnqa/data/study1/results/human/scores_cs_diabetes_1.json
new file mode 100644
index 0000000..8316988
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_cs_diabetes_1.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.875,
+  "context_utilization": 0.6983276538190184,
+  "context_relevancy": 0.12429532403609515,
+  "answer_relevancy": 0.9112620728936985
+},
+{
+  "faithfulness": 0.875,
+  "context_utilization": 0.6983276538190184,
+  "context_relevancy": 0.09929532403609516,
+  "answer_relevancy": 0.9153897050102227
+},
+{
+  "faithfulness": 0.875,
+  "context_utilization": 0.6983276538190184,
+  "context_relevancy": 0.10864315012305167,
+  "answer_relevancy": 0.917767867097622
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_cs_diabetes_2.json b/gnqa/data/study1/results/human/scores_cs_diabetes_2.json
new file mode 100644
index 0000000..7020070
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_cs_diabetes_2.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.96,
+  "context_utilization": 0.9677256242806254,
+  "context_relevancy": 0.21125490196078428,
+  "answer_relevancy": 0.96903893567995
+},
+{
+  "faithfulness": 0.96,
+  "context_utilization": 0.9769465411060386,
+  "context_relevancy": 0.2143799019607843,
+  "answer_relevancy": 0.9657737286038965
+},
+{
+  "faithfulness": 0.96,
+  "context_utilization": 0.9769465411060386,
+  "context_relevancy": 0.2143799019607843,
+  "answer_relevancy": 0.9662487631948171
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_cs_diabetes_3.json b/gnqa/data/study1/results/human/scores_cs_diabetes_3.json
new file mode 100644
index 0000000..1b57ac7
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_cs_diabetes_3.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.8400000000000001,
+  "context_utilization": 0.9538081741417747,
+  "context_relevancy": 0.11497132693854006,
+  "answer_relevancy": 0.9169018406443659
+},
+{
+  "faithfulness": 0.8400000000000001,
+  "context_utilization": 0.9538081741417747,
+  "context_relevancy": 0.2016379936052067,
+  "answer_relevancy": 0.9187380038134432
+},
+{
+  "faithfulness": 0.8400000000000001,
+  "context_utilization": 0.9434457191364413,
+  "context_relevancy": 0.11497132693854006,
+  "answer_relevancy": 0.9169054522175759
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_cs_diabetes_4.json b/gnqa/data/study1/results/human/scores_cs_diabetes_4.json
new file mode 100644
index 0000000..e54895e
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_cs_diabetes_4.json
@@ -0,0 +1,39 @@
+[
+{
+  "faithfulness": 0.75,
+  "context_utilization": 0.49586940836114385,
+  "context_relevancy": 0.4489795918367347,
+  "answer_relevancy": 0.9050522628722737
+},
+{
+  "faithfulness": 0.75,
+  "context_utilization": 0.5332560296769832,
+  "context_relevancy": 0.4489795918367347,
+  "answer_relevancy": 0.9274337314167257
+},
+{
+  "faithfulness": 0.75,
+  "context_utilization": 0.49586940836114385,
+  "context_relevancy": 0.4489795918367347,
+  "answer_relevancy": 0.9274337314167257
+}
+]
+,
+{
+  "faithfulness": 0.75,
+  "context_utilization": 0.49586940836114385,
+  "context_relevancy": 0.2857142857142857,
+  "answer_relevancy": 0.9050522628722737
+},
+{
+  "faithfulness": 0.75,
+  "context_utilization": 0.49586940836114385,
+  "context_relevancy": 0.4489795918367347,
+  "answer_relevancy": 0.9050692102679129
+},
+{
+  "faithfulness": 0.75,
+  "context_utilization": 0.49586940836114385,
+  "context_relevancy": 0.4489795918367347,
+  "answer_relevancy": 0.9050522628722737
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_cs_gn_1.json b/gnqa/data/study1/results/human/scores_cs_gn_1.json
new file mode 100644
index 0000000..4481bdb
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_cs_gn_1.json
@@ -0,0 +1,14 @@
+[
+{
+  "faithfulness": 0.9099999999999999,
+  "context_utilization": 0.7636817432217684,
+  "context_relevancy": 0.1880278568582262,
+  "answer_relevancy": 0.9423280729066063
+},
+{
+  "faithfulness": 0.9099999999999999,
+  "context_utilization": 0.7357044805156637,
+  "context_relevancy": 0.15469452352489288,
+  "answer_relevancy": 0.9486310766041234
+}
+]
diff --git a/gnqa/data/study1/results/human/scores_cs_gn_2.json b/gnqa/data/study1/results/human/scores_cs_gn_2.json
new file mode 100644
index 0000000..f0733da
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_cs_gn_2.json
@@ -0,0 +1,20 @@
+[
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.6326643990778912,
+  "context_relevancy": 0.1347400263302517,
+  "answer_relevancy": 0.8746783013952267
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.6683786847884866,
+  "context_relevancy": 0.1508690585883162,
+  "answer_relevancy": 0.8703116371547157
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.6326643990778912,
+  "context_relevancy": 0.1332248748151002,
+  "answer_relevancy": 0.8689393391315343
+}
+]
diff --git a/gnqa/data/study1/results/human/scores_cs_gn_3.json b/gnqa/data/study1/results/human/scores_cs_gn_3.json
new file mode 100644
index 0000000..7258a04
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_cs_gn_3.json
@@ -0,0 +1,25 @@
+,
+{
+  "faithfulness": 0.5677966101694916,
+  "context_utilization": 0.4561270844811867,
+  "context_relevancy": 0.5560185185148071,
+  "answer_relevancy": 0.5052295687739448
+},
+{
+  "faithfulness": 0.75,
+  "context_utilization": 0.5643129043087701,
+  "context_relevancy": 0.05599820060366845,
+  "answer_relevancy": 0.7414497144046052
+},
+{
+  "faithfulness": 0.75,
+  "context_utilization": 0.5729415276879585,
+  "context_relevancy": 0.05599820060366845,
+  "answer_relevancy": 0.5544292034718707
+},
+{
+  "faithfulness": 0.75,
+  "context_utilization": 0.5643129043087701,
+  "context_relevancy": 0.05599820060366845,
+  "answer_relevancy": 0.5571557447633533
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_cs_gn_4.json b/gnqa/data/study1/results/human/scores_cs_gn_4.json
new file mode 100644
index 0000000..15b1eb4
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_cs_gn_4.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.9428571428571428,
+  "context_utilization": 0.789441709521905,
+  "context_relevancy": 0.136784410468621,
+  "answer_relevancy": 0.8500389108331188
+},
+{
+  "faithfulness": 0.9142857142857143,
+  "context_utilization": 0.7921665772467545,
+  "context_relevancy": 0.15115688010424852,
+  "answer_relevancy": 0.8317623611813637
+},
+{
+  "faithfulness": 0.9142857142857143,
+  "context_utilization": 0.789441709521905,
+  "context_relevancy": 0.1713997950840056,
+  "answer_relevancy": 0.8295033051724321
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_cs_gn_5.json b/gnqa/data/study1/results/human/scores_cs_gn_5.json
new file mode 100644
index 0000000..03713c2
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_cs_gn_5.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.9333333333333332,
+  "context_utilization": 0.6801836614504664,
+  "context_relevancy": 0.06454107195486505,
+  "answer_relevancy": 0.7372449377189451
+},
+{
+  "faithfulness": 0.888888888888889,
+  "context_utilization": 0.6582554717950728,
+  "context_relevancy": 0.06454107195486505,
+  "answer_relevancy": 0.7372493726798736
+},
+{
+  "faithfulness": 0.8761904761904763,
+  "context_utilization": 0.6582554717950728,
+  "context_relevancy": 0.06454107195486505,
+  "answer_relevancy": 0.7372449377189451
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_cs_gn_6.json b/gnqa/data/study1/results/human/scores_cs_gn_6.json
new file mode 100644
index 0000000..0d67e80
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_cs_gn_6.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.75,
+  "context_utilization": 0.45564199508207504,
+  "context_relevancy": 0.06005275024001898,
+  "answer_relevancy": 0.8915679391851077
+},
+{
+  "faithfulness": 0.75,
+  "context_utilization": 0.45564199508207504,
+  "context_relevancy": 0.05215801339791372,
+  "answer_relevancy": 0.7064299254450507
+},
+{
+  "faithfulness": 0.75,
+  "context_utilization": 0.45564199508207504,
+  "context_relevancy": 0.0707670359543047,
+  "answer_relevancy": 0.705077643467664
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_cs_gn_7.json b/gnqa/data/study1/results/human/scores_cs_gn_7.json
new file mode 100644
index 0000000..a30782a
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_cs_gn_7.json
@@ -0,0 +1,18 @@
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9178474303338136,
+  "context_relevancy": 0.09082338152105594,
+  "answer_relevancy": 0.9524284122181226
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9178474303338136,
+  "context_relevancy": 0.09082338152105594,
+  "answer_relevancy": 0.9492709094955006
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9178474303338136,
+  "context_relevancy": 0.09082338152105594,
+  "answer_relevancy": 0.9524270517859097
+}
diff --git a/gnqa/data/study1/results/human/scores_de_aging_1.json b/gnqa/data/study1/results/human/scores_de_aging_1.json
new file mode 100644
index 0000000..0700cc3
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_de_aging_1.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.7428571428571429,
+  "context_utilization": 0.811213861888054,
+  "context_relevancy": 0.2314977832798794,
+  "answer_relevancy": 0.9433409234117335
+},
+{
+  "faithfulness": 0.7428571428571429,
+  "context_utilization": 0.7983208584270672,
+  "context_relevancy": 0.24114933391503665,
+  "answer_relevancy": 0.9213466964486724
+},
+{
+  "faithfulness": 0.7142857142857142,
+  "context_utilization": 0.7928499698879043,
+  "context_relevancy": 0.25367860791972047,
+  "answer_relevancy": 0.9318615626710995
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_de_aging_2.json b/gnqa/data/study1/results/human/scores_de_aging_2.json
new file mode 100644
index 0000000..b7f8cc0
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_de_aging_2.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9999999999923077,
+  "context_relevancy": 1.0,
+  "answer_relevancy": 0.8836732547434365
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9999999999923077,
+  "context_relevancy": 1.0,
+  "answer_relevancy": 0.8836732547434365
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9999999999923077,
+  "context_relevancy": 1.0,
+  "answer_relevancy": 0.8836732547434365
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_de_diabetes_1.1.json b/gnqa/data/study1/results/human/scores_de_diabetes_1.1.json
new file mode 100644
index 0000000..0e46a7f
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_de_diabetes_1.1.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.7777403152338384,
+  "context_relevancy": 0.06084656084656084,
+  "answer_relevancy": 0.9645121106959694
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.7777403152338384,
+  "context_relevancy": 0.06084656084656084,
+  "answer_relevancy": 0.9545089573441493
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.7719252969185456,
+  "context_relevancy": 0.05026455026455026,
+  "answer_relevancy": 0.9327156331092903
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_de_diabetes_1.json b/gnqa/data/study1/results/human/scores_de_diabetes_1.json
new file mode 100644
index 0000000..0b621e2
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_de_diabetes_1.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.9166666666666667,
+  "context_utilization": 0.7671392748688641,
+  "context_relevancy": 0.33561602418745273,
+  "answer_relevancy": 0.90324232280188
+},
+{
+  "faithfulness": 0.9166666666666667,
+  "context_utilization": 0.8555804271901495,
+  "context_relevancy": 0.2314914450628736,
+  "answer_relevancy": 0.7214993293693964
+},
+{
+  "faithfulness": 0.9666666666666668,
+  "context_utilization": 0.8080409996869443,
+  "context_relevancy": 0.2837641723356009,
+  "answer_relevancy": 0.9014349074286775
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_de_diabetes_2.json b/gnqa/data/study1/results/human/scores_de_diabetes_2.json
new file mode 100644
index 0000000..bd6159a
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_de_diabetes_2.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.96,
+  "context_utilization": 0.9407265478802447,
+  "context_relevancy": 0.36922494182022314,
+  "answer_relevancy": 0.9364702737085768
+},
+{
+  "faithfulness": 0.96,
+  "context_utilization": 0.9344763371477345,
+  "context_relevancy": 0.386466321130568,
+  "answer_relevancy": 0.944903559928554
+},
+{
+  "faithfulness": 0.96,
+  "context_utilization": 0.9344763371477345,
+  "context_relevancy": 0.36922494182022314,
+  "answer_relevancy": 0.9355512181399582
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_de_gn_1.1.json b/gnqa/data/study1/results/human/scores_de_gn_1.1.json
new file mode 100644
index 0000000..d47c31f
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_de_gn_1.1.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.9609375,
+  "context_utilization": 0.6937871661149843,
+  "context_relevancy": 0.13637360626722328,
+  "answer_relevancy": 0.7491735530216923
+},
+{
+  "faithfulness": 0.9609375,
+  "context_utilization": 0.6937871661149843,
+  "context_relevancy": 0.13637360626722328,
+  "answer_relevancy": 0.8902254519253692
+},
+{
+  "faithfulness": 0.9296875,
+  "context_utilization": 0.6937871661149843,
+  "context_relevancy": 0.13637360626722328,
+  "answer_relevancy": 0.7491716987687886
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_de_gn_1.json b/gnqa/data/study1/results/human/scores_de_gn_1.json
new file mode 100644
index 0000000..9b8aea1
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_de_gn_1.json
@@ -0,0 +1,18 @@
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9596645021564207,
+  "context_relevancy": 0.1634286630390054,
+  "answer_relevancy": 0.8973761639776056
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9596645021564207,
+  "context_relevancy": 0.1634286630390054,
+  "answer_relevancy": 0.9038434542970721
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9561079845997444,
+  "context_relevancy": 0.1634286630390054,
+  "answer_relevancy": 0.8983469111948426
+}
diff --git a/gnqa/data/study1/results/human/scores_de_gn_2.json b/gnqa/data/study1/results/human/scores_de_gn_2.json
new file mode 100644
index 0000000..30be099
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_de_gn_2.json
@@ -0,0 +1,18 @@
+{
+  "faithfulness": 0.8,
+  "context_utilization": 0.7266600180799679,
+  "context_relevancy": 0.12599664343008876,
+  "answer_relevancy": 0.7320068044307713
+},
+{
+  "faithfulness": 0.8,
+  "context_utilization": 0.7266600180799679,
+  "context_relevancy": 0.13234584977929512,
+  "answer_relevancy": 0.7198147208663943
+},
+{
+  "faithfulness": 0.8,
+  "context_utilization": 0.7266600180799679,
+  "context_relevancy": 0.12849969593314126,
+  "answer_relevancy": 0.7325464661134955
+}
diff --git a/gnqa/data/study1/results/human/scores_de_gn_3.json b/gnqa/data/study1/results/human/scores_de_gn_3.json
new file mode 100644
index 0000000..33a94ff
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_de_gn_3.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.8666666666666666,
+  "context_utilization": 0.6480859663109396,
+  "context_relevancy": 0.1510877797535341,
+  "answer_relevancy": 0.915240518467451
+},
+{
+  "faithfulness": 0.8666666666666666,
+  "context_utilization": 0.6480859663109396,
+  "context_relevancy": 0.11387847742795269,
+  "answer_relevancy": 0.9124757388808369
+},
+{
+  "faithfulness": 0.9333333333333332,
+  "context_utilization": 0.6480859663109396,
+  "context_relevancy": 0.1510877797535341,
+  "answer_relevancy": 0.9141762748312928
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_de_gn_4.json b/gnqa/data/study1/results/human/scores_de_gn_4.json
new file mode 100644
index 0000000..345f566
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_de_gn_4.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.65,
+  "context_utilization": 0.354120538187183,
+  "context_relevancy": 0.1120026888642334,
+  "answer_relevancy": 0.7376780691990237
+},
+{
+  "faithfulness": 0.5333333333333333,
+  "context_utilization": 0.34712053818788413,
+  "context_relevancy": 0.1120026888642334,
+  "answer_relevancy": 0.7455570356847625
+},
+{
+  "faithfulness": 0.65,
+  "context_utilization": 0.34712053818788413,
+  "context_relevancy": 0.0993042761658207,
+  "answer_relevancy": 0.7376780609996703
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_de_gn_5.json b/gnqa/data/study1/results/human/scores_de_gn_5.json
new file mode 100644
index 0000000..5148d68
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_de_gn_5.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.8007395937295169,
+  "context_relevancy": 0.049944862903025335,
+  "answer_relevancy": 0.8599243307705603
+},
+{
+  "faithfulness": 0.8,
+  "context_utilization": 0.806603791260579,
+  "context_relevancy": 0.049944862903025335,
+  "answer_relevancy": 0.6986715526356269
+},
+{
+  "faithfulness": 0.9,
+  "context_utilization": 0.806603791260579,
+  "context_relevancy": 0.049944862903025335,
+  "answer_relevancy": 0.8579006890252776
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/human/scores_de_gn_6.json b/gnqa/data/study1/results/human/scores_de_gn_6.json
new file mode 100644
index 0000000..25d04cf
--- /dev/null
+++ b/gnqa/data/study1/results/human/scores_de_gn_6.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9999999999919545,
+  "context_relevancy": 0.20662768031189083,
+  "answer_relevancy": 0.9302858689849556
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9999999999919545,
+  "context_relevancy": 0.2584795321637427,
+  "answer_relevancy": 0.9258655139523131
+},
+{
+  "faithfulness": 1.0,
+  "context_utilization": 0.9999999999919545,
+  "context_relevancy": 0.1992202729044834,
+  "answer_relevancy": 0.9219977486705678
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/llamaeval_general1.json b/gnqa/data/study1/results/llamaeval_general1.json
new file mode 100644
index 0000000..d9d134f
--- /dev/null
+++ b/gnqa/data/study1/results/llamaeval_general1.json
@@ -0,0 +1,13 @@
+,
+{
+  "faithfulness": NaN,
+  "answer_relevancy": NaN,
+  "context_relevancy": 0.924645390070922,
+  "context_utilization": NaN
+},
+{
+  "faithfulness": NaN,
+  "answer_relevancy": NaN,
+  "context_relevancy": 0.8,
+  "context_utilization": NaN
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/results.json b/gnqa/data/study1/results/results.json
new file mode 100644
index 0000000..4b30b95
--- /dev/null
+++ b/gnqa/data/study1/results/results.json
@@ -0,0 +1,20 @@
+{'faithfulness': nan, 'answer_relevancy': nan, 'context_relevancy': 0.7412, 'context_utilization': nan}
+,
+{
+  "faithfulness": NaN,
+  "answer_relevancy": NaN,
+  "context_relevancy": 0.5342715544752126,
+  "context_utilization": NaN
+},
+{
+  "faithfulness": NaN,
+  "answer_relevancy": NaN,
+  "context_relevancy": 0.523524948140371,
+  "context_utilization": NaN
+},
+{
+  "faithfulness": NaN,
+  "answer_relevancy": NaN,
+  "context_relevancy": 0.6374515308316596,
+  "context_utilization": NaN
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/results_aging.json b/gnqa/data/study1/results/results_aging.json
new file mode 100644
index 0000000..7fad8ff
--- /dev/null
+++ b/gnqa/data/study1/results/results_aging.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": NaN,
+  "answer_relevancy": NaN,
+  "context_relevancy": 0.726235827137375,
+  "context_utilization": NaN
+},
+{
+  "faithfulness": NaN,
+  "answer_relevancy": NaN,
+  "context_relevancy": 0.7121415843797659,
+  "context_utilization": NaN
+},
+{
+  "faithfulness": NaN,
+  "answer_relevancy": NaN,
+  "context_relevancy": 0.7374184453992012,
+  "context_utilization": NaN
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/test.json b/gnqa/data/study1/results/test.json
new file mode 100644
index 0000000..c8fa2d4
--- /dev/null
+++ b/gnqa/data/study1/results/test.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 0.75,
+  "answer_relevancy": 0.0,
+  "context_relevancy": 0.12244897959183673,
+  "context_utilization": 0.999999999990909
+},
+{
+  "faithfulness": 0.75,
+  "answer_relevancy": 0.0,
+  "context_relevancy": 0.12244897959183673,
+  "context_utilization": 0.999999999990909
+},
+{
+  "faithfulness": 0.75,
+  "answer_relevancy": 0.0,
+  "context_relevancy": 0.14285714285714285,
+  "context_utilization": 0.999999999990909
+}
\ No newline at end of file
diff --git a/gnqa/data/study1/results/test2.json b/gnqa/data/study1/results/test2.json
new file mode 100644
index 0000000..9ae1d2d
--- /dev/null
+++ b/gnqa/data/study1/results/test2.json
@@ -0,0 +1,19 @@
+,
+{
+  "faithfulness": 1.0,
+  "answer_relevancy": 0.982746184788807,
+  "context_relevancy": 0.09375,
+  "context_utilization": 0.99999999999
+},
+{
+  "faithfulness": 0.9565217391304348,
+  "answer_relevancy": 0.982746184788807,
+  "context_relevancy": 0.09375,
+  "context_utilization": 0.99999999999
+},
+{
+  "faithfulness": 0.9629629629629629,
+  "answer_relevancy": 0.9827409808824336,
+  "context_relevancy": 0.09375,
+  "context_utilization": 0.99999999999
+}
\ No newline at end of file
-- 
cgit v1.2.3