about | summary | refs | log | tree | commit | diff
path: root/gnqa/paper1_eval
diff options
context:
space:
mode:
author ShelbySolomonDarnell 2024-10-17 12:24:26 +0300
committer ShelbySolomonDarnell 2024-10-17 12:24:26 +0300
commit 00cba4b9a1e88891f1f96a1199320092c1962343 (patch)
tree 270fd06daa18b2fc5687ee72d912cad771354bb0 /gnqa/paper1_eval
parent e0b2b0e55049b89805f73f291df1e28fa05487fe (diff)
download gn-ai-master.tar.gz
Docker image built to run code, all evals run using R2R (HEAD, master)
Diffstat (limited to 'gnqa/paper1_eval')
-rw-r--r-- gnqa/paper1_eval/src/data/results/test.json 19
-rw-r--r-- gnqa/paper1_eval/src/data/results/test2.json 19
-rw-r--r-- gnqa/paper1_eval/src/ragas_fahamuRAG.py 33
3 files changed, 57 insertions, 14 deletions
diff --git a/gnqa/paper1_eval/src/data/results/test.json b/gnqa/paper1_eval/src/data/results/test.json
new file mode 100644
index 0000000..c8fa2d4
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/test.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 0.75,
+ "answer_relevancy": 0.0,
+ "context_relevancy": 0.12244897959183673,
+ "context_utilization": 0.999999999990909
+},
+{
+ "faithfulness": 0.75,
+ "answer_relevancy": 0.0,
+ "context_relevancy": 0.12244897959183673,
+ "context_utilization": 0.999999999990909
+},
+{
+ "faithfulness": 0.75,
+ "answer_relevancy": 0.0,
+ "context_relevancy": 0.14285714285714285,
+ "context_utilization": 0.999999999990909
+}
\ No newline at end of file
diff --git a/gnqa/paper1_eval/src/data/results/test2.json b/gnqa/paper1_eval/src/data/results/test2.json
new file mode 100644
index 0000000..9ae1d2d
--- /dev/null
+++ b/gnqa/paper1_eval/src/data/results/test2.json
@@ -0,0 +1,19 @@
+,
+{
+ "faithfulness": 1.0,
+ "answer_relevancy": 0.982746184788807,
+ "context_relevancy": 0.09375,
+ "context_utilization": 0.99999999999
+},
+{
+ "faithfulness": 0.9565217391304348,
+ "answer_relevancy": 0.982746184788807,
+ "context_relevancy": 0.09375,
+ "context_utilization": 0.99999999999
+},
+{
+ "faithfulness": 0.9629629629629629,
+ "answer_relevancy": 0.9827409808824336,
+ "context_relevancy": 0.09375,
+ "context_utilization": 0.99999999999
+}
\ No newline at end of file
diff --git a/gnqa/paper1_eval/src/ragas_fahamuRAG.py b/gnqa/paper1_eval/src/ragas_fahamuRAG.py
index 345483e..8955a66 100644
--- a/gnqa/paper1_eval/src/ragas_fahamuRAG.py
+++ b/gnqa/paper1_eval/src/ragas_fahamuRAG.py
@@ -1,13 +1,16 @@
+#!/usr/bin/python3
import os
import sys
import json
import time
import configparser
-import pandas as pd
+#import pandas as pd
-from pandas import DataFrame as df
-from langchain_together import Together
-from langchain_together.embeddings import TogetherEmbeddings
+#from pandas import DataFrame as df
+#from langchain_together import Together
+#from langchain_together.embeddings import TogetherEmbeddings
+#from ragas.metrics import (faithfulness, answer_relevancy, context_relevancy, context_utilization, context_recall)
+# using ragas==0.1.9
from ragas.metrics import (faithfulness, answer_relevancy, context_relevancy, context_utilization)
from ragas import evaluate
from datasets import Dataset#, load_dataset
@@ -15,7 +18,8 @@ from datasets import Dataset#, load_dataset
def evaluateDataset(num_evaluations, dataset, output_file):
for n in range(0,num_evaluations):
- results = evaluate(dataset, metrics=[faithfulness,context_utilization,context_relevancy,answer_relevancy], raise_exceptions=False)
+ #results = evaluate(dataset, metrics=[faithfulness,context_utilization,context_relevancy,answer_relevancy], raise_exceptions=False)
+ results = evaluate(dataset, metrics=[faithfulness,answer_relevancy, context_relevancy, context_utilization])
print(results)
with open(output_file, "a") as the_data:
the_data.write(",\n")
@@ -25,24 +29,25 @@ def evaluateDataset(num_evaluations, dataset, output_file):
config = configparser.ConfigParser()
-config.read('_config.cfg')
+#config.read('/home/shebes/Coding/gn-ai/gnqa/paper1_eval/src/_config.cfg')
+config.read('/code/paper1_eval/src/_config.cfg')
os.environ["OPENAI_API_KEY"] = config['key.api']['openai2']
together_key = config['key.api']['togetherai']
#embeddings = TogetherEmbeddings(model="togethercomputer/m2-bert-80M-8k-retrieval")
-embeddings = TogetherEmbeddings(model="togethercomputer/m2-bert-80M-32k-retrieval")
+#embeddings = TogetherEmbeddings(model="togethercomputer/m2-bert-80M-32k-retrieval")
-together_completion = Together(
+#together_completion = Together(
#model="NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT",
#model="togethercomputer/Llama-2-7B-32K-Instruct",
#model="meta-llama/Llama-3-70b-chat-hf",
- model="google/gemma-7b-it",
- temperature=0.8,
- max_tokens=4000,
- top_k=1,
- together_api_key=together_key
-)
+# model="google/gemma-7b-it",
+# temperature=0.8,
+# max_tokens=4000,
+# top_k=1,
+# together_api_key=together_key
+#)
read_file = str(sys.argv[1])
outp_file = str(sys.argv[2])