diff options
Diffstat (limited to 'gnqa/paper1_eval')
-rw-r--r-- | gnqa/paper1_eval/src/data/results/test.json | 19 | ||||
-rw-r--r-- | gnqa/paper1_eval/src/data/results/test2.json | 19 | ||||
-rw-r--r-- | gnqa/paper1_eval/src/ragas_fahamuRAG.py | 33 |
3 files changed, 57 insertions, 14 deletions
diff --git a/gnqa/paper1_eval/src/data/results/test.json b/gnqa/paper1_eval/src/data/results/test.json new file mode 100644 index 0000000..c8fa2d4 --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/test.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 0.75, + "answer_relevancy": 0.0, + "context_relevancy": 0.12244897959183673, + "context_utilization": 0.999999999990909 +}, +{ + "faithfulness": 0.75, + "answer_relevancy": 0.0, + "context_relevancy": 0.12244897959183673, + "context_utilization": 0.999999999990909 +}, +{ + "faithfulness": 0.75, + "answer_relevancy": 0.0, + "context_relevancy": 0.14285714285714285, + "context_utilization": 0.999999999990909 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/data/results/test2.json b/gnqa/paper1_eval/src/data/results/test2.json new file mode 100644 index 0000000..9ae1d2d --- /dev/null +++ b/gnqa/paper1_eval/src/data/results/test2.json @@ -0,0 +1,19 @@ +, +{ + "faithfulness": 1.0, + "answer_relevancy": 0.982746184788807, + "context_relevancy": 0.09375, + "context_utilization": 0.99999999999 +}, +{ + "faithfulness": 0.9565217391304348, + "answer_relevancy": 0.982746184788807, + "context_relevancy": 0.09375, + "context_utilization": 0.99999999999 +}, +{ + "faithfulness": 0.9629629629629629, + "answer_relevancy": 0.9827409808824336, + "context_relevancy": 0.09375, + "context_utilization": 0.99999999999 +}
\ No newline at end of file diff --git a/gnqa/paper1_eval/src/ragas_fahamuRAG.py b/gnqa/paper1_eval/src/ragas_fahamuRAG.py index 345483e..8955a66 100644 --- a/gnqa/paper1_eval/src/ragas_fahamuRAG.py +++ b/gnqa/paper1_eval/src/ragas_fahamuRAG.py @@ -1,13 +1,16 @@ +#!/usr/bin/python3 import os import sys import json import time import configparser -import pandas as pd +#import pandas as pd -from pandas import DataFrame as df -from langchain_together import Together -from langchain_together.embeddings import TogetherEmbeddings +#from pandas import DataFrame as df +#from langchain_together import Together +#from langchain_together.embeddings import TogetherEmbeddings +#from ragas.metrics import (faithfulness, answer_relevancy, context_relevancy, context_utilization, context_recall) +# using ragas==0.1.9 from ragas.metrics import (faithfulness, answer_relevancy, context_relevancy, context_utilization) from ragas import evaluate from datasets import Dataset#, load_dataset @@ -15,7 +18,8 @@ from datasets import Dataset#, load_dataset def evaluateDataset(num_evaluations, dataset, output_file): for n in range(0,num_evaluations): - results = evaluate(dataset, metrics=[faithfulness,context_utilization,context_relevancy,answer_relevancy], raise_exceptions=False) + #results = evaluate(dataset, metrics=[faithfulness,context_utilization,context_relevancy,answer_relevancy], raise_exceptions=False) + results = evaluate(dataset, metrics=[faithfulness,answer_relevancy, context_relevancy, context_utilization]) print(results) with open(output_file, "a") as the_data: the_data.write(",\n") @@ -25,24 +29,25 @@ def evaluateDataset(num_evaluations, dataset, output_file): config = configparser.ConfigParser() -config.read('_config.cfg') +#config.read('/home/shebes/Coding/gn-ai/gnqa/paper1_eval/src/_config.cfg') +config.read('/code/paper1_eval/src/_config.cfg') os.environ["OPENAI_API_KEY"] = config['key.api']['openai2'] together_key = config['key.api']['togetherai'] #embeddings = TogetherEmbeddings(model="togethercomputer/m2-bert-80M-8k-retrieval") -embeddings = TogetherEmbeddings(model="togethercomputer/m2-bert-80M-32k-retrieval") +#embeddings = TogetherEmbeddings(model="togethercomputer/m2-bert-80M-32k-retrieval") -together_completion = Together( +#together_completion = Together( #model="NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", #model="togethercomputer/Llama-2-7B-32K-Instruct", #model="meta-llama/Llama-3-70b-chat-hf", - model="google/gemma-7b-it", - temperature=0.8, - max_tokens=4000, - top_k=1, - together_api_key=together_key -) +# model="google/gemma-7b-it", +# temperature=0.8, +# max_tokens=4000, +# top_k=1, +# together_api_key=together_key +#) read_file = str(sys.argv[1]) outp_file = str(sys.argv[2]) |