Diffstat (limited to 'R2R/r2r/providers/eval')
-rwxr-xr-x | R2R/r2r/providers/eval/__init__.py          |  3
-rwxr-xr-x | R2R/r2r/providers/eval/llm/base_llm_eval.py | 84
2 files changed, 87 insertions, 0 deletions
diff --git a/R2R/r2r/providers/eval/__init__.py b/R2R/r2r/providers/eval/__init__.py
new file mode 100755
index 00000000..3f5e1b51
--- /dev/null
+++ b/R2R/r2r/providers/eval/__init__.py
@@ -0,0 +1,3 @@
+from .llm.base_llm_eval import LLMEvalProvider
+
+__all__ = ["LLMEvalProvider"]
diff --git a/R2R/r2r/providers/eval/llm/base_llm_eval.py b/R2R/r2r/providers/eval/llm/base_llm_eval.py
new file mode 100755
index 00000000..7c573a34
--- /dev/null
+++ b/R2R/r2r/providers/eval/llm/base_llm_eval.py
@@ -0,0 +1,84 @@
+from fractions import Fraction
+from typing import Union
+
+from r2r import EvalConfig, EvalProvider, LLMProvider, PromptProvider
+from r2r.base.abstractions.llm import GenerationConfig
+
+
+class LLMEvalProvider(EvalProvider):
+    def __init__(
+        self,
+        config: EvalConfig,
+        llm_provider: LLMProvider,
+        prompt_provider: PromptProvider,
+    ):
+        super().__init__(config)
+
+        self.llm_provider = llm_provider
+        self.prompt_provider = prompt_provider
+
+    def _calc_query_context_relevancy(self, query: str, context: str) -> float:
+        system_prompt = self.prompt_provider.get_prompt("default_system")
+        eval_prompt = self.prompt_provider.get_prompt(
+            "rag_context_eval", {"query": query, "context": context}
+        )
+        response = self.llm_provider.get_completion(
+            self.prompt_provider._get_message_payload(
+                system_prompt, eval_prompt
+            ),
+            self.eval_generation_config,
+        )
+        response_text = response.choices[0].message.content
+        fraction = (
+            response_text
+            # Get the fraction in the returned tuple
+            .split(",")[-1][:-1]
+            # Remove any quotes and spaces
+            .replace("'", "")
+            .replace('"', "")
+            .strip()
+        )
+        return float(Fraction(fraction))
+
+    def _calc_answer_grounding(
+        self, query: str, context: str, answer: str
+    ) -> float:
+        system_prompt = self.prompt_provider.get_prompt("default_system")
+        eval_prompt = self.prompt_provider.get_prompt(
+            "rag_answer_eval",
+            {"query": query, "context": context, "answer": answer},
+        )
+        response = self.llm_provider.get_completion(
+            self.prompt_provider._get_message_payload(
+                system_prompt, eval_prompt
+            ),
+            self.eval_generation_config,
+        )
+        response_text = response.choices[0].message.content
+        fraction = (
+            response_text
+            # Get the fraction in the returned tuple
+            .split(",")[-1][:-1]
+            # Remove any quotes and spaces
+            .replace("'", "")
+            .replace('"', "")
+            .strip()
+        )
+        return float(Fraction(fraction))
+
+    def _evaluate(
+        self,
+        query: str,
+        context: str,
+        answer: str,
+        eval_generation_config: GenerationConfig,
+    ) -> dict[str, dict[str, Union[str, float]]]:
+        self.eval_generation_config = eval_generation_config
+        query_context_relevancy = self._calc_query_context_relevancy(
+            query, context
+        )
+        answer_grounding = self._calc_answer_grounding(query, context, answer)
+        return {
+            "query_context_relevancy": query_context_relevancy,
+            "answer_grounding": answer_grounding,
+        }
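
For reference, both _calc_query_context_relevancy and _calc_answer_grounding score the model's reply by taking the last comma-separated field and converting it with Fraction. Below is a minimal, self-contained sketch of just that parsing step; the parse_score helper and the sample response string are hypothetical, and it assumes the rag_context_eval / rag_answer_eval prompts ask the model to answer with a tuple whose final element is a fraction.

from fractions import Fraction

def parse_score(response_text: str) -> float:
    # Hypothetical helper mirroring the inline parsing in the diff above:
    # take the last comma-separated field, drop the trailing ")", strip
    # quotes and whitespace, then convert the fraction to a float.
    fraction = (
        response_text
        .split(",")[-1][:-1]
        .replace("'", "")
        .replace('"', "")
        .strip()
    )
    return float(Fraction(fraction))

# Assumed reply format: a tuple whose last element is a fraction.
print(parse_score("('the context partially answers the query', 3/4)"))  # 0.75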