from fractions import Fraction
from typing import Union
from r2r import EvalConfig, EvalProvider, LLMProvider, PromptProvider
from r2r.base.abstractions.llm import GenerationConfig
class LLMEvalProvider(EvalProvider):
    """Evaluation provider that scores RAG outputs by prompting an LLM.

    Two metrics are produced: how relevant the retrieved context is to the
    query, and how well the generated answer is grounded in that context.
    The judge LLM is expected to reply with a tuple-style string whose last
    element is a fraction, e.g. ``('reasoning...', 3/5)``; the fraction is
    parsed into a float score.
    """

    def __init__(
        self,
        config: EvalConfig,
        llm_provider: LLMProvider,
        prompt_provider: PromptProvider,
    ):
        super().__init__(config)
        self.llm_provider = llm_provider
        self.prompt_provider = prompt_provider

    @staticmethod
    def _parse_fraction(response_text: str) -> float:
        """Extract the trailing fraction from the LLM's tuple-style reply.

        Takes the last comma-separated field, drops the final character
        (the closing parenthesis), strips quotes and whitespace, and
        converts e.g. ``"3/5"`` to ``0.6``.

        Raises:
            ValueError: if the reply does not end in a parseable fraction.
            ZeroDivisionError: if the fraction has a zero denominator.
        """
        fraction = (
            response_text
            # Get the fraction in the returned tuple
            .split(",")[-1][:-1]
            # Remove any quotes and spaces
            .replace("'", "")
            .replace('"', "")
            .strip()
        )
        return float(Fraction(fraction))

    def _score_with_llm(
        self,
        prompt_name: str,
        prompt_inputs: dict,
        eval_generation_config: GenerationConfig,
    ) -> float:
        """Run one eval prompt through the judge LLM and parse its score.

        Shared implementation for both metrics: builds the system/eval
        message payload, requests a completion, and parses the fraction
        from the first choice's message content.
        """
        system_prompt = self.prompt_provider.get_prompt("default_system")
        eval_prompt = self.prompt_provider.get_prompt(
            prompt_name, prompt_inputs
        )
        response = self.llm_provider.get_completion(
            self.prompt_provider._get_message_payload(
                system_prompt, eval_prompt
            ),
            eval_generation_config,
        )
        return self._parse_fraction(response.choices[0].message.content)

    def _calc_query_context_relevancy(self, query: str, context: str) -> float:
        """Score how relevant `context` is to `query` (0.0 to 1.0)."""
        return self._score_with_llm(
            "rag_context_eval",
            {"query": query, "context": context},
            self.eval_generation_config,
        )

    def _calc_answer_grounding(
        self, query: str, context: str, answer: str
    ) -> float:
        """Score how well `answer` is supported by `context` (0.0 to 1.0)."""
        return self._score_with_llm(
            "rag_answer_eval",
            {"query": query, "context": context, "answer": answer},
            self.eval_generation_config,
        )

    def _evaluate(
        self,
        query: str,
        context: str,
        answer: str,
        eval_generation_config: GenerationConfig,
    ) -> dict[str, float]:
        """Evaluate a RAG (query, context, answer) triple.

        Returns:
            A dict with ``"query_context_relevancy"`` and
            ``"answer_grounding"`` float scores.

        NOTE: the generation config is stashed on the instance so the
        per-metric helpers keep their original signatures; this makes
        concurrent `_evaluate` calls on one instance unsafe.
        """
        self.eval_generation_config = eval_generation_config
        query_context_relevancy = self._calc_query_context_relevancy(
            query, context
        )
        answer_grounding = self._calc_answer_grounding(query, context, answer)
        return {
            "query_context_relevancy": query_context_relevancy,
            "answer_grounding": answer_grounding,
        }