author     S. Solomon Darnell  2025-03-28 21:52:21 -0500
committer  S. Solomon Darnell  2025-03-28 21:52:21 -0500
commit     4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree       ee3dc5af3b6313e921cd920906356f5d4febc4ed /R2R/r2r/providers/eval/llm
parent     cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
two versions of R2R are here (HEAD, master)
Diffstat (limited to 'R2R/r2r/providers/eval/llm')
-rwxr-xr-x  R2R/r2r/providers/eval/llm/base_llm_eval.py  84
1 file changed, 84 insertions, 0 deletions
diff --git a/R2R/r2r/providers/eval/llm/base_llm_eval.py b/R2R/r2r/providers/eval/llm/base_llm_eval.py
new file mode 100755
index 00000000..7c573a34
--- /dev/null
+++ b/R2R/r2r/providers/eval/llm/base_llm_eval.py
@@ -0,0 +1,84 @@
+from fractions import Fraction
+from typing import Union
+
+from r2r import EvalConfig, EvalProvider, LLMProvider, PromptProvider
+from r2r.base.abstractions.llm import GenerationConfig
+
+
+class LLMEvalProvider(EvalProvider):
+    def __init__(
+        self,
+        config: EvalConfig,
+        llm_provider: LLMProvider,
+        prompt_provider: PromptProvider,
+    ):
+        super().__init__(config)
+
+        self.llm_provider = llm_provider
+        self.prompt_provider = prompt_provider
+
+    def _calc_query_context_relevancy(self, query: str, context: str) -> float:
+        system_prompt = self.prompt_provider.get_prompt("default_system")
+        eval_prompt = self.prompt_provider.get_prompt(
+            "rag_context_eval", {"query": query, "context": context}
+        )
+        response = self.llm_provider.get_completion(
+            self.prompt_provider._get_message_payload(
+                system_prompt, eval_prompt
+            ),
+            self.eval_generation_config,
+        )
+        response_text = response.choices[0].message.content
+        fraction = (
+            response_text
+            # Take the fraction after the last comma and drop the trailing ")"
+            .split(",")[-1][:-1]
+            # Remove any quotes and spaces
+            .replace("'", "")
+            .replace('"', "")
+            .strip()
+        )
+        return float(Fraction(fraction))
+
+    def _calc_answer_grounding(
+        self, query: str, context: str, answer: str
+    ) -> float:
+        system_prompt = self.prompt_provider.get_prompt("default_system")
+        eval_prompt = self.prompt_provider.get_prompt(
+            "rag_answer_eval",
+            {"query": query, "context": context, "answer": answer},
+        )
+        response = self.llm_provider.get_completion(
+            self.prompt_provider._get_message_payload(
+                system_prompt, eval_prompt
+            ),
+            self.eval_generation_config,
+        )
+        response_text = response.choices[0].message.content
+        fraction = (
+            response_text
+            # Take the fraction after the last comma and drop the trailing ")"
+            .split(",")[-1][:-1]
+            # Remove any quotes and spaces
+            .replace("'", "")
+            .replace('"', "")
+            .strip()
+        )
+        return float(Fraction(fraction))
+
+    def _evaluate(
+        self,
+        query: str,
+        context: str,
+        answer: str,
+        eval_generation_config: GenerationConfig,
+    ) -> dict[str, Union[str, float]]:
+        self.eval_generation_config = eval_generation_config
+        query_context_relevancy = self._calc_query_context_relevancy(
+            query, context
+        )
+        answer_grounding = self._calc_answer_grounding(query, context, answer)
+        return {
+            "query_context_relevancy": query_context_relevancy,
+            "answer_grounding": answer_grounding,
+        }