two versions of R2R are here — HEAD master

author: S. Solomon Darnell 2025-03-28 21:52:21 -0500
committer: S. Solomon Darnell 2025-03-28 21:52:21 -0500
commit: 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree: ee3dc5af3b6313e921cd920906356f5d4febc4ed /R2R/r2r/parsers/media/openai_helpers.py
parent: cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download: gn-ai-4a52a71956a8d46fcb7294ac71734504bb09bcc2.tar.gz
1 files changed, 58 insertions, 0 deletions
diff --git a/R2R/r2r/parsers/media/openai_helpers.py b/R2R/r2r/parsers/media/openai_helpers.py
new file mode 100755
index 00000000..707dadda
--- /dev/null
+++ b/R2R/r2r/parsers/media/openai_helpers.py
@@ -0,0 +1,58 @@
+"""Implementations of parsers for different data types."""
+
+import requests
+
+
+def process_frame_with_openai(
+    data: bytes,
+    api_key: str,
+    model: str = "gpt-4o",
+    max_tokens: int = 2_048,
+    api_base: str = "https://api.openai.com/v1/chat/completions",
+    timeout: float = 120.0,
+) -> str:
+    """Ask an OpenAI chat-completions endpoint to title and describe an image.
+
+    Args:
+        data: The image to describe. A ``str`` is used unchanged as the
+            base64 payload of the data URL. ``bytes``/``bytearray`` are
+            treated as raw image bytes and base64-encoded first
+            (NOTE(review): assumes bytes input is *not* already base64 --
+            confirm against callers).
+        api_key: Key sent as the ``Authorization: Bearer`` header.
+        model: Model name placed in the request payload.
+        max_tokens: Value of the ``max_tokens`` field in the payload.
+        api_base: Full URL the request is POSTed to.
+        timeout: Seconds passed to ``requests.post`` as its timeout, so a
+            stalled connection cannot block the caller indefinitely.
+
+    Returns:
+        The ``content`` of the first choice's message in the JSON response.
+
+    Raises:
+        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
+        requests.Timeout: If the endpoint does not answer within ``timeout``.
+    """
+    import base64
+
+    # Interpolating bytes into an f-string would embed the "b'...'" repr in
+    # the data URL, so convert binary input to base64 text first.
+    if isinstance(data, (bytes, bytearray)):
+        encoded_image = base64.b64encode(data).decode("ascii")
+    else:
+        encoded_image = data
+
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}",
+    }
+
+    payload = {
+        "model": model,
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "First, provide a title for the image, then explain everything that you see. Be very thorough in your analysis as a user will need to understand the image without seeing it. If it is possible to transcribe the image to text directly, then do so. The more detail you provide, the better the user will understand the image.",
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{encoded_image}"
+                        },
+                    },
+                ],
+            }
+        ],
+        "max_tokens": max_tokens,
+    }
+
+    response = requests.post(
+        api_base, headers=headers, json=payload, timeout=timeout
+    )
+    # Surface API failures (bad key, rate limit, ...) as an HTTP error rather
+    # than as a KeyError on the missing "choices" field below.
+    response.raise_for_status()
+    response_json = response.json()
+    return response_json["choices"][0]["message"]["content"]
+
+
+def process_audio_with_openai(
+    audio_file,
+    api_key: str,
+    audio_api_base: str = "https://api.openai.com/v1/audio/transcriptions",
+    timeout: float = 300.0,
+) -> str:
+    """Transcribe an audio file through an OpenAI transcription endpoint.
+
+    Args:
+        audio_file: Value uploaded as the multipart ``file`` field; handed
+            to ``requests`` unchanged.
+        api_key: Key sent as the ``Authorization: Bearer`` header.
+        audio_api_base: Full URL the request is POSTed to.
+        timeout: Seconds passed to ``requests.post`` as its timeout, so a
+            stalled connection cannot block the caller indefinitely.
+
+    Returns:
+        The ``text`` field of the JSON response.
+
+    Raises:
+        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
+        requests.Timeout: If the endpoint does not answer within ``timeout``.
+    """
+    headers = {"Authorization": f"Bearer {api_key}"}
+
+    transcription_response = requests.post(
+        audio_api_base,
+        headers=headers,
+        files={"file": audio_file},
+        data={"model": "whisper-1"},
+        timeout=timeout,
+    )
+    # Fail with the HTTP status instead of a KeyError on "text" when the
+    # endpoint returns an error payload.
+    transcription_response.raise_for_status()
+    transcription = transcription_response.json()
+
+    return transcription["text"]
author	S. Solomon Darnell	2025-03-28 21:52:21 -0500
committer	S. Solomon Darnell	2025-03-28 21:52:21 -0500
commit	4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree	ee3dc5af3b6313e921cd920906356f5d4febc4ed /R2R/r2r/parsers/media/openai_helpers.py
parent	cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download	gn-ai-4a52a71956a8d46fcb7294ac71734504bb09bcc2.tar.gz