diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /R2R/r2r/parsers/media/openai_helpers.py | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-4a52a71956a8d46fcb7294ac71734504bb09bcc2.tar.gz |
Diffstat (limited to 'R2R/r2r/parsers/media/openai_helpers.py')
-rwxr-xr-x | R2R/r2r/parsers/media/openai_helpers.py | 58 |
1 files changed, 58 insertions, 0 deletions
"""Helpers that send image frames and audio files to OpenAI-style HTTP APIs."""

import base64
from typing import Union

import requests

# Upper bound, in seconds, that `requests` waits for the server. Without a
# timeout a stalled connection blocks the caller forever. The value is kept
# deliberately generous because vision and transcription calls can be slow.
_DEFAULT_TIMEOUT_SECONDS = 600.0


def _to_base64_text(data: Union[bytes, bytearray, memoryview, str]) -> str:
    """Return *data* as base64 text suitable for embedding in a data URL.

    A ``str`` is taken to be base64 text already and is returned unchanged.
    Bytes-like input is treated as the raw image content and is encoded here
    (NOTE(review): assumes bytes are raw image bytes, not pre-encoded base64
    -- confirm against callers). Formatting bytes directly into an f-string
    would embed their ``b'...'`` repr and produce an invalid data URL.
    """
    if isinstance(data, (bytes, bytearray, memoryview)):
        return base64.b64encode(data).decode("ascii")
    return data


def process_frame_with_openai(
    data: Union[bytes, str],
    api_key: str,
    model: str = "gpt-4o",
    max_tokens: int = 2_048,
    api_base: str = "https://api.openai.com/v1/chat/completions",
    *,
    timeout: float = _DEFAULT_TIMEOUT_SECONDS,
) -> str:
    """Ask a chat-completions endpoint to title and describe one image.

    Args:
        data: The image, either as base64 text or as bytes-like content that
            is base64-encoded before sending. It is submitted as a
            ``data:image/jpeg;base64,...`` URL.
        api_key: Bearer token placed in the ``Authorization`` header.
        model: Model name sent in the request payload.
        max_tokens: Value of the ``max_tokens`` field in the request payload.
        api_base: Full URL of the chat-completions endpoint.
        timeout: Seconds passed to ``requests.post`` as its timeout.

    Returns:
        The ``content`` of the first choice's message in the JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    encoded_image = _to_base64_text(data)

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "First, provide a title for the image, then explain everything that you see. Be very thorough in your analysis as a user will need to understand the image without seeing it. If it is possible to transcribe the image to text directly, then do so. The more detail you provide, the better the user will understand the image.",
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{encoded_image}"
                        },
                    },
                ],
            }
        ],
        "max_tokens": max_tokens,
    }

    response = requests.post(
        api_base, headers=headers, json=payload, timeout=timeout
    )
    # Fail with the HTTP status instead of an opaque KeyError on "choices"
    # when the API returns an error document.
    response.raise_for_status()
    response_json = response.json()
    return response_json["choices"][0]["message"]["content"]


def process_audio_with_openai(
    audio_file,
    api_key: str,
    audio_api_base: str = "https://api.openai.com/v1/audio/transcriptions",
    *,
    model: str = "whisper-1",
    timeout: float = _DEFAULT_TIMEOUT_SECONDS,
) -> str:
    """Upload an audio file to a transcription endpoint and return its text.

    Args:
        audio_file: Value sent as the ``file`` part of the multipart upload;
            it is handed to ``requests`` unchanged.
        api_key: Bearer token placed in the ``Authorization`` header.
        audio_api_base: Full URL of the transcription endpoint.
        model: Model name sent in the form data.
        timeout: Seconds passed to ``requests.post`` as its timeout.

    Returns:
        The ``text`` field of the JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    headers = {"Authorization": f"Bearer {api_key}"}

    transcription_response = requests.post(
        audio_api_base,
        headers=headers,
        files={"file": audio_file},
        data={"model": model},
        timeout=timeout,
    )
    # Surface API errors by status rather than as a KeyError on "text".
    transcription_response.raise_for_status()
    transcription = transcription_response.json()

    return transcription["text"]