blob: 7c40656a63e7d655a6f7275b260b828b7256f9a7 (
about) (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
import asyncio
import base64
import os
from typing import AsyncGenerator

from r2r.base.abstractions.document import DataType
from r2r.base.parsers.base_parser import AsyncParser
from r2r.parsers.media.openai_helpers import process_frame_with_openai


class ImageParser(AsyncParser[DataType]):
    """A parser for image data.

    Each image is handed to ``process_frame_with_openai`` together with the
    configured model, token limit and endpoint, and the helper's result is
    yielded as the image description.
    """

    def __init__(
        self,
        model: str = "gpt-4o",
        max_tokens: int = 2_048,
        api_base: str = "https://api.openai.com/v1/chat/completions",
    ):
        """Store the request settings and read the API key from the environment.

        Args:
            model: Model name forwarded to ``process_frame_with_openai``.
            max_tokens: Token limit forwarded to ``process_frame_with_openai``.
            api_base: Endpoint URL forwarded to ``process_frame_with_openai``.

        Raises:
            ValueError: If the ``OPENAI_API_KEY`` environment variable is
                unset or empty.
        """
        self.model = model
        self.max_tokens = max_tokens
        self.api_base = api_base

        # Fail at construction time rather than on the first ingest call.
        self.openai_api_key = os.environ.get("OPENAI_API_KEY")
        if not self.openai_api_key:
            raise ValueError(
                "Error, environment variable `OPENAI_API_KEY` is required to run `ImageParser`."
            )

    async def ingest(self, data: DataType) -> AsyncGenerator[str, None]:
        """Ingest image data and yield a description.

        Args:
            data: The image. ``bytes`` input is base64-encoded to a UTF-8
                string first; any other value is forwarded unchanged
                (presumably an already base64-encoded string -- confirm
                against callers).

        Yields:
            The single value returned by ``process_frame_with_openai``.
        """
        if isinstance(data, bytes):
            data = base64.b64encode(data).decode("utf-8")

        # process_frame_with_openai is called synchronously (its result is
        # used without awaiting) and presumably performs a blocking HTTP
        # request. Running it in the default executor keeps the event loop
        # free to serve other tasks while the request is in flight.
        loop = asyncio.get_running_loop()
        description = await loop.run_in_executor(
            None,
            process_frame_with_openai,
            data,
            self.openai_api_key,
            self.model,
            self.max_tokens,
            self.api_base,
        )
        yield description
|