import os from typing import AsyncGenerator from r2r.base.abstractions.document import DataType from r2r.base.parsers.base_parser import AsyncParser from r2r.parsers.media.openai_helpers import process_frame_with_openai class ImageParser(AsyncParser[DataType]): """A parser for image data.""" def __init__( self, model: str = "gpt-4o", max_tokens: int = 2_048, api_base: str = "https://api.openai.com/v1/chat/completions", ): self.model = model self.max_tokens = max_tokens self.openai_api_key = os.environ.get("OPENAI_API_KEY") if not self.openai_api_key: raise ValueError( "Error, environment variable `OPENAI_API_KEY` is required to run `ImageParser`." ) self.api_base = api_base async def ingest(self, data: DataType) -> AsyncGenerator[str, None]: """Ingest image data and yield a description.""" if isinstance(data, bytes): import base64 data = base64.b64encode(data).decode("utf-8") yield process_frame_with_openai( data, self.openai_api_key, self.model, self.max_tokens, self.api_base, )