aboutsummaryrefslogtreecommitdiff
# type: ignore
from typing import AsyncGenerator

from core.base.parsers.base_parser import AsyncParser
from core.base.providers import (
    CompletionProvider,
    DatabaseProvider,
    IngestionConfig,
)


class BMPParser(AsyncParser[str | bytes]):
    """A parser for BMP image data."""

    def __init__(
        self,
        config: IngestionConfig,
        database_provider: DatabaseProvider,
        llm_provider: CompletionProvider,
    ):
        self.database_provider = database_provider
        self.llm_provider = llm_provider
        self.config = config

        import struct

        self.struct = struct

    async def extract_bmp_metadata(self, data: bytes) -> dict:
        """Extract metadata from BMP file header."""
        try:
            # BMP header format
            header_format = "<2sIHHI"
            header_size = self.struct.calcsize(header_format)

            # Unpack header data
            (
                signature,
                file_size,
                reserved,
                reserved2,
                data_offset,
            ) = self.struct.unpack(header_format, data[:header_size])

            # DIB header
            dib_format = "<IiiHHIIiiII"
            dib_size = self.struct.calcsize(dib_format)
            dib_data = self.struct.unpack(dib_format, data[14 : 14 + dib_size])

            width = dib_data[1]
            height = abs(dib_data[2])  # Height can be negative
            bits_per_pixel = dib_data[4]
            compression = dib_data[5]

            return {
                "width": width,
                "height": height,
                "bits_per_pixel": bits_per_pixel,
                "file_size": file_size,
                "compression": compression,
            }
        except Exception as e:
            return {"error": f"Failed to parse BMP header: {str(e)}"}

    async def ingest(
        self, data: str | bytes, **kwargs
    ) -> AsyncGenerator[str, None]:
        """Ingest BMP data and yield metadata description."""
        if isinstance(data, str):
            # Convert base64 string to bytes if needed
            import base64

            data = base64.b64decode(data)

        metadata = await self.extract_bmp_metadata(data)

        # Generate description of the BMP file
        yield f"BMP image with dimensions {metadata.get('width', 'unknown')}x{metadata.get('height', 'unknown')} pixels, {metadata.get('bits_per_pixel', 'unknown')} bits per pixel, file size: {metadata.get('file_size', 'unknown')} bytes"