aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/core/parsers/media/bmp_parser.py
blob: 78646da7b0b3815539e4e94418b8580bd457474e (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# type: ignore
from typing import AsyncGenerator

from core.base.parsers.base_parser import AsyncParser
from core.base.providers import (
    CompletionProvider,
    DatabaseProvider,
    IngestionConfig,
)


class BMPParser(AsyncParser[str | bytes]):
    """A parser for BMP image data.

    Only the fixed-size headers at the start of the file are inspected; pixel
    data is never decoded. The parser yields a single one-line description of
    the image (dimensions, bit depth and declared file size).
    """

    # Two-byte magic that opens every Windows BMP file.
    _BMP_SIGNATURE = b"BM"
    # File header: signature, file size, two reserved words, pixel-data offset.
    _FILE_HEADER_FORMAT = "<2sIHHI"
    # 40-byte BITMAPINFOHEADER layout (also the prefix of the later, longer
    # V4/V5 headers): size, width, height, planes, bit count, compression,
    # image size, x/y pixels-per-metre, colours used, colours important.
    _INFO_HEADER_FORMAT = "<IiiHHIIiiII"
    # Legacy 12-byte BITMAPCOREHEADER: size, width, height, planes, bit count.
    _CORE_HEADER_FORMAT = "<IHHHH"
    _CORE_HEADER_SIZE = 12

    def __init__(
        self,
        config: IngestionConfig,
        database_provider: DatabaseProvider,
        llm_provider: CompletionProvider,
    ):
        """Store the providers and configuration on the instance.

        Args:
            config: Ingestion configuration, kept as ``self.config``.
            database_provider: Kept as ``self.database_provider``.
            llm_provider: Kept as ``self.llm_provider``.
        """
        self.database_provider = database_provider
        self.llm_provider = llm_provider
        self.config = config

        # The struct module is imported here and exposed as an attribute;
        # extract_bmp_metadata reaches it through ``self.struct``.
        import struct

        self.struct = struct

    async def extract_bmp_metadata(self, data: bytes) -> dict:
        """Extract metadata from the BMP file header and DIB header.

        Args:
            data: Raw bytes of the BMP file (at least the leading headers).

        Returns:
            On success, a dict with the keys ``width``, ``height`` (always
            non-negative), ``bits_per_pixel``, ``file_size`` (the size
            declared in the file header) and ``compression``. On failure, a
            dict with the single key ``error`` describing what went wrong;
            this method does not raise for malformed input.
        """
        struct_mod = self.struct
        try:
            file_header_size = struct_mod.calcsize(self._FILE_HEADER_FORMAT)
            signature, file_size, _, _, _ = struct_mod.unpack_from(
                self._FILE_HEADER_FORMAT, data, 0
            )

            # Without this check any sufficiently long blob would be decoded
            # into meaningless numbers instead of being reported as invalid.
            if signature != self._BMP_SIGNATURE:
                return {
                    "error": "Failed to parse BMP header: "
                    f"unexpected signature {signature!r}"
                }

            # The DIB header starts right after the file header and begins
            # with its own size, which tells the two layouts apart.
            (dib_header_size,) = struct_mod.unpack_from(
                "<I", data, file_header_size
            )

            if dib_header_size == self._CORE_HEADER_SIZE:
                _, width, height, _, bits_per_pixel = struct_mod.unpack_from(
                    self._CORE_HEADER_FORMAT, data, file_header_size
                )
                # The core header has no compression field; such bitmaps are
                # always uncompressed (BI_RGB == 0).
                compression = 0
            else:
                dib_fields = struct_mod.unpack_from(
                    self._INFO_HEADER_FORMAT, data, file_header_size
                )
                width = dib_fields[1]
                # A negative height marks a top-down bitmap; report magnitude.
                height = abs(dib_fields[2])
                bits_per_pixel = dib_fields[4]
                compression = dib_fields[5]
        except (struct_mod.error, TypeError) as e:
            # struct.error: buffer too short; TypeError: not a bytes-like object.
            return {"error": f"Failed to parse BMP header: {str(e)}"}

        return {
            "width": width,
            "height": height,
            "bits_per_pixel": bits_per_pixel,
            "file_size": file_size,
            "compression": compression,
        }

    async def ingest(
        self, data: str | bytes, **kwargs
    ) -> AsyncGenerator[str, None]:
        """Ingest BMP data and yield a metadata description.

        Args:
            data: The BMP file as raw bytes, or as a base64-encoded string
                which is decoded before parsing.
            **kwargs: Accepted and ignored.

        Yields:
            A single sentence describing the image. Any field that could not
            be read from the headers is reported as ``unknown``.
        """
        if isinstance(data, str):
            # Convert base64 string to bytes if needed
            import base64

            data = base64.b64decode(data)

        metadata = await self.extract_bmp_metadata(data)

        # When header parsing failed, metadata only holds "error", so every
        # lookup below falls back to the 'unknown' placeholder.
        width = metadata.get("width", "unknown")
        height = metadata.get("height", "unknown")
        bits_per_pixel = metadata.get("bits_per_pixel", "unknown")
        file_size = metadata.get("file_size", "unknown")

        # Generate description of the BMP file
        yield f"BMP image with dimensions {width}x{height} pixels, {bits_per_pixel} bits per pixel, file size: {file_size} bytes"