1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
# type: ignore
import base64
import struct
from typing import AsyncGenerator

from core.base.parsers.base_parser import AsyncParser
from core.base.providers import (
    CompletionProvider,
    DatabaseProvider,
    IngestionConfig,
)
class BMPParser(AsyncParser[str | bytes]):
    """A parser for BMP image data.

    Only the file header and the DIB header are read; pixel data is never
    decoded. ``ingest`` yields a single one-line textual description built
    from those header fields.
    """

    # Magic bytes that open a Windows BMP file.
    _SIGNATURE = b"BM"
    # File header: signature, file size, two reserved words, pixel-data
    # offset. Little-endian, no padding (14 bytes).
    _FILE_HEADER = struct.Struct("<2sIHHI")
    # DIB header laid out as a 40-byte BITMAPINFOHEADER: header size, width,
    # height, planes, bits per pixel, compression, image size, x/y
    # resolution, colours used, important colours.
    _INFO_HEADER = struct.Struct("<IiiHHIIiiII")

    def __init__(
        self,
        config: IngestionConfig,
        database_provider: DatabaseProvider,
        llm_provider: CompletionProvider,
    ):
        """Store the ingestion configuration and provider handles."""
        self.database_provider = database_provider
        self.llm_provider = llm_provider
        self.config = config
        # Kept only for backward compatibility with code that reaches the
        # module through the instance; this class uses the import directly.
        self.struct = struct

    async def extract_bmp_metadata(self, data: bytes) -> dict[str, int | str]:
        """Extract metadata from the BMP file and DIB headers.

        Args:
            data: Raw bytes of the BMP file. At least the first 54 bytes
                (14-byte file header plus 40-byte DIB header) are required.

        Returns:
            On success, a dict with the integer keys ``width``, ``height``,
            ``bits_per_pixel``, ``file_size`` and ``compression``. On
            failure (truncated input, wrong signature, non-bytes input), a
            dict with the single key ``error`` holding a message; no
            exception is raised for those cases.
        """
        try:
            signature, file_size, _, _, _ = self._FILE_HEADER.unpack_from(
                data, 0
            )
            # Without this check any sufficiently long blob would be
            # described as a BMP with meaningless dimensions.
            if signature != self._SIGNATURE:
                raise ValueError(
                    f"unexpected signature {signature!r}, "
                    f"expected {self._SIGNATURE!r}"
                )
            # The DIB header starts immediately after the file header.
            dib_fields = self._INFO_HEADER.unpack_from(
                data, self._FILE_HEADER.size
            )
        except (struct.error, TypeError, ValueError) as e:
            return {"error": f"Failed to parse BMP header: {str(e)}"}

        _, width, height, _, bits_per_pixel, compression, *_ = dib_fields
        return {
            "width": width,
            # The stored height may be negative; report its magnitude.
            "height": abs(height),
            "bits_per_pixel": bits_per_pixel,
            "file_size": file_size,
            "compression": compression,
        }

    async def ingest(
        self, data: str | bytes, **kwargs
    ) -> AsyncGenerator[str, None]:
        """Ingest BMP data and yield a metadata description.

        Args:
            data: Raw BMP bytes, or a base64-encoded string of those bytes.
            **kwargs: Accepted for interface compatibility; unused here.

        Yields:
            One sentence describing dimensions, bit depth and file size.
            Fields that could not be parsed are rendered as ``unknown``.
        """
        if isinstance(data, str):
            # String input is treated as base64-encoded image bytes.
            data = base64.b64decode(data)

        metadata = await self.extract_bmp_metadata(data)

        # Generate description of the BMP file
        yield f"BMP image with dimensions {metadata.get('width', 'unknown')}x{metadata.get('height', 'unknown')} pixels, {metadata.get('bits_per_pixel', 'unknown')} bits per pixel, file size: {metadata.get('file_size', 'unknown')} bytes"
|