# type: ignore from typing import AsyncGenerator from bs4 import BeautifulSoup from core.base.parsers.base_parser import AsyncParser from core.base.providers import ( CompletionProvider, DatabaseProvider, IngestionConfig, ) class HTMLParser(AsyncParser[str | bytes]): """A parser for HTML data.""" def __init__( self, config: IngestionConfig, database_provider: DatabaseProvider, llm_provider: CompletionProvider, ): self.database_provider = database_provider self.llm_provider = llm_provider self.config = config async def ingest( self, data: str | bytes, *args, **kwargs ) -> AsyncGenerator[str, None]: """Ingest HTML data and yield text.""" soup = BeautifulSoup(data, "html.parser") yield soup.get_text()