from abc import ABC, abstractmethod
from typing import Any, Optional, Type
from pydantic import BaseModel
class InnerConfig(BaseModel, ABC):
"""A base provider configuration class."""
extra_fields: dict[str, Any] = {}
class Config:
populate_by_name = True
arbitrary_types_allowed = True
ignore_extra = True
@classmethod
def create(cls: Type["InnerConfig"], **kwargs: Any) -> "InnerConfig":
base_args = cls.model_fields.keys()
filtered_kwargs = {
k: v if v != "None" else None
for k, v in kwargs.items()
if k in base_args
}
instance = cls(**filtered_kwargs) # type: ignore
for k, v in kwargs.items():
if k not in base_args:
instance.extra_fields[k] = v
return instance
class AppConfig(InnerConfig):
project_name: Optional[str] = None
default_max_documents_per_user: Optional[int] = 100
default_max_chunks_per_user: Optional[int] = 10_000
default_max_collections_per_user: Optional[int] = 5
default_max_upload_size: int = 2_000_000 # e.g. ~2 MB
quality_llm: Optional[str] = None
fast_llm: Optional[str] = None
vlm: Optional[str] = None
audio_lm: Optional[str] = None
reasoning_llm: Optional[str] = None
planning_llm: Optional[str] = None
# File extension to max-size mapping
# These are examples; adjust sizes as needed.
max_upload_size_by_type: dict[str, int] = {
# Common text-based formats
"txt": 2_000_000,
"md": 2_000_000,
"tsv": 2_000_000,
"csv": 5_000_000,
"xml": 2_000_000,
"html": 5_000_000,
# Office docs
"doc": 10_000_000,
"docx": 10_000_000,
"ppt": 20_000_000,
"pptx": 20_000_000,
"xls": 10_000_000,
"xlsx": 10_000_000,
"odt": 5_000_000,
# PDFs can expand quite a bit when converted to text
"pdf": 30_000_000,
# E-mail
"eml": 5_000_000,
"msg": 5_000_000,
"p7s": 5_000_000,
# Images
"bmp": 5_000_000,
"heic": 5_000_000,
"jpeg": 5_000_000,
"jpg": 5_000_000,
"png": 5_000_000,
"tiff": 5_000_000,
# Others
"epub": 10_000_000,
"rtf": 5_000_000,
"rst": 5_000_000,
"org": 5_000_000,
}
class ProviderConfig(BaseModel, ABC):
"""A base provider configuration class."""
app: AppConfig # Add an app_config field
extra_fields: dict[str, Any] = {}
provider: Optional[str] = None
class Config:
populate_by_name = True
arbitrary_types_allowed = True
ignore_extra = True
@abstractmethod
def validate_config(self) -> None:
pass
@classmethod
def create(cls: Type["ProviderConfig"], **kwargs: Any) -> "ProviderConfig":
base_args = cls.model_fields.keys()
filtered_kwargs = {
k: v if v != "None" else None
for k, v in kwargs.items()
if k in base_args
}
instance = cls(**filtered_kwargs) # type: ignore
for k, v in kwargs.items():
if k not in base_args:
instance.extra_fields[k] = v
return instance
@property
@abstractmethod
def supported_providers(self) -> list[str]:
"""Define a list of supported providers."""
pass
@classmethod
def from_dict(
cls: Type["ProviderConfig"], data: dict[str, Any]
) -> "ProviderConfig":
"""Create a new instance of the config from a dictionary."""
return cls.create(**data)
class Provider(ABC):
"""A base provider class to provide a common interface for all
providers."""
def __init__(self, config: ProviderConfig, *args, **kwargs):
if config:
config.validate_config()
self.config = config