author    S. Solomon Darnell  2025-03-28 21:52:21 -0500
committer S. Solomon Darnell  2025-03-28 21:52:21 -0500
commit    4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree      ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/anthropic
parent    cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download  gn-ai-master.tar.gz
two versions of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/anthropic')
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/__init__.py | 106
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_base_client.py | 2153
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_client.py | 531
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_compat.py | 219
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_constants.py | 18
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_decoders/jsonl.py | 123
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_exceptions.py | 126
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_files.py | 123
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_legacy_response.py | 511
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_models.py | 832
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_qs.py | 150
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_resource.py | 41
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_response.py | 872
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_streaming.py | 443
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_types.py | 219
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_utils/__init__.py | 57
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_utils/_logs.py | 25
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_utils/_proxy.py | 62
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_utils/_reflection.py | 42
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_utils/_streams.py | 12
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_utils/_sync.py | 86
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_utils/_transform.py | 402
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_utils/_typing.py | 149
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_utils/_utils.py | 414
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/_version.py | 4
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/.keep | 4
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/__init__.py | 0
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/_extras/__init__.py | 1
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/_extras/_common.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/_extras/_google_auth.py | 29
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/bedrock/__init__.py | 1
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_auth.py | 72
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_beta.py | 102
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_beta_messages.py | 93
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_client.py | 390
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_stream.py | 37
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_stream_decoder.py | 64
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/streaming/__init__.py | 26
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/streaming/_beta_messages.py | 462
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/streaming/_beta_types.py | 100
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/streaming/_messages.py | 462
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/streaming/_types.py | 100
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/vertex/__init__.py | 1
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/vertex/_auth.py | 42
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/vertex/_beta.py | 102
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/vertex/_beta_messages.py | 93
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/lib/vertex/_client.py | 406
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/pagination.py | 84
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/py.typed | 0
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/resources/__init__.py | 61
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/resources/beta/__init__.py | 47
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/resources/beta/beta.py | 134
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/resources/beta/messages/__init__.py | 33
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/resources/beta/messages/batches.py | 889
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/resources/beta/messages/messages.py | 2587
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/resources/beta/models.py | 300
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/resources/completions.py | 823
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/resources/messages/__init__.py | 35
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/resources/messages/batches.py | 717
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/resources/messages/messages.py | 2551
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/resources/models.py | 300
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/__init__.py | 107
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/anthropic_beta_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/base64_image_source_param.py | 23
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/base64_pdf_source_param.py | 23
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/__init__.py | 76
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_base64_image_source_param.py | 23
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_base64_pdf_block_param.py | 33
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_base64_pdf_source_param.py | 23
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_cache_control_ephemeral_param.py | 11
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_char_location.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_char_location_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_content_block_location.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_content_block_location_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_page_location.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_page_location_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citations_config_param.py | 11
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citations_delta.py | 23
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_content_block.py | 17
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_content_block_param.py | 26
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_content_block_source_content_param.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_content_block_source_param.py | 16
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_image_block_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_input_json_delta.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_message.py | 112
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_message_delta_usage.py | 11
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_message_param.py | 16
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_message_tokens_count.py | 14
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_metadata_param.py | 18
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_model_info.py | 28
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_plain_text_source_param.py | 15
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_content_block_delta_event.py | 27
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_content_block_start_event.py | 26
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_content_block_stop_event.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_message_delta_event.py | 39
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_message_start_event.py | 14
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_message_stop_event.py | 11
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_message_stream_event.py | 26
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_redacted_thinking_block.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_redacted_thinking_block_param.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_signature_delta.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_block.py | 23
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_block_param.py | 21
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_citation.py | 16
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_citation_param.py | 16
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_delta.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_block.py | 15
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_block_param.py | 15
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_config_disabled_param.py | 11
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_config_enabled_param.py | 24
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_config_param.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_delta.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_bash_20241022_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_bash_20250124_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_any_param.py | 18
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_auto_param.py | 18
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_none_param.py | 11
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_param.py | 17
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_tool_param.py | 21
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_computer_use_20241022_param.py | 31
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_computer_use_20250124_param.py | 31
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_param.py | 47
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_result_block_param.py | 26
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_text_editor_20241022_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_text_editor_20250124_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_union_param.py | 26
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_use_block.py | 17
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_use_block_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_url_image_source_param.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_url_pdf_source_param.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/beta_usage.py | 21
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/message_count_tokens_params.py | 234
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/message_create_params.py | 297
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/messages/__init__.py | 17
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/messages/batch_create_params.py | 41
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/messages/batch_list_params.py | 34
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_deleted_message_batch.py | 18
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch.py | 77
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_canceled_result.py | 11
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_errored_result.py | 14
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_expired_result.py | 11
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_individual_response.py | 26
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_request_counts.py | 35
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_result.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_succeeded_result.py | 14
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta/model_list_params.py | 27
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta_api_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta_authentication_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta_billing_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta_error.py | 32
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta_error_response.py | 14
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta_gateway_timeout_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta_invalid_request_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta_not_found_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta_overloaded_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta_permission_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/beta_rate_limit_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/cache_control_ephemeral_param.py | 11
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/citation_char_location.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/citation_char_location_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/citation_content_block_location.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/citation_content_block_location_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/citation_page_location.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/citation_page_location_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/citations_config_param.py | 11
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/citations_delta.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/completion.py | 43
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/completion_create_params.py | 131
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/content_block.py | 16
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/content_block_delta_event.py | 9
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/content_block_param.py | 26
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/content_block_source_content_param.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/content_block_source_param.py | 16
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/content_block_start_event.py | 9
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/content_block_stop_event.py | 9
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/document_block_param.py | 31
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/image_block_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/input_json_delta.py | 16
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/message.py | 112
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/message_count_tokens_params.py | 212
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/message_count_tokens_tool_param.py | 14
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/message_create_params.py | 320
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/message_delta_event.py | 9
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/message_delta_usage.py | 11
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/message_param.py | 39
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/message_start_event.py | 9
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/message_stop_event.py | 9
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/message_stream_event.py | 9
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/message_tokens_count.py | 14
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/messages/__init__.py | 15
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/messages/batch_create_params.py | 36
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/messages/batch_list_params.py | 27
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/messages/deleted_message_batch.py | 18
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch.py | 77
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_canceled_result.py | 11
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_errored_result.py | 14
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_expired_result.py | 11
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_individual_response.py | 26
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_request_counts.py | 35
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_result.py | 19
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_succeeded_result.py | 14
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/metadata_param.py | 18
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/model.py | 25
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/model_info.py | 28
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/model_list_params.py | 27
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/model_param.py | 27
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/plain_text_source_param.py | 15
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/raw_content_block_delta_event.py | 26
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/raw_content_block_start_event.py | 25
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/raw_content_block_stop_event.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/raw_message_delta_event.py | 39
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/raw_message_start_event.py | 14
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/raw_message_stop_event.py | 11
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/raw_message_stream_event.py | 26
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/redacted_thinking_block.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/redacted_thinking_block_param.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/shared/__init__.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/shared/api_error_object.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/shared/authentication_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/shared/billing_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/shared/error_object.py | 32
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/shared/error_response.py | 14
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/shared/gateway_timeout_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/shared/invalid_request_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/shared/not_found_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/shared/overloaded_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/shared/permission_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/shared/rate_limit_error.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/signature_delta.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/text_block.py | 23
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/text_block_param.py | 21
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/text_citation.py | 15
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/text_citation_param.py | 16
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/text_delta.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/thinking_block.py | 15
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/thinking_block_param.py | 15
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/thinking_config_disabled_param.py | 11
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/thinking_config_enabled_param.py | 24
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/thinking_config_param.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/thinking_delta.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/tool_bash_20250124_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/tool_choice_any_param.py | 18
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/tool_choice_auto_param.py | 18
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/tool_choice_none_param.py | 11
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/tool_choice_param.py | 15
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/tool_choice_tool_param.py | 21
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/tool_param.py | 48
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/tool_result_block_param.py | 26
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/tool_text_editor_20250124_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/tool_union_param.py | 14
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/tool_use_block.py | 17
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/tool_use_block_param.py | 22
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/url_image_source_param.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/url_pdf_source_param.py | 13
-rw-r--r--  .venv/lib/python3.12/site-packages/anthropic/types/usage.py | 21
255 files changed, 24112 insertions(+), 0 deletions(-)
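
The package added above is the vendored Anthropic Python SDK: resources/ implements the client's API surface, types/ holds the request and response models, and lib/ carries the Bedrock, Vertex, and streaming helpers. For orientation, a minimal usage sketch of that surface (the model id and prompt are illustrative; the client reads ANTHROPIC_API_KEY from the environment):

    from anthropic import Anthropic

    client = Anthropic()  # credentials come from the ANTHROPIC_API_KEY environment variable

    # resources/messages/messages.py backs client.messages;
    # types/message.py models the returned Message object.
    message = client.messages.create(
        model="claude-3-5-sonnet-latest",  # illustrative model id
        max_tokens=1024,
        messages=[{"role": "user", "content": "Hello, world"}],
    )
    print(message.content)
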
diff --git a/.venv/lib/python3.12/site-packages/anthropic/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/__init__.py
new file mode 100644
index 00000000..8cba2f09
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/__init__.py
@@ -0,0 +1,106 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from . import types
+from ._types import NOT_GIVEN, Omit, NoneType, NotGiven, Transport, ProxiesTypes
+from ._utils import file_from_path
+from ._client import (
+    Client,
+    Stream,
+    Timeout,
+    Anthropic,
+    Transport,
+    AsyncClient,
+    AsyncStream,
+    AsyncAnthropic,
+    RequestOptions,
+)
+from ._models import BaseModel
+from ._version import __title__, __version__
+from ._response import APIResponse as APIResponse, AsyncAPIResponse as AsyncAPIResponse
+from ._constants import (
+    AI_PROMPT as AI_PROMPT,
+    HUMAN_PROMPT as HUMAN_PROMPT,
+    DEFAULT_TIMEOUT,
+    DEFAULT_MAX_RETRIES,
+    DEFAULT_CONNECTION_LIMITS,
+)
+from ._exceptions import (
+    APIError,
+    ConflictError,
+    NotFoundError,
+    AnthropicError,
+    APIStatusError,
+    RateLimitError,
+    APITimeoutError,
+    BadRequestError,
+    APIConnectionError,
+    AuthenticationError,
+    InternalServerError,
+    PermissionDeniedError,
+    UnprocessableEntityError,
+    APIResponseValidationError,
+)
+from ._base_client import DefaultHttpxClient, DefaultAsyncHttpxClient
+from ._utils._logs import setup_logging as _setup_logging
+
+__all__ = [
+    "types",
+    "__version__",
+    "__title__",
+    "NoneType",
+    "Transport",
+    "ProxiesTypes",
+    "NotGiven",
+    "NOT_GIVEN",
+    "Omit",
+    "AnthropicError",
+    "APIError",
+    "APIStatusError",
+    "APITimeoutError",
+    "APIConnectionError",
+    "APIResponseValidationError",
+    "BadRequestError",
+    "AuthenticationError",
+    "PermissionDeniedError",
+    "NotFoundError",
+    "ConflictError",
+    "UnprocessableEntityError",
+    "RateLimitError",
+    "InternalServerError",
+    "Timeout",
+    "RequestOptions",
+    "Client",
+    "AsyncClient",
+    "Stream",
+    "AsyncStream",
+    "Anthropic",
+    "AsyncAnthropic",
+    "file_from_path",
+    "BaseModel",
+    "DEFAULT_TIMEOUT",
+    "DEFAULT_MAX_RETRIES",
+    "DEFAULT_CONNECTION_LIMITS",
+    "DefaultHttpxClient",
+    "DefaultAsyncHttpxClient",
+    "HUMAN_PROMPT",
+    "AI_PROMPT",
+]
+
+from .lib.vertex import *
+from .lib.bedrock import *
+from .lib.streaming import *
+
+_setup_logging()
+
+# Update the __module__ attribute for exported symbols so that
+# error messages point to this module instead of the module
+# it was originally defined in, e.g.
+# anthropic._exceptions.NotFoundError -> anthropic.NotFoundError
+__locals = locals()
+for __name in __all__:
+    if not __name.startswith("__"):
+        try:
+            __locals[__name].__module__ = "anthropic"
+        except (TypeError, AttributeError):
+            # Some of our exported symbols are builtins which we can't set attributes for.
+            pass
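
The loop that closes __init__.py above rewrites each exported symbol's __module__ so that reprs and tracebacks read anthropic.NotFoundError rather than anthropic._exceptions.NotFoundError. A self-contained sketch of the same pattern, using toy names (mypkg and Error are illustrative, not SDK identifiers):

    # Define a class as a private module would, then re-brand it.
    class Error(Exception):
        pass

    Error.__module__ = "mypkg"  # pretend Error was defined at the package top level

    try:
        raise Error("boom")
    except Error as exc:
        # The exception now reports the public module path.
        print(type(exc).__module__)  # -> "mypkg"
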
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_base_client.py b/.venv/lib/python3.12/site-packages/anthropic/_base_client.py
new file mode 100644
index 00000000..41b57e18
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_base_client.py
@@ -0,0 +1,2153 @@
+from __future__ import annotations
+
+import sys
+import json
+import time
+import uuid
+import email
+import socket
+import asyncio
+import inspect
+import logging
+import platform
+import warnings
+import email.utils
+from types import TracebackType
+from random import random
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Type,
+    Union,
+    Generic,
+    Mapping,
+    TypeVar,
+    Iterable,
+    Iterator,
+    Optional,
+    Generator,
+    AsyncIterator,
+    cast,
+    overload,
+)
+from typing_extensions import Literal, override, get_origin
+
+import anyio
+import httpx
+import distro
+import pydantic
+from httpx import URL, Limits
+from pydantic import PrivateAttr
+
+from . import _exceptions
+from ._qs import Querystring
+from ._files import to_httpx_files, async_to_httpx_files
+from ._types import (
+    NOT_GIVEN,
+    Body,
+    Omit,
+    Query,
+    Headers,
+    Timeout,
+    NotGiven,
+    ResponseT,
+    Transport,
+    AnyMapping,
+    PostParser,
+    ProxiesTypes,
+    RequestFiles,
+    HttpxSendArgs,
+    AsyncTransport,
+    RequestOptions,
+    HttpxRequestFiles,
+    ModelBuilderProtocol,
+)
+from ._utils import is_dict, is_list, asyncify, is_given, lru_cache, is_mapping
+from ._compat import PYDANTIC_V2, model_copy, model_dump
+from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type
+from ._response import (
+    APIResponse,
+    BaseAPIResponse,
+    AsyncAPIResponse,
+    extract_response_type,
+)
+from ._constants import (
+    DEFAULT_TIMEOUT,
+    MAX_RETRY_DELAY,
+    DEFAULT_MAX_RETRIES,
+    INITIAL_RETRY_DELAY,
+    RAW_RESPONSE_HEADER,
+    OVERRIDE_CAST_TO_HEADER,
+    DEFAULT_CONNECTION_LIMITS,
+)
+from ._streaming import Stream, SSEDecoder, AsyncStream, SSEBytesDecoder
+from ._exceptions import (
+    APIStatusError,
+    APITimeoutError,
+    APIConnectionError,
+    APIResponseValidationError,
+)
+from ._legacy_response import LegacyAPIResponse
+
+log: logging.Logger = logging.getLogger(__name__)
+
+# TODO: make base page type vars covariant
+SyncPageT = TypeVar("SyncPageT", bound="BaseSyncPage[Any]")
+AsyncPageT = TypeVar("AsyncPageT", bound="BaseAsyncPage[Any]")
+
+
+_T = TypeVar("_T")
+_T_co = TypeVar("_T_co", covariant=True)
+
+_StreamT = TypeVar("_StreamT", bound=Stream[Any])
+_AsyncStreamT = TypeVar("_AsyncStreamT", bound=AsyncStream[Any])
+
+if TYPE_CHECKING:
+    from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT
+else:
+    try:
+        from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT
+    except ImportError:
+        # taken from https://github.com/encode/httpx/blob/3ba5fe0d7ac70222590e759c31442b1cab263791/httpx/_config.py#L366
+        HTTPX_DEFAULT_TIMEOUT = Timeout(5.0)
+
+
+class PageInfo:
+    """Stores the necessary information to build the request to retrieve the next page.
+
+    Either `url` or `params` must be set.
+    """
+
+    url: URL | NotGiven
+    params: Query | NotGiven
+
+    @overload
+    def __init__(
+        self,
+        *,
+        url: URL,
+    ) -> None: ...
+
+    @overload
+    def __init__(
+        self,
+        *,
+        params: Query,
+    ) -> None: ...
+
+    def __init__(
+        self,
+        *,
+        url: URL | NotGiven = NOT_GIVEN,
+        params: Query | NotGiven = NOT_GIVEN,
+    ) -> None:
+        self.url = url
+        self.params = params
+
+    @override
+    def __repr__(self) -> str:
+        if self.url:
+            return f"{self.__class__.__name__}(url={self.url})"
+        return f"{self.__class__.__name__}(params={self.params})"
+
+
+class BasePage(GenericModel, Generic[_T]):
+    """
+    Defines the core interface for pagination.
+
+    Type Args:
+        ModelT: The pydantic model that represents an item in the response.
+
+    Methods:
+        has_next_page(): Check if there is another page available
+        next_page_info(): Get the necessary information to make a request for the next page
+    """
+
+    _options: FinalRequestOptions = PrivateAttr()
+    _model: Type[_T] = PrivateAttr()
+
+    def has_next_page(self) -> bool:
+        items = self._get_page_items()
+        if not items:
+            return False
+        return self.next_page_info() is not None
+
+    def next_page_info(self) -> Optional[PageInfo]: ...
+
+    def _get_page_items(self) -> Iterable[_T]:  # type: ignore[empty-body]
+        ...
+
+    def _params_from_url(self, url: URL) -> httpx.QueryParams:
+        # TODO: do we have to preprocess params here?
+        return httpx.QueryParams(cast(Any, self._options.params)).merge(url.params)
+
+    def _info_to_options(self, info: PageInfo) -> FinalRequestOptions:
+        options = model_copy(self._options)
+        options._strip_raw_response_header()
+
+        if not isinstance(info.params, NotGiven):
+            options.params = {**options.params, **info.params}
+            return options
+
+        if not isinstance(info.url, NotGiven):
+            params = self._params_from_url(info.url)
+            url = info.url.copy_with(params=params)
+            options.params = dict(url.params)
+            options.url = str(url)
+            return options
+
+        raise ValueError("Unexpected PageInfo state")
+
+
+class BaseSyncPage(BasePage[_T], Generic[_T]):
+    _client: SyncAPIClient = pydantic.PrivateAttr()
+
+    def _set_private_attributes(
+        self,
+        client: SyncAPIClient,
+        model: Type[_T],
+        options: FinalRequestOptions,
+    ) -> None:
+        if PYDANTIC_V2 and getattr(self, "__pydantic_private__", None) is None:
+            self.__pydantic_private__ = {}
+
+        self._model = model
+        self._client = client
+        self._options = options
+
+    # Pydantic uses a custom `__iter__` method to support casting BaseModels
+    # to dictionaries. e.g. dict(model).
+    # As we want to support `for item in page`, this is inherently incompatible
+    # with the default pydantic behaviour. It is not possible to support both
+    # use cases at once. Fortunately, this is not a big deal as all other pydantic
+    # methods should continue to work as expected as there is an alternative method
+    # to cast a model to a dictionary, model.dict(), which is used internally
+    # by pydantic.
+    def __iter__(self) -> Iterator[_T]:  # type: ignore
+        for page in self.iter_pages():
+            for item in page._get_page_items():
+                yield item
+
+    def iter_pages(self: SyncPageT) -> Iterator[SyncPageT]:
+        page = self
+        while True:
+            yield page
+            if page.has_next_page():
+                page = page.get_next_page()
+            else:
+                return
+
+    def get_next_page(self: SyncPageT) -> SyncPageT:
+        info = self.next_page_info()
+        if not info:
+            raise RuntimeError(
+                "No next page expected; please check `.has_next_page()` before calling `.get_next_page()`."
+            )
+
+        options = self._info_to_options(info)
+        return self._client._request_api_list(self._model, page=self.__class__, options=options)
+
+
+class AsyncPaginator(Generic[_T, AsyncPageT]):
+    def __init__(
+        self,
+        client: AsyncAPIClient,
+        options: FinalRequestOptions,
+        page_cls: Type[AsyncPageT],
+        model: Type[_T],
+    ) -> None:
+        self._model = model
+        self._client = client
+        self._options = options
+        self._page_cls = page_cls
+
+    def __await__(self) -> Generator[Any, None, AsyncPageT]:
+        return self._get_page().__await__()
+
+    async def _get_page(self) -> AsyncPageT:
+        def _parser(resp: AsyncPageT) -> AsyncPageT:
+            resp._set_private_attributes(
+                model=self._model,
+                options=self._options,
+                client=self._client,
+            )
+            return resp
+
+        self._options.post_parser = _parser
+
+        return await self._client.request(self._page_cls, self._options)
+
+    async def __aiter__(self) -> AsyncIterator[_T]:
+        # https://github.com/microsoft/pyright/issues/3464
+        page = cast(
+            AsyncPageT,
+            await self,  # type: ignore
+        )
+        async for item in page:
+            yield item
+
+
+class BaseAsyncPage(BasePage[_T], Generic[_T]):
+    _client: AsyncAPIClient = pydantic.PrivateAttr()
+
+    def _set_private_attributes(
+        self,
+        model: Type[_T],
+        client: AsyncAPIClient,
+        options: FinalRequestOptions,
+    ) -> None:
+        if PYDANTIC_V2 and getattr(self, "__pydantic_private__", None) is None:
+            self.__pydantic_private__ = {}
+
+        self._model = model
+        self._client = client
+        self._options = options
+
+    async def __aiter__(self) -> AsyncIterator[_T]:
+        async for page in self.iter_pages():
+            for item in page._get_page_items():
+                yield item
+
+    async def iter_pages(self: AsyncPageT) -> AsyncIterator[AsyncPageT]:
+        page = self
+        while True:
+            yield page
+            if page.has_next_page():
+                page = await page.get_next_page()
+            else:
+                return
+
+    async def get_next_page(self: AsyncPageT) -> AsyncPageT:
+        info = self.next_page_info()
+        if not info:
+            raise RuntimeError(
+                "No next page expected; please check `.has_next_page()` before calling `.get_next_page()`."
+            )
+
+        options = self._info_to_options(info)
+        return await self._client._request_api_list(self._model, page=self.__class__, options=options)
+
+
+_HttpxClientT = TypeVar("_HttpxClientT", bound=Union[httpx.Client, httpx.AsyncClient])
+_DefaultStreamT = TypeVar("_DefaultStreamT", bound=Union[Stream[Any], AsyncStream[Any]])
+
+
+class BaseClient(Generic[_HttpxClientT, _DefaultStreamT]):
+    _client: _HttpxClientT
+    _version: str
+    _base_url: URL
+    max_retries: int
+    timeout: Union[float, Timeout, None]
+    _limits: httpx.Limits
+    _proxies: ProxiesTypes | None
+    _transport: Transport | AsyncTransport | None
+    _strict_response_validation: bool
+    _idempotency_header: str | None
+    _default_stream_cls: type[_DefaultStreamT] | None = None
+
+    def __init__(
+        self,
+        *,
+        version: str,
+        base_url: str | URL,
+        _strict_response_validation: bool,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        timeout: float | Timeout | None = DEFAULT_TIMEOUT,
+        limits: httpx.Limits,
+        transport: Transport | AsyncTransport | None,
+        proxies: ProxiesTypes | None,
+        custom_headers: Mapping[str, str] | None = None,
+        custom_query: Mapping[str, object] | None = None,
+    ) -> None:
+        self._version = version
+        self._base_url = self._enforce_trailing_slash(URL(base_url))
+        self.max_retries = max_retries
+        self.timeout = timeout
+        self._limits = limits
+        self._proxies = proxies
+        self._transport = transport
+        self._custom_headers = custom_headers or {}
+        self._custom_query = custom_query or {}
+        self._strict_response_validation = _strict_response_validation
+        self._idempotency_header = None
+        self._platform: Platform | None = None
+
+        if max_retries is None:  # pyright: ignore[reportUnnecessaryComparison]
+            raise TypeError(
+                "max_retries cannot be None. If you want to disable retries, pass `0`; if you want unlimited retries, pass `math.inf` or a very high number; if you want the default behavior, pass `anthropic.DEFAULT_MAX_RETRIES`"
+            )
+
+    def _enforce_trailing_slash(self, url: URL) -> URL:
+        if url.raw_path.endswith(b"/"):
+            return url
+        return url.copy_with(raw_path=url.raw_path + b"/")
+
+    def _make_status_error_from_response(
+        self,
+        response: httpx.Response,
+    ) -> APIStatusError:
+        if response.is_closed and not response.is_stream_consumed:
+            # We can't read the response body as it has been closed
+            # before it was read. This can happen if an event hook
+            # raises a status error.
+            body = None
+            err_msg = f"Error code: {response.status_code}"
+        else:
+            err_text = response.text.strip()
+            body = err_text
+
+            try:
+                body = json.loads(err_text)
+                err_msg = f"Error code: {response.status_code} - {body}"
+            except Exception:
+                err_msg = err_text or f"Error code: {response.status_code}"
+
+        return self._make_status_error(err_msg, body=body, response=response)
+
+    def _make_status_error(
+        self,
+        err_msg: str,
+        *,
+        body: object,
+        response: httpx.Response,
+    ) -> _exceptions.APIStatusError:
+        raise NotImplementedError()
+
+    def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0) -> httpx.Headers:
+        custom_headers = options.headers or {}
+        headers_dict = _merge_mappings(
+            {
+                "x-stainless-timeout": str(options.timeout.read)
+                if isinstance(options.timeout, Timeout)
+                else str(options.timeout),
+                **self.default_headers,
+            },
+            custom_headers,
+        )
+        self._validate_headers(headers_dict, custom_headers)
+
+        # headers are case-insensitive while dictionaries are not.
+        headers = httpx.Headers(headers_dict)
+
+        idempotency_header = self._idempotency_header
+        if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers:
+            headers[idempotency_header] = options.idempotency_key or self._idempotency_key()
+
+        # Don't set these headers if they were already set or removed by the caller. We check
+        # `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case.
+        lower_custom_headers = [header.lower() for header in custom_headers]
+        if "x-stainless-retry-count" not in lower_custom_headers:
+            headers["x-stainless-retry-count"] = str(retries_taken)
+        if "x-stainless-read-timeout" not in lower_custom_headers:
+            timeout = self.timeout if isinstance(options.timeout, NotGiven) else options.timeout
+            if isinstance(timeout, Timeout):
+                timeout = timeout.read
+            if timeout is not None:
+                headers["x-stainless-read-timeout"] = str(timeout)
+
+        return headers
+
+    def _prepare_url(self, url: str) -> URL:
+        """
+        Merge a URL argument together with any 'base_url' on the client,
+        to create the URL used for the outgoing request.
+        """
+        # Copied from httpx's `_merge_url` method.
+        merge_url = URL(url)
+        if merge_url.is_relative_url:
+            merge_raw_path = self.base_url.raw_path + merge_url.raw_path.lstrip(b"/")
+            return self.base_url.copy_with(raw_path=merge_raw_path)
+
+        return merge_url
+
+    def _make_sse_decoder(self) -> SSEDecoder | SSEBytesDecoder:
+        return SSEDecoder()
+
+    def _build_request(
+        self,
+        options: FinalRequestOptions,
+        *,
+        retries_taken: int = 0,
+    ) -> httpx.Request:
+        if log.isEnabledFor(logging.DEBUG):
+            log.debug("Request options: %s", model_dump(options, exclude_unset=True))
+
+        kwargs: dict[str, Any] = {}
+
+        json_data = options.json_data
+        if options.extra_json is not None:
+            if json_data is None:
+                json_data = cast(Body, options.extra_json)
+            elif is_mapping(json_data):
+                json_data = _merge_mappings(json_data, options.extra_json)
+            else:
+                raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`")
+
+        headers = self._build_headers(options, retries_taken=retries_taken)
+        params = _merge_mappings(self.default_query, options.params)
+        content_type = headers.get("Content-Type")
+        files = options.files
+
+        # If the given Content-Type header is multipart/form-data then it
+        # has to be removed so that httpx can generate the header with
+        # additional information for us as it has to be in this form
+        # for the server to be able to correctly parse the request:
+        # multipart/form-data; boundary=---abc--
+        if content_type is not None and content_type.startswith("multipart/form-data"):
+            if "boundary" not in content_type:
+                # only remove the header if the boundary hasn't been explicitly set
+                # as the caller doesn't want httpx to come up with their own boundary
+                headers.pop("Content-Type")
+
+            # As we are now sending multipart/form-data instead of application/json
+            # we need to tell httpx to use it, https://www.python-httpx.org/advanced/clients/#multipart-file-encoding
+            if json_data:
+                if not is_dict(json_data):
+                    raise TypeError(
+                        f"Expected query input to be a dictionary for multipart requests but got {type(json_data)} instead."
+                    )
+                kwargs["data"] = self._serialize_multipartform(json_data)
+
+            # httpx determines whether or not to send a "multipart/form-data"
+            # request based on the truthiness of the "files" argument.
+            # This gets around that issue by generating a dict value that
+            # evaluates to true.
+            #
+            # https://github.com/encode/httpx/discussions/2399#discussioncomment-3814186
+            if not files:
+                files = cast(HttpxRequestFiles, ForceMultipartDict())
+
+        prepared_url = self._prepare_url(options.url)
+        if "_" in prepared_url.host:
+            # work around https://github.com/encode/httpx/discussions/2880
+            kwargs["extensions"] = {"sni_hostname": prepared_url.host.replace("_", "-")}
+
+        # TODO: report this error to httpx
+        return self._client.build_request(  # pyright: ignore[reportUnknownMemberType]
+            headers=headers,
+            timeout=self.timeout if isinstance(options.timeout, NotGiven) else options.timeout,
+            method=options.method,
+            url=prepared_url,
+            # the `Query` type that we use is incompatible with qs'
+            # `Params` type as it needs to be typed as `Mapping[str, object]`
+            # so that passing a `TypedDict` doesn't cause an error.
+            # https://github.com/microsoft/pyright/issues/3526#event-6715453066
+            params=self.qs.stringify(cast(Mapping[str, Any], params)) if params else None,
+            json=json_data if is_given(json_data) else None,
+            files=files,
+            **kwargs,
+        )
+
+    def _serialize_multipartform(self, data: Mapping[object, object]) -> dict[str, object]:
+        items = self.qs.stringify_items(
+            # TODO: type ignore is required as stringify_items is well typed but we can't be
+            # well typed without heavy validation.
+            data,  # type: ignore
+            array_format="brackets",
+        )
+        serialized: dict[str, object] = {}
+        for key, value in items:
+            existing = serialized.get(key)
+
+            if not existing:
+                serialized[key] = value
+                continue
+
+            # If a value has already been set for this key then that
+            # means we're sending data like `array[]=[1, 2, 3]` and we
+            # need to tell httpx that we want to send multiple values with
+            # the same key which is done by using a list or a tuple.
+            #
+            # Note: 2d arrays should never result in the same key at both
+            # levels so it's safe to assume that if the value is a list,
+            # it was because we changed it to be a list.
+            if is_list(existing):
+                existing.append(value)
+            else:
+                serialized[key] = [existing, value]
+
+        return serialized
+
+    def _maybe_override_cast_to(self, cast_to: type[ResponseT], options: FinalRequestOptions) -> type[ResponseT]:
+        if not is_given(options.headers):
+            return cast_to
+
+        # make a copy of the headers so we don't mutate user-input
+        headers = dict(options.headers)
+
+        # we internally support defining a temporary header to override the
+        # default `cast_to` type for use with `.with_raw_response` and `.with_streaming_response`
+        # see _response.py for implementation details
+        override_cast_to = headers.pop(OVERRIDE_CAST_TO_HEADER, NOT_GIVEN)
+        if is_given(override_cast_to):
+            options.headers = headers
+            return cast(Type[ResponseT], override_cast_to)
+
+        return cast_to
+
+    def _should_stream_response_body(self, request: httpx.Request) -> bool:
+        return request.headers.get(RAW_RESPONSE_HEADER) == "stream"  # type: ignore[no-any-return]
+
+    def _process_response_data(
+        self,
+        *,
+        data: object,
+        cast_to: type[ResponseT],
+        response: httpx.Response,
+    ) -> ResponseT:
+        if data is None:
+            return cast(ResponseT, None)
+
+        if cast_to is object:
+            return cast(ResponseT, data)
+
+        try:
+            if inspect.isclass(cast_to) and issubclass(cast_to, ModelBuilderProtocol):
+                return cast(ResponseT, cast_to.build(response=response, data=data))
+
+            if self._strict_response_validation:
+                return cast(ResponseT, validate_type(type_=cast_to, value=data))
+
+            return cast(ResponseT, construct_type(type_=cast_to, value=data))
+        except pydantic.ValidationError as err:
+            raise APIResponseValidationError(response=response, body=data) from err
+
+    @property
+    def qs(self) -> Querystring:
+        return Querystring()
+
+    @property
+    def custom_auth(self) -> httpx.Auth | None:
+        return None
+
+    @property
+    def auth_headers(self) -> dict[str, str]:
+        return {}
+
+    @property
+    def default_headers(self) -> dict[str, str | Omit]:
+        return {
+            "Accept": "application/json",
+            "Content-Type": "application/json",
+            "User-Agent": self.user_agent,
+            **self.platform_headers(),
+            **self.auth_headers,
+            **self._custom_headers,
+        }
+
+    @property
+    def default_query(self) -> dict[str, object]:
+        return {
+            **self._custom_query,
+        }
+
+    def _validate_headers(
+        self,
+        headers: Headers,  # noqa: ARG002
+        custom_headers: Headers,  # noqa: ARG002
+    ) -> None:
+        """Validate the given default headers and custom headers.
+
+        Does nothing by default.
+        """
+        return
+
+    @property
+    def user_agent(self) -> str:
+        return f"{self.__class__.__name__}/Python {self._version}"
+
+    @property
+    def base_url(self) -> URL:
+        return self._base_url
+
+    @base_url.setter
+    def base_url(self, url: URL | str) -> None:
+        self._base_url = self._enforce_trailing_slash(url if isinstance(url, URL) else URL(url))
+
+    def platform_headers(self) -> Dict[str, str]:
+        # the actual implementation is in a separate `lru_cache` decorated
+        # function because adding `lru_cache` to methods will leak memory
+        # https://github.com/python/cpython/issues/88476
+        return platform_headers(self._version, platform=self._platform)
+
+    def _calculate_nonstreaming_timeout(self, max_tokens: int) -> Timeout:
+        maximum_time = 60 * 60
+        default_time = 60 * 10
+
+        expected_time = maximum_time * max_tokens / 128_000
+        if expected_time > default_time:
+            raise ValueError(
+                "Streaming is strongly recommended for operations that may take longer than 10 minutes. "
+                + "See https://github.com/anthropics/anthropic-sdk-python#long-requests for more details",
+            )
+        return Timeout(
+            default_time,
+            connect=5.0,
+        )
+
+    def _parse_retry_after_header(self, response_headers: Optional[httpx.Headers] = None) -> float | None:
+        """Returns a float of the number of seconds (not milliseconds) to wait after retrying, or None if unspecified.
+
+        About the Retry-After header: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After
+        See also https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After#syntax
+        """
+        if response_headers is None:
+            return None
+
+        # First, try the non-standard `retry-after-ms` header for milliseconds,
+        # which is more precise than integer-seconds `retry-after`
+        try:
+            retry_ms_header = response_headers.get("retry-after-ms", None)
+            return float(retry_ms_header) / 1000
+        except (TypeError, ValueError):
+            pass
+
+        # Next, try parsing `retry-after` header as seconds (allowing nonstandard floats).
+        retry_header = response_headers.get("retry-after")
+        try:
+            # note: the spec indicates that this should only ever be an integer
+            # but if someone sends a float there's no reason for us to not respect it
+            return float(retry_header)
+        except (TypeError, ValueError):
+            pass
+
+        # Last, try parsing `retry-after` as a date.
+        retry_date_tuple = email.utils.parsedate_tz(retry_header)
+        if retry_date_tuple is None:
+            return None
+
+        retry_date = email.utils.mktime_tz(retry_date_tuple)
+        return float(retry_date - time.time())
+
+    def _calculate_retry_timeout(
+        self,
+        remaining_retries: int,
+        options: FinalRequestOptions,
+        response_headers: Optional[httpx.Headers] = None,
+    ) -> float:
+        max_retries = options.get_max_retries(self.max_retries)
+
+        # If the API asks us to wait a certain amount of time (and it's a reasonable amount), just do what it says.
+        retry_after = self._parse_retry_after_header(response_headers)
+        if retry_after is not None and 0 < retry_after <= 60:
+            return retry_after
+
+        # Also cap retry count to 1000 to avoid any potential overflows with `pow`
+        nb_retries = min(max_retries - remaining_retries, 1000)
+
+        # Apply exponential backoff, but not more than the max.
+        sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY)
+
+        # Apply some jitter: scale the delay down by a random factor of up to 25%.
+        jitter = 1 - 0.25 * random()
+        timeout = sleep_seconds * jitter
+        return timeout if timeout >= 0 else 0
+
+    def _should_retry(self, response: httpx.Response) -> bool:
+        # Note: this is not a standard header
+        should_retry_header = response.headers.get("x-should-retry")
+
+        # If the server explicitly says whether or not to retry, obey.
+        if should_retry_header == "true":
+            log.debug("Retrying as header `x-should-retry` is set to `true`")
+            return True
+        if should_retry_header == "false":
+            log.debug("Not retrying as header `x-should-retry` is set to `false`")
+            return False
+
+        # Retry on request timeouts.
+        if response.status_code == 408:
+            log.debug("Retrying due to status code %i", response.status_code)
+            return True
+
+        # Retry on lock timeouts.
+        if response.status_code == 409:
+            log.debug("Retrying due to status code %i", response.status_code)
+            return True
+
+        # Retry on rate limits.
+        if response.status_code == 429:
+            log.debug("Retrying due to status code %i", response.status_code)
+            return True
+
+        # Retry internal errors.
+        if response.status_code >= 500:
+            log.debug("Retrying due to status code %i", response.status_code)
+            return True
+
+        log.debug("Not retrying")
+        return False
+
+    def _idempotency_key(self) -> str:
+        return f"stainless-python-retry-{uuid.uuid4()}"
+
+
+class _DefaultHttpxClient(httpx.Client):
+    def __init__(self, **kwargs: Any) -> None:
+        kwargs.setdefault("timeout", DEFAULT_TIMEOUT)
+        kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS)
+        kwargs.setdefault("follow_redirects", True)
+
+        if "transport" not in kwargs:
+            socket_options = [
+                (socket.SOL_SOCKET, socket.SO_KEEPALIVE, True),
+                (socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 60),
+                (socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 5),
+            ]
+            TCP_KEEPIDLE = getattr(socket, "TCP_KEEPIDLE", None)
+            if TCP_KEEPIDLE is not None:
+                socket_options.append((socket.IPPROTO_TCP, TCP_KEEPIDLE, 60))
+
+            kwargs["transport"] = httpx.HTTPTransport(
+                # note: limits is always set above
+                limits=kwargs["limits"],
+                socket_options=socket_options,
+            )
+
+        super().__init__(**kwargs)
+
+
+if TYPE_CHECKING:
+    DefaultHttpxClient = httpx.Client
+    """An alias to `httpx.Client` that provides the same defaults that this SDK
+    uses internally.
+
+    This is useful because overriding the `http_client` with your own instance of
+    `httpx.Client` will result in httpx's defaults being used, not ours.
+    """
+else:
+    DefaultHttpxClient = _DefaultHttpxClient
+
+
+class SyncHttpxClientWrapper(DefaultHttpxClient):
+    def __del__(self) -> None:
+        if self.is_closed:
+            return
+
+        try:
+            self.close()
+        except Exception:
+            pass
+
+
+class SyncAPIClient(BaseClient[httpx.Client, Stream[Any]]):
+    _client: httpx.Client
+    _default_stream_cls: type[Stream[Any]] | None = None
+
+    def __init__(
+        self,
+        *,
+        version: str,
+        base_url: str | URL,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        transport: Transport | None = None,
+        proxies: ProxiesTypes | None = None,
+        limits: Limits | None = None,
+        http_client: httpx.Client | None = None,
+        custom_headers: Mapping[str, str] | None = None,
+        custom_query: Mapping[str, object] | None = None,
+        _strict_response_validation: bool,
+    ) -> None:
+        kwargs: dict[str, Any] = {}
+        if limits is not None:
+            warnings.warn(
+                "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead",
+                category=DeprecationWarning,
+                stacklevel=3,
+            )
+            if http_client is not None:
+                raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`")
+        else:
+            limits = DEFAULT_CONNECTION_LIMITS
+
+        if transport is not None:
+            kwargs["transport"] = transport
+            warnings.warn(
+                "The `transport` argument is deprecated. The `http_client` argument should be passed instead",
+                category=DeprecationWarning,
+                stacklevel=3,
+            )
+            if http_client is not None:
+                raise ValueError("The `http_client` argument is mutually exclusive with `transport`")
+
+        if proxies is not None:
+            kwargs["proxies"] = proxies
+            warnings.warn(
+                "The `proxies` argument is deprecated. The `http_client` argument should be passed instead",
+                category=DeprecationWarning,
+                stacklevel=3,
+            )
+            if http_client is not None:
+                raise ValueError("The `http_client` argument is mutually exclusive with `proxies`")
+
+        if not is_given(timeout):
+            # If the user passed in a custom http client with a non-default
+            # timeout set, use that timeout.
+            #
+            # Note: there is an edge case where the user passes in a client
+            # whose timeout was explicitly set to match the default; since this
+            # check is structural, we'll think they didn't pass in a timeout
+            # and will ignore it.
+            if http_client and http_client.timeout != HTTPX_DEFAULT_TIMEOUT:
+                timeout = http_client.timeout
+            else:
+                timeout = DEFAULT_TIMEOUT
+
+        if http_client is not None and not isinstance(http_client, httpx.Client):  # pyright: ignore[reportUnnecessaryIsInstance]
+            raise TypeError(
+                f"Invalid `http_client` argument; Expected an instance of `httpx.Client` but got {type(http_client)}"
+            )
+
+        super().__init__(
+            version=version,
+            limits=limits,
+            # cast to a valid type because mypy doesn't understand our type narrowing
+            timeout=cast(Timeout, timeout),
+            proxies=proxies,
+            base_url=base_url,
+            transport=transport,
+            max_retries=max_retries,
+            custom_query=custom_query,
+            custom_headers=custom_headers,
+            _strict_response_validation=_strict_response_validation,
+        )
+        self._client = http_client or SyncHttpxClientWrapper(
+            base_url=base_url,
+            # cast to a valid type because mypy doesn't understand our type narrowing
+            timeout=cast(Timeout, timeout),
+            limits=limits,
+            follow_redirects=True,
+            **kwargs,  # type: ignore
+        )
+
+    def is_closed(self) -> bool:
+        return self._client.is_closed
+
+    def close(self) -> None:
+        """Close the underlying HTTPX client.
+
+        The client will *not* be usable after this.
+        """
+        # If an error is thrown while constructing a client, self._client
+        # may not be present
+        if hasattr(self, "_client"):
+            self._client.close()
+
+    def __enter__(self: _T) -> _T:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        self.close()
+
+    def _prepare_options(
+        self,
+        options: FinalRequestOptions,  # noqa: ARG002
+    ) -> FinalRequestOptions:
+        """Hook for mutating the given options"""
+        return options
+
+    def _prepare_request(
+        self,
+        request: httpx.Request,  # noqa: ARG002
+    ) -> None:
+        """This method is used as a callback for mutating the `Request` object
+        after it has been constructed.
+        This is useful for cases where you want to add certain headers based off of
+        the request properties, e.g. `url`, `method` etc.
+        """
+        return None
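+
+    # A subclass could override the hook above to stamp headers; a minimal
+    # hypothetical sketch (not part of this SDK):
+    #
+    #     class MyClient(SyncAPIClient):
+    #         def _prepare_request(self, request: httpx.Request) -> None:
+    #             if request.method == "POST":
+    #                 request.headers["X-Debug"] = "1"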
+
+    @overload
+    def request(
+        self,
+        cast_to: Type[ResponseT],
+        options: FinalRequestOptions,
+        remaining_retries: Optional[int] = None,
+        *,
+        stream: Literal[True],
+        stream_cls: Type[_StreamT],
+    ) -> _StreamT: ...
+
+    @overload
+    def request(
+        self,
+        cast_to: Type[ResponseT],
+        options: FinalRequestOptions,
+        remaining_retries: Optional[int] = None,
+        *,
+        stream: Literal[False] = False,
+    ) -> ResponseT: ...
+
+    @overload
+    def request(
+        self,
+        cast_to: Type[ResponseT],
+        options: FinalRequestOptions,
+        remaining_retries: Optional[int] = None,
+        *,
+        stream: bool = False,
+        stream_cls: Type[_StreamT] | None = None,
+    ) -> ResponseT | _StreamT: ...
+
+    def request(
+        self,
+        cast_to: Type[ResponseT],
+        options: FinalRequestOptions,
+        remaining_retries: Optional[int] = None,
+        *,
+        stream: bool = False,
+        stream_cls: type[_StreamT] | None = None,
+    ) -> ResponseT | _StreamT:
+        if remaining_retries is not None:
+            retries_taken = options.get_max_retries(self.max_retries) - remaining_retries
+        else:
+            retries_taken = 0
+
+        return self._request(
+            cast_to=cast_to,
+            options=options,
+            stream=stream,
+            stream_cls=stream_cls,
+            retries_taken=retries_taken,
+        )
+
+    def _request(
+        self,
+        *,
+        cast_to: Type[ResponseT],
+        options: FinalRequestOptions,
+        retries_taken: int,
+        stream: bool,
+        stream_cls: type[_StreamT] | None,
+    ) -> ResponseT | _StreamT:
+        # create a copy of the options we were given so that if the
+        # options are mutated later & we then retry, the retries are
+        # given the original options
+        input_options = model_copy(options)
+
+        cast_to = self._maybe_override_cast_to(cast_to, options)
+        options = self._prepare_options(options)
+
+        remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+        request = self._build_request(options, retries_taken=retries_taken)
+        self._prepare_request(request)
+
+        kwargs: HttpxSendArgs = {}
+        if self.custom_auth is not None:
+            kwargs["auth"] = self.custom_auth
+
+        log.debug("Sending HTTP Request: %s %s", request.method, request.url)
+
+        try:
+            response = self._client.send(
+                request,
+                stream=stream or self._should_stream_response_body(request=request),
+                **kwargs,
+            )
+        except httpx.TimeoutException as err:
+            log.debug("Encountered httpx.TimeoutException", exc_info=True)
+
+            if remaining_retries > 0:
+                return self._retry_request(
+                    input_options,
+                    cast_to,
+                    retries_taken=retries_taken,
+                    stream=stream,
+                    stream_cls=stream_cls,
+                    response_headers=None,
+                )
+
+            log.debug("Raising timeout error")
+            raise APITimeoutError(request=request) from err
+        except Exception as err:
+            log.debug("Encountered Exception", exc_info=True)
+
+            if remaining_retries > 0:
+                return self._retry_request(
+                    input_options,
+                    cast_to,
+                    retries_taken=retries_taken,
+                    stream=stream,
+                    stream_cls=stream_cls,
+                    response_headers=None,
+                )
+
+            log.debug("Raising connection error")
+            raise APIConnectionError(request=request) from err
+
+        log.debug(
+            'HTTP Response: %s %s "%i %s" %s',
+            request.method,
+            request.url,
+            response.status_code,
+            response.reason_phrase,
+            response.headers,
+        )
+        log.debug("request_id: %s", response.headers.get("request-id"))
+
+        try:
+            response.raise_for_status()
+        except httpx.HTTPStatusError as err:  # thrown on 4xx and 5xx status code
+            log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
+
+            if remaining_retries > 0 and self._should_retry(err.response):
+                err.response.close()
+                return self._retry_request(
+                    input_options,
+                    cast_to,
+                    retries_taken=retries_taken,
+                    response_headers=err.response.headers,
+                    stream=stream,
+                    stream_cls=stream_cls,
+                )
+
+            # If the response is streamed then we need to explicitly read the response
+            # to completion before attempting to access the response text.
+            if not err.response.is_closed:
+                err.response.read()
+
+            log.debug("Re-raising status error")
+            raise self._make_status_error_from_response(err.response) from None
+
+        return self._process_response(
+            cast_to=cast_to,
+            options=options,
+            response=response,
+            stream=stream,
+            stream_cls=stream_cls,
+            retries_taken=retries_taken,
+        )
+
+    def _retry_request(
+        self,
+        options: FinalRequestOptions,
+        cast_to: Type[ResponseT],
+        *,
+        retries_taken: int,
+        response_headers: httpx.Headers | None,
+        stream: bool,
+        stream_cls: type[_StreamT] | None,
+    ) -> ResponseT | _StreamT:
+        remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+        if remaining_retries == 1:
+            log.debug("1 retry left")
+        else:
+            log.debug("%i retries left", remaining_retries)
+
+        timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers)
+        log.info("Retrying request to %s in %f seconds", options.url, timeout)
+
+        # In a synchronous context we are blocking the entire thread. It is up to the library user to run the
+        # client in a different thread if necessary.
+        time.sleep(timeout)
+
+        return self._request(
+            options=options,
+            cast_to=cast_to,
+            retries_taken=retries_taken + 1,
+            stream=stream,
+            stream_cls=stream_cls,
+        )
+
+    def _process_response(
+        self,
+        *,
+        cast_to: Type[ResponseT],
+        options: FinalRequestOptions,
+        response: httpx.Response,
+        stream: bool,
+        stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None,
+        retries_taken: int = 0,
+    ) -> ResponseT:
+        if response.request.headers.get(RAW_RESPONSE_HEADER) == "true":
+            return cast(
+                ResponseT,
+                LegacyAPIResponse(
+                    raw=response,
+                    client=self,
+                    cast_to=cast_to,
+                    stream=stream,
+                    stream_cls=stream_cls,
+                    options=options,
+                    retries_taken=retries_taken,
+                ),
+            )
+
+        origin = get_origin(cast_to) or cast_to
+
+        if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse):
+            if not issubclass(origin, APIResponse):
+                raise TypeError(f"API Response types must subclass {APIResponse}; Received {origin}")
+
+            response_cls = cast("type[BaseAPIResponse[Any]]", cast_to)
+            return cast(
+                ResponseT,
+                response_cls(
+                    raw=response,
+                    client=self,
+                    cast_to=extract_response_type(response_cls),
+                    stream=stream,
+                    stream_cls=stream_cls,
+                    options=options,
+                    retries_taken=retries_taken,
+                ),
+            )
+
+        if cast_to == httpx.Response:
+            return cast(ResponseT, response)
+
+        api_response = APIResponse(
+            raw=response,
+            client=self,
+            cast_to=cast("type[ResponseT]", cast_to),  # pyright: ignore[reportUnnecessaryCast]
+            stream=stream,
+            stream_cls=stream_cls,
+            options=options,
+            retries_taken=retries_taken,
+        )
+        if bool(response.request.headers.get(RAW_RESPONSE_HEADER)):
+            return cast(ResponseT, api_response)
+
+        return api_response.parse()
+
+    def _request_api_list(
+        self,
+        model: Type[object],
+        page: Type[SyncPageT],
+        options: FinalRequestOptions,
+    ) -> SyncPageT:
+        def _parser(resp: SyncPageT) -> SyncPageT:
+            resp._set_private_attributes(
+                client=self,
+                model=model,
+                options=options,
+            )
+            return resp
+
+        options.post_parser = _parser
+
+        return self.request(page, options, stream=False)
+
+    @overload
+    def get(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        options: RequestOptions = {},
+        stream: Literal[False] = False,
+    ) -> ResponseT: ...
+
+    @overload
+    def get(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        options: RequestOptions = {},
+        stream: Literal[True],
+        stream_cls: type[_StreamT],
+    ) -> _StreamT: ...
+
+    @overload
+    def get(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        options: RequestOptions = {},
+        stream: bool,
+        stream_cls: type[_StreamT] | None = None,
+    ) -> ResponseT | _StreamT: ...
+
+    def get(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        options: RequestOptions = {},
+        stream: bool = False,
+        stream_cls: type[_StreamT] | None = None,
+    ) -> ResponseT | _StreamT:
+        opts = FinalRequestOptions.construct(method="get", url=path, **options)
+        # cast is required because mypy complains about returning Any even though
+        # it understands the type variables
+        return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
+
+    @overload
+    def post(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        body: Body | None = None,
+        options: RequestOptions = {},
+        files: RequestFiles | None = None,
+        stream: Literal[False] = False,
+    ) -> ResponseT: ...
+
+    @overload
+    def post(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        body: Body | None = None,
+        options: RequestOptions = {},
+        files: RequestFiles | None = None,
+        stream: Literal[True],
+        stream_cls: type[_StreamT],
+    ) -> _StreamT: ...
+
+    @overload
+    def post(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        body: Body | None = None,
+        options: RequestOptions = {},
+        files: RequestFiles | None = None,
+        stream: bool,
+        stream_cls: type[_StreamT] | None = None,
+    ) -> ResponseT | _StreamT: ...
+
+    def post(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        body: Body | None = None,
+        options: RequestOptions = {},
+        files: RequestFiles | None = None,
+        stream: bool = False,
+        stream_cls: type[_StreamT] | None = None,
+    ) -> ResponseT | _StreamT:
+        opts = FinalRequestOptions.construct(
+            method="post", url=path, json_data=body, files=to_httpx_files(files), **options
+        )
+        return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
+
+    def patch(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        body: Body | None = None,
+        options: RequestOptions = {},
+    ) -> ResponseT:
+        opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options)
+        return self.request(cast_to, opts)
+
+    def put(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        body: Body | None = None,
+        files: RequestFiles | None = None,
+        options: RequestOptions = {},
+    ) -> ResponseT:
+        opts = FinalRequestOptions.construct(
+            method="put", url=path, json_data=body, files=to_httpx_files(files), **options
+        )
+        return self.request(cast_to, opts)
+
+    def delete(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        body: Body | None = None,
+        options: RequestOptions = {},
+    ) -> ResponseT:
+        opts = FinalRequestOptions.construct(method="delete", url=path, json_data=body, **options)
+        return self.request(cast_to, opts)
+
+    def get_api_list(
+        self,
+        path: str,
+        *,
+        model: Type[object],
+        page: Type[SyncPageT],
+        body: Body | None = None,
+        options: RequestOptions = {},
+        method: str = "get",
+    ) -> SyncPageT:
+        opts = FinalRequestOptions.construct(method=method, url=path, json_data=body, **options)
+        return self._request_api_list(model, page, opts)
+
+
+class _DefaultAsyncHttpxClient(httpx.AsyncClient):
+    def __init__(self, **kwargs: Any) -> None:
+        kwargs.setdefault("timeout", DEFAULT_TIMEOUT)
+        kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS)
+        kwargs.setdefault("follow_redirects", True)
+
+        if "transport" not in kwargs:
+            socket_options = [
+                (socket.SOL_SOCKET, socket.SO_KEEPALIVE, True),
+                (socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 60),
+                (socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 5),
+            ]
+            TCP_KEEPIDLE = getattr(socket, "TCP_KEEPIDLE", None)
+            if TCP_KEEPIDLE is not None:
+                socket_options.append((socket.IPPROTO_TCP, TCP_KEEPIDLE, 60))
+
+            kwargs["transport"] = httpx.AsyncHTTPTransport(
+                # note: limits is always set above
+                limits=kwargs["limits"],
+                socket_options=socket_options,
+            )
+
+        super().__init__(**kwargs)
+
+
+if TYPE_CHECKING:
+    DefaultAsyncHttpxClient = httpx.AsyncClient
+    """An alias to `httpx.AsyncClient` that provides the same defaults that this SDK
+    uses internally.
+
+    This is useful because overriding the `http_client` with your own instance of
+    `httpx.AsyncClient` will result in httpx's defaults being used, not ours.
+    """
+else:
+    DefaultAsyncHttpxClient = _DefaultAsyncHttpxClient
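+
+# Illustrative usage, mirroring the sync variant above:
+#
+#     client = AsyncAnthropic(http_client=DefaultAsyncHttpxClient(verify=False))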
+
+
+class AsyncHttpxClientWrapper(DefaultAsyncHttpxClient):
+    def __del__(self) -> None:
+        if self.is_closed:
+            return
+
+        try:
+            # TODO(someday): support non asyncio runtimes here
+            asyncio.get_running_loop().create_task(self.aclose())
+        except Exception:
+            pass
+
+
+class AsyncAPIClient(BaseClient[httpx.AsyncClient, AsyncStream[Any]]):
+    _client: httpx.AsyncClient
+    _default_stream_cls: type[AsyncStream[Any]] | None = None
+
+    def __init__(
+        self,
+        *,
+        version: str,
+        base_url: str | URL,
+        _strict_response_validation: bool,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        transport: AsyncTransport | None = None,
+        proxies: ProxiesTypes | None = None,
+        limits: Limits | None = None,
+        http_client: httpx.AsyncClient | None = None,
+        custom_headers: Mapping[str, str] | None = None,
+        custom_query: Mapping[str, object] | None = None,
+    ) -> None:
+        kwargs: dict[str, Any] = {}
+        if limits is not None:
+            warnings.warn(
+                "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead",
+                category=DeprecationWarning,
+                stacklevel=3,
+            )
+            if http_client is not None:
+                raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`")
+        else:
+            limits = DEFAULT_CONNECTION_LIMITS
+
+        if transport is not None:
+            kwargs["transport"] = transport
+
+            warnings.warn(
+                "The `transport` argument is deprecated. The `http_client` argument should be passed instead",
+                category=DeprecationWarning,
+                stacklevel=3,
+            )
+            if http_client is not None:
+                raise ValueError("The `http_client` argument is mutually exclusive with `transport`")
+
+        if proxies is not None:
+            kwargs["proxies"] = proxies
+            warnings.warn(
+                "The `proxies` argument is deprecated. The `http_client` argument should be passed instead",
+                category=DeprecationWarning,
+                stacklevel=3,
+            )
+            if http_client is not None:
+                raise ValueError("The `http_client` argument is mutually exclusive with `proxies`")
+
+        if not is_given(timeout):
+            # If the user passed in a custom http client with a non-default
+            # timeout set, use that timeout.
+            #
+            # Note: there is an edge case where the user passes in a client
+            # whose timeout was explicitly set to match the default; since this
+            # check is structural, we'll think they didn't pass in a timeout
+            # and will ignore it.
+            if http_client and http_client.timeout != HTTPX_DEFAULT_TIMEOUT:
+                timeout = http_client.timeout
+            else:
+                timeout = DEFAULT_TIMEOUT
+
+        if http_client is not None and not isinstance(http_client, httpx.AsyncClient):  # pyright: ignore[reportUnnecessaryIsInstance]
+            raise TypeError(
+                f"Invalid `http_client` argument; Expected an instance of `httpx.AsyncClient` but got {type(http_client)}"
+            )
+
+        super().__init__(
+            version=version,
+            base_url=base_url,
+            limits=limits,
+            # cast to a valid type because mypy doesn't understand our type narrowing
+            timeout=cast(Timeout, timeout),
+            proxies=proxies,
+            transport=transport,
+            max_retries=max_retries,
+            custom_query=custom_query,
+            custom_headers=custom_headers,
+            _strict_response_validation=_strict_response_validation,
+        )
+        self._client = http_client or AsyncHttpxClientWrapper(
+            base_url=base_url,
+            # cast to a valid type because mypy doesn't understand our type narrowing
+            timeout=cast(Timeout, timeout),
+            limits=limits,
+            follow_redirects=True,
+            **kwargs,  # type: ignore
+        )
+
+    def is_closed(self) -> bool:
+        return self._client.is_closed
+
+    async def close(self) -> None:
+        """Close the underlying HTTPX client.
+
+        The client will *not* be usable after this.
+        """
+        await self._client.aclose()
+
+    async def __aenter__(self: _T) -> _T:
+        return self
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        await self.close()
+
+    async def _prepare_options(
+        self,
+        options: FinalRequestOptions,  # noqa: ARG002
+    ) -> FinalRequestOptions:
+        """Hook for mutating the given options"""
+        return options
+
+    async def _prepare_request(
+        self,
+        request: httpx.Request,  # noqa: ARG002
+    ) -> None:
+        """This method is used as a callback for mutating the `Request` object
+        after it has been constructed.
+        This is useful for cases where you want to add certain headers based off of
+        the request properties, e.g. `url`, `method` etc.
+        """
+        return None
+
+    @overload
+    async def request(
+        self,
+        cast_to: Type[ResponseT],
+        options: FinalRequestOptions,
+        *,
+        stream: Literal[False] = False,
+        remaining_retries: Optional[int] = None,
+    ) -> ResponseT: ...
+
+    @overload
+    async def request(
+        self,
+        cast_to: Type[ResponseT],
+        options: FinalRequestOptions,
+        *,
+        stream: Literal[True],
+        stream_cls: type[_AsyncStreamT],
+        remaining_retries: Optional[int] = None,
+    ) -> _AsyncStreamT: ...
+
+    @overload
+    async def request(
+        self,
+        cast_to: Type[ResponseT],
+        options: FinalRequestOptions,
+        *,
+        stream: bool,
+        stream_cls: type[_AsyncStreamT] | None = None,
+        remaining_retries: Optional[int] = None,
+    ) -> ResponseT | _AsyncStreamT: ...
+
+    async def request(
+        self,
+        cast_to: Type[ResponseT],
+        options: FinalRequestOptions,
+        *,
+        stream: bool = False,
+        stream_cls: type[_AsyncStreamT] | None = None,
+        remaining_retries: Optional[int] = None,
+    ) -> ResponseT | _AsyncStreamT:
+        if remaining_retries is not None:
+            retries_taken = options.get_max_retries(self.max_retries) - remaining_retries
+        else:
+            retries_taken = 0
+
+        return await self._request(
+            cast_to=cast_to,
+            options=options,
+            stream=stream,
+            stream_cls=stream_cls,
+            retries_taken=retries_taken,
+        )
+
+    async def _request(
+        self,
+        cast_to: Type[ResponseT],
+        options: FinalRequestOptions,
+        *,
+        stream: bool,
+        stream_cls: type[_AsyncStreamT] | None,
+        retries_taken: int,
+    ) -> ResponseT | _AsyncStreamT:
+        if self._platform is None:
+            # `get_platform` can make blocking IO calls, so we resolve it
+            # eagerly here, via `asyncify`, while we are in an async context
+            self._platform = await asyncify(get_platform)()
+
+        # create a copy of the options we were given so that if the
+        # options are mutated later & we then retry, the retries are
+        # given the original options
+        input_options = model_copy(options)
+
+        cast_to = self._maybe_override_cast_to(cast_to, options)
+        options = await self._prepare_options(options)
+
+        remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+        request = self._build_request(options, retries_taken=retries_taken)
+        await self._prepare_request(request)
+
+        kwargs: HttpxSendArgs = {}
+        if self.custom_auth is not None:
+            kwargs["auth"] = self.custom_auth
+
+        try:
+            response = await self._client.send(
+                request,
+                stream=stream or self._should_stream_response_body(request=request),
+                **kwargs,
+            )
+        except httpx.TimeoutException as err:
+            log.debug("Encountered httpx.TimeoutException", exc_info=True)
+
+            if remaining_retries > 0:
+                return await self._retry_request(
+                    input_options,
+                    cast_to,
+                    retries_taken=retries_taken,
+                    stream=stream,
+                    stream_cls=stream_cls,
+                    response_headers=None,
+                )
+
+            log.debug("Raising timeout error")
+            raise APITimeoutError(request=request) from err
+        except Exception as err:
+            log.debug("Encountered Exception", exc_info=True)
+
+            if remaining_retries > 0:
+                return await self._retry_request(
+                    input_options,
+                    cast_to,
+                    retries_taken=retries_taken,
+                    stream=stream,
+                    stream_cls=stream_cls,
+                    response_headers=None,
+                )
+
+            log.debug("Raising connection error")
+            raise APIConnectionError(request=request) from err
+
+        log.debug(
+            'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase
+        )
+
+        try:
+            response.raise_for_status()
+        except httpx.HTTPStatusError as err:  # thrown on 4xx and 5xx status code
+            log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
+
+            if remaining_retries > 0 and self._should_retry(err.response):
+                await err.response.aclose()
+                return await self._retry_request(
+                    input_options,
+                    cast_to,
+                    retries_taken=retries_taken,
+                    response_headers=err.response.headers,
+                    stream=stream,
+                    stream_cls=stream_cls,
+                )
+
+            # If the response is streamed then we need to explicitly read the response
+            # to completion before attempting to access the response text.
+            if not err.response.is_closed:
+                await err.response.aread()
+
+            log.debug("Re-raising status error")
+            raise self._make_status_error_from_response(err.response) from None
+
+        return await self._process_response(
+            cast_to=cast_to,
+            options=options,
+            response=response,
+            stream=stream,
+            stream_cls=stream_cls,
+            retries_taken=retries_taken,
+        )
+
+    async def _retry_request(
+        self,
+        options: FinalRequestOptions,
+        cast_to: Type[ResponseT],
+        *,
+        retries_taken: int,
+        response_headers: httpx.Headers | None,
+        stream: bool,
+        stream_cls: type[_AsyncStreamT] | None,
+    ) -> ResponseT | _AsyncStreamT:
+        remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+        if remaining_retries == 1:
+            log.debug("1 retry left")
+        else:
+            log.debug("%i retries left", remaining_retries)
+
+        timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers)
+        log.info("Retrying request to %s in %f seconds", options.url, timeout)
+
+        await anyio.sleep(timeout)
+
+        return await self._request(
+            options=options,
+            cast_to=cast_to,
+            retries_taken=retries_taken + 1,
+            stream=stream,
+            stream_cls=stream_cls,
+        )
+
+    async def _process_response(
+        self,
+        *,
+        cast_to: Type[ResponseT],
+        options: FinalRequestOptions,
+        response: httpx.Response,
+        stream: bool,
+        stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None,
+        retries_taken: int = 0,
+    ) -> ResponseT:
+        if response.request.headers.get(RAW_RESPONSE_HEADER) == "true":
+            return cast(
+                ResponseT,
+                LegacyAPIResponse(
+                    raw=response,
+                    client=self,
+                    cast_to=cast_to,
+                    stream=stream,
+                    stream_cls=stream_cls,
+                    options=options,
+                    retries_taken=retries_taken,
+                ),
+            )
+
+        origin = get_origin(cast_to) or cast_to
+
+        if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse):
+            if not issubclass(origin, AsyncAPIResponse):
+                raise TypeError(f"API Response types must subclass {AsyncAPIResponse}; Received {origin}")
+
+            response_cls = cast("type[BaseAPIResponse[Any]]", cast_to)
+            return cast(
+                "ResponseT",
+                response_cls(
+                    raw=response,
+                    client=self,
+                    cast_to=extract_response_type(response_cls),
+                    stream=stream,
+                    stream_cls=stream_cls,
+                    options=options,
+                    retries_taken=retries_taken,
+                ),
+            )
+
+        if cast_to == httpx.Response:
+            return cast(ResponseT, response)
+
+        api_response = AsyncAPIResponse(
+            raw=response,
+            client=self,
+            cast_to=cast("type[ResponseT]", cast_to),  # pyright: ignore[reportUnnecessaryCast]
+            stream=stream,
+            stream_cls=stream_cls,
+            options=options,
+            retries_taken=retries_taken,
+        )
+        if bool(response.request.headers.get(RAW_RESPONSE_HEADER)):
+            return cast(ResponseT, api_response)
+
+        return await api_response.parse()
+
+    def _request_api_list(
+        self,
+        model: Type[_T],
+        page: Type[AsyncPageT],
+        options: FinalRequestOptions,
+    ) -> AsyncPaginator[_T, AsyncPageT]:
+        return AsyncPaginator(client=self, options=options, page_cls=page, model=model)
+
+    @overload
+    async def get(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        options: RequestOptions = {},
+        stream: Literal[False] = False,
+    ) -> ResponseT: ...
+
+    @overload
+    async def get(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        options: RequestOptions = {},
+        stream: Literal[True],
+        stream_cls: type[_AsyncStreamT],
+    ) -> _AsyncStreamT: ...
+
+    @overload
+    async def get(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        options: RequestOptions = {},
+        stream: bool,
+        stream_cls: type[_AsyncStreamT] | None = None,
+    ) -> ResponseT | _AsyncStreamT: ...
+
+    async def get(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        options: RequestOptions = {},
+        stream: bool = False,
+        stream_cls: type[_AsyncStreamT] | None = None,
+    ) -> ResponseT | _AsyncStreamT:
+        opts = FinalRequestOptions.construct(method="get", url=path, **options)
+        return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls)
+
+    @overload
+    async def post(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        body: Body | None = None,
+        files: RequestFiles | None = None,
+        options: RequestOptions = {},
+        stream: Literal[False] = False,
+    ) -> ResponseT: ...
+
+    @overload
+    async def post(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        body: Body | None = None,
+        files: RequestFiles | None = None,
+        options: RequestOptions = {},
+        stream: Literal[True],
+        stream_cls: type[_AsyncStreamT],
+    ) -> _AsyncStreamT: ...
+
+    @overload
+    async def post(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        body: Body | None = None,
+        files: RequestFiles | None = None,
+        options: RequestOptions = {},
+        stream: bool,
+        stream_cls: type[_AsyncStreamT] | None = None,
+    ) -> ResponseT | _AsyncStreamT: ...
+
+    async def post(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        body: Body | None = None,
+        files: RequestFiles | None = None,
+        options: RequestOptions = {},
+        stream: bool = False,
+        stream_cls: type[_AsyncStreamT] | None = None,
+    ) -> ResponseT | _AsyncStreamT:
+        opts = FinalRequestOptions.construct(
+            method="post", url=path, json_data=body, files=await async_to_httpx_files(files), **options
+        )
+        return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls)
+
+    async def patch(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        body: Body | None = None,
+        options: RequestOptions = {},
+    ) -> ResponseT:
+        opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options)
+        return await self.request(cast_to, opts)
+
+    async def put(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        body: Body | None = None,
+        files: RequestFiles | None = None,
+        options: RequestOptions = {},
+    ) -> ResponseT:
+        opts = FinalRequestOptions.construct(
+            method="put", url=path, json_data=body, files=await async_to_httpx_files(files), **options
+        )
+        return await self.request(cast_to, opts)
+
+    async def delete(
+        self,
+        path: str,
+        *,
+        cast_to: Type[ResponseT],
+        body: Body | None = None,
+        options: RequestOptions = {},
+    ) -> ResponseT:
+        opts = FinalRequestOptions.construct(method="delete", url=path, json_data=body, **options)
+        return await self.request(cast_to, opts)
+
+    def get_api_list(
+        self,
+        path: str,
+        *,
+        model: Type[_T],
+        page: Type[AsyncPageT],
+        body: Body | None = None,
+        options: RequestOptions = {},
+        method: str = "get",
+    ) -> AsyncPaginator[_T, AsyncPageT]:
+        opts = FinalRequestOptions.construct(method=method, url=path, json_data=body, **options)
+        return self._request_api_list(model, page, opts)
+
+
+def make_request_options(
+    *,
+    query: Query | None = None,
+    extra_headers: Headers | None = None,
+    extra_query: Query | None = None,
+    extra_body: Body | None = None,
+    idempotency_key: str | None = None,
+    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    post_parser: PostParser | NotGiven = NOT_GIVEN,
+) -> RequestOptions:
+    """Create a dict of type RequestOptions without keys of NotGiven values."""
+    options: RequestOptions = {}
+    if extra_headers is not None:
+        options["headers"] = extra_headers
+
+    if extra_body is not None:
+        options["extra_json"] = cast(AnyMapping, extra_body)
+
+    if query is not None:
+        options["params"] = query
+
+    if extra_query is not None:
+        options["params"] = {**options.get("params", {}), **extra_query}
+
+    if not isinstance(timeout, NotGiven):
+        options["timeout"] = timeout
+
+    if idempotency_key is not None:
+        options["idempotency_key"] = idempotency_key
+
+    if is_given(post_parser):
+        # internal
+        options["post_parser"] = post_parser  # type: ignore
+
+    return options
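+
+# Illustrative call (a sketch, not part of this module):
+#
+#     make_request_options(extra_headers={"X-Debug": "1"}, timeout=5.0)
+#
+# returns {"headers": {"X-Debug": "1"}, "timeout": 5.0}; NOT_GIVEN values are
+# simply left out of the resulting dict.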
+
+
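+# Truthy even when empty: presumably used so that an empty mapping still
+# forces httpx to encode the request body as multipart/form-data.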
+class ForceMultipartDict(Dict[str, None]):
+    def __bool__(self) -> bool:
+        return True
+
+
+class OtherPlatform:
+    def __init__(self, name: str) -> None:
+        self.name = name
+
+    @override
+    def __str__(self) -> str:
+        return f"Other:{self.name}"
+
+
+Platform = Union[
+    OtherPlatform,
+    Literal[
+        "MacOS",
+        "Linux",
+        "Windows",
+        "FreeBSD",
+        "OpenBSD",
+        "iOS",
+        "Android",
+        "Unknown",
+    ],
+]
+
+
+def get_platform() -> Platform:
+    try:
+        system = platform.system().lower()
+        platform_name = platform.platform().lower()
+    except Exception:
+        return "Unknown"
+
+    if "iphone" in platform_name or "ipad" in platform_name:
+        # Tested using Python3IDE on an iPhone 11 and Pythonista on an iPad 7
+        # system is Darwin and platform_name is a string like:
+        # - Darwin-21.6.0-iPhone12,1-64bit
+        # - Darwin-21.6.0-iPad7,11-64bit
+        return "iOS"
+
+    if system == "darwin":
+        return "MacOS"
+
+    if system == "windows":
+        return "Windows"
+
+    if "android" in platform_name:
+        # Tested using Pydroid 3
+        # system is Linux and platform_name is a string like 'Linux-5.10.81-android12-9-00001-geba40aecb3b7-ab8534902-aarch64-with-libc'
+        return "Android"
+
+    if system == "linux":
+        # https://distro.readthedocs.io/en/latest/#distro.id
+        distro_id = distro.id()
+        if distro_id == "freebsd":
+            return "FreeBSD"
+
+        if distro_id == "openbsd":
+            return "OpenBSD"
+
+        return "Linux"
+
+    if platform_name:
+        return OtherPlatform(platform_name)
+
+    return "Unknown"
+
+
+@lru_cache(maxsize=None)
+def platform_headers(version: str, *, platform: Platform | None) -> Dict[str, str]:
+    return {
+        "X-Stainless-Lang": "python",
+        "X-Stainless-Package-Version": version,
+        "X-Stainless-OS": str(platform or get_platform()),
+        "X-Stainless-Arch": str(get_architecture()),
+        "X-Stainless-Runtime": get_python_runtime(),
+        "X-Stainless-Runtime-Version": get_python_version(),
+    }
+
+
+class OtherArch:
+    def __init__(self, name: str) -> None:
+        self.name = name
+
+    @override
+    def __str__(self) -> str:
+        return f"other:{self.name}"
+
+
+Arch = Union[OtherArch, Literal["x32", "x64", "arm", "arm64", "unknown"]]
+
+
+def get_python_runtime() -> str:
+    try:
+        return platform.python_implementation()
+    except Exception:
+        return "unknown"
+
+
+def get_python_version() -> str:
+    try:
+        return platform.python_version()
+    except Exception:
+        return "unknown"
+
+
+def get_architecture() -> Arch:
+    try:
+        machine = platform.machine().lower()
+    except Exception:
+        return "unknown"
+
+    if machine in ("arm64", "aarch64"):
+        return "arm64"
+
+    # TODO: untested
+    if machine == "arm":
+        return "arm"
+
+    if machine == "x86_64":
+        return "x64"
+
+    # TODO: untested
+    if sys.maxsize <= 2**32:
+        return "x32"
+
+    if machine:
+        return OtherArch(machine)
+
+    return "unknown"
+
+
+def _merge_mappings(
+    obj1: Mapping[_T_co, Union[_T, Omit]],
+    obj2: Mapping[_T_co, Union[_T, Omit]],
+) -> Dict[_T_co, _T]:
+    """Merge two mappings of the same type, removing any values that are instances of `Omit`.
+
+    In cases with duplicate keys the second mapping takes precedence.
+    """
+    merged = {**obj1, **obj2}
+    return {key: value for key, value in merged.items() if not isinstance(value, Omit)}
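+
+# Illustrative call: _merge_mappings({"a": 1, "b": Omit()}, {"b": 2}) returns
+# {"a": 1, "b": 2}; obj2 wins on duplicate keys, and Omit values are dropped.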
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_client.py b/.venv/lib/python3.12/site-packages/anthropic/_client.py
new file mode 100644
index 00000000..842e26b5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_client.py
@@ -0,0 +1,531 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, Union, Mapping
+from typing_extensions import Self, override
+
+import httpx
+
+from . import _constants, _exceptions
+from ._qs import Querystring
+from ._types import (
+    NOT_GIVEN,
+    Omit,
+    Headers,
+    Timeout,
+    NotGiven,
+    Transport,
+    ProxiesTypes,
+    RequestOptions,
+)
+from ._utils import (
+    is_given,
+    get_async_library,
+)
+from ._version import __version__
+from .resources import models, completions
+from ._streaming import Stream as Stream, AsyncStream as AsyncStream
+from ._exceptions import APIStatusError
+from ._base_client import (
+    DEFAULT_MAX_RETRIES,
+    SyncAPIClient,
+    AsyncAPIClient,
+)
+from .resources.beta import beta
+from .resources.messages import messages
+
+__all__ = [
+    "Timeout",
+    "Transport",
+    "ProxiesTypes",
+    "RequestOptions",
+    "Anthropic",
+    "AsyncAnthropic",
+    "Client",
+    "AsyncClient",
+]
+
+
+class Anthropic(SyncAPIClient):
+    completions: completions.Completions
+    messages: messages.Messages
+    models: models.Models
+    beta: beta.Beta
+    with_raw_response: AnthropicWithRawResponse
+    with_streaming_response: AnthropicWithStreamedResponse
+
+    # client options
+    api_key: str | None
+    auth_token: str | None
+
+    # constants
+    HUMAN_PROMPT = _constants.HUMAN_PROMPT
+    AI_PROMPT = _constants.AI_PROMPT
+
+    def __init__(
+        self,
+        *,
+        api_key: str | None = None,
+        auth_token: str | None = None,
+        base_url: str | httpx.URL | None = None,
+        timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        # Configure a custom httpx client.
+        # We provide a `DefaultHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`.
+        # See the [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
+        http_client: httpx.Client | None = None,
+        # Enable or disable schema validation for data returned by the API.
+        # When enabled an error APIResponseValidationError is raised
+        # if the API responds with invalid data for the expected schema.
+        #
+        # This parameter may be removed or changed in the future.
+        # If you rely on this feature, please open a GitHub issue
+        # outlining your use-case to help us decide if it should be
+        # part of our public interface in the future.
+        _strict_response_validation: bool = False,
+    ) -> None:
+        """Construct a new synchronous Anthropic client instance.
+
+        This automatically infers the following arguments from their corresponding environment variables if they are not provided:
+        - `api_key` from `ANTHROPIC_API_KEY`
+        - `auth_token` from `ANTHROPIC_AUTH_TOKEN`
+        """
+        if api_key is None:
+            api_key = os.environ.get("ANTHROPIC_API_KEY")
+        self.api_key = api_key
+
+        if auth_token is None:
+            auth_token = os.environ.get("ANTHROPIC_AUTH_TOKEN")
+        self.auth_token = auth_token
+
+        if base_url is None:
+            base_url = os.environ.get("ANTHROPIC_BASE_URL")
+        if base_url is None:
+            base_url = f"https://api.anthropic.com"
+
+        super().__init__(
+            version=__version__,
+            base_url=base_url,
+            max_retries=max_retries,
+            timeout=timeout,
+            http_client=http_client,
+            custom_headers=default_headers,
+            custom_query=default_query,
+            _strict_response_validation=_strict_response_validation,
+        )
+
+        self._default_stream_cls = Stream
+
+        self.completions = completions.Completions(self)
+        self.messages = messages.Messages(self)
+        self.models = models.Models(self)
+        self.beta = beta.Beta(self)
+        self.with_raw_response = AnthropicWithRawResponse(self)
+        self.with_streaming_response = AnthropicWithStreamedResponse(self)
+
+    @property
+    @override
+    def qs(self) -> Querystring:
+        return Querystring(array_format="comma")
+
+    @property
+    @override
+    def auth_headers(self) -> dict[str, str]:
+        if self._api_key_auth:
+            return self._api_key_auth
+        if self._bearer_auth:
+            return self._bearer_auth
+        return {}
+
+    @property
+    def _api_key_auth(self) -> dict[str, str]:
+        api_key = self.api_key
+        if api_key is None:
+            return {}
+        return {"X-Api-Key": api_key}
+
+    @property
+    def _bearer_auth(self) -> dict[str, str]:
+        auth_token = self.auth_token
+        if auth_token is None:
+            return {}
+        return {"Authorization": f"Bearer {auth_token}"}
+
+    @property
+    @override
+    def default_headers(self) -> dict[str, str | Omit]:
+        return {
+            **super().default_headers,
+            "X-Stainless-Async": "false",
+            "anthropic-version": "2023-06-01",
+            **self._custom_headers,
+        }
+
+    @override
+    def _validate_headers(self, headers: Headers, custom_headers: Headers) -> None:
+        if self.api_key and headers.get("X-Api-Key"):
+            return
+        if isinstance(custom_headers.get("X-Api-Key"), Omit):
+            return
+
+        if self.auth_token and headers.get("Authorization"):
+            return
+        if isinstance(custom_headers.get("Authorization"), Omit):
+            return
+
+        raise TypeError(
+            '"Could not resolve authentication method. Expected either api_key or auth_token to be set. Or for one of the `X-Api-Key` or `Authorization` headers to be explicitly omitted"'
+        )
+
+    def copy(
+        self,
+        *,
+        api_key: str | None = None,
+        auth_token: str | None = None,
+        base_url: str | httpx.URL | None = None,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        http_client: httpx.Client | None = None,
+        max_retries: int | NotGiven = NOT_GIVEN,
+        default_headers: Mapping[str, str] | None = None,
+        set_default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        set_default_query: Mapping[str, object] | None = None,
+        _extra_kwargs: Mapping[str, Any] = {},
+    ) -> Self:
+        """
+        Create a new client instance re-using the same options given to the current client with optional overriding.
+        """
+        if default_headers is not None and set_default_headers is not None:
+            raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive")
+
+        if default_query is not None and set_default_query is not None:
+            raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive")
+
+        headers = self._custom_headers
+        if default_headers is not None:
+            headers = {**headers, **default_headers}
+        elif set_default_headers is not None:
+            headers = set_default_headers
+
+        params = self._custom_query
+        if default_query is not None:
+            params = {**params, **default_query}
+        elif set_default_query is not None:
+            params = set_default_query
+
+        http_client = http_client or self._client
+        return self.__class__(
+            api_key=api_key or self.api_key,
+            auth_token=auth_token or self.auth_token,
+            base_url=base_url or self.base_url,
+            timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
+            http_client=http_client,
+            max_retries=max_retries if is_given(max_retries) else self.max_retries,
+            default_headers=headers,
+            default_query=params,
+            **_extra_kwargs,
+        )
+
+    # Alias for `copy` for nicer inline usage, e.g.
+    # client.with_options(timeout=10).foo.create(...)
+    with_options = copy
+
+    @override
+    def _make_status_error(
+        self,
+        err_msg: str,
+        *,
+        body: object,
+        response: httpx.Response,
+    ) -> APIStatusError:
+        if response.status_code == 400:
+            return _exceptions.BadRequestError(err_msg, response=response, body=body)
+
+        if response.status_code == 401:
+            return _exceptions.AuthenticationError(err_msg, response=response, body=body)
+
+        if response.status_code == 403:
+            return _exceptions.PermissionDeniedError(err_msg, response=response, body=body)
+
+        if response.status_code == 404:
+            return _exceptions.NotFoundError(err_msg, response=response, body=body)
+
+        if response.status_code == 409:
+            return _exceptions.ConflictError(err_msg, response=response, body=body)
+
+        if response.status_code == 413:
+            return _exceptions.RequestTooLargeError(err_msg, response=response, body=body)
+
+        if response.status_code == 422:
+            return _exceptions.UnprocessableEntityError(err_msg, response=response, body=body)
+
+        if response.status_code == 429:
+            return _exceptions.RateLimitError(err_msg, response=response, body=body)
+
+        if response.status_code == 529:
+            return _exceptions.OverloadedError(err_msg, response=response, body=body)
+
+        if response.status_code >= 500:
+            return _exceptions.InternalServerError(err_msg, response=response, body=body)
+        return APIStatusError(err_msg, response=response, body=body)
+
+
+class AsyncAnthropic(AsyncAPIClient):
+    completions: completions.AsyncCompletions
+    messages: messages.AsyncMessages
+    models: models.AsyncModels
+    beta: beta.AsyncBeta
+    with_raw_response: AsyncAnthropicWithRawResponse
+    with_streaming_response: AsyncAnthropicWithStreamedResponse
+
+    # client options
+    api_key: str | None
+    auth_token: str | None
+
+    # constants
+    HUMAN_PROMPT = _constants.HUMAN_PROMPT
+    AI_PROMPT = _constants.AI_PROMPT
+
+    def __init__(
+        self,
+        *,
+        api_key: str | None = None,
+        auth_token: str | None = None,
+        base_url: str | httpx.URL | None = None,
+        timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        # Configure a custom httpx client.
+        # We provide a `DefaultAsyncHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`.
+        # See the [httpx documentation](https://www.python-httpx.org/api/#asyncclient) for more details.
+        http_client: httpx.AsyncClient | None = None,
+        # Enable or disable schema validation for data returned by the API.
+        # When enabled an error APIResponseValidationError is raised
+        # if the API responds with invalid data for the expected schema.
+        #
+        # This parameter may be removed or changed in the future.
+        # If you rely on this feature, please open a GitHub issue
+        # outlining your use-case to help us decide if it should be
+        # part of our public interface in the future.
+        _strict_response_validation: bool = False,
+    ) -> None:
+        """Construct a new async AsyncAnthropic client instance.
+
+        This automatically infers the following arguments from their corresponding environment variables if they are not provided:
+        - `api_key` from `ANTHROPIC_API_KEY`
+        - `auth_token` from `ANTHROPIC_AUTH_TOKEN`
+        """
+        if api_key is None:
+            api_key = os.environ.get("ANTHROPIC_API_KEY")
+        self.api_key = api_key
+
+        if auth_token is None:
+            auth_token = os.environ.get("ANTHROPIC_AUTH_TOKEN")
+        self.auth_token = auth_token
+
+        if base_url is None:
+            base_url = os.environ.get("ANTHROPIC_BASE_URL")
+        if base_url is None:
+            base_url = f"https://api.anthropic.com"
+
+        super().__init__(
+            version=__version__,
+            base_url=base_url,
+            max_retries=max_retries,
+            timeout=timeout,
+            http_client=http_client,
+            custom_headers=default_headers,
+            custom_query=default_query,
+            _strict_response_validation=_strict_response_validation,
+        )
+
+        self._default_stream_cls = AsyncStream
+
+        self.completions = completions.AsyncCompletions(self)
+        self.messages = messages.AsyncMessages(self)
+        self.models = models.AsyncModels(self)
+        self.beta = beta.AsyncBeta(self)
+        self.with_raw_response = AsyncAnthropicWithRawResponse(self)
+        self.with_streaming_response = AsyncAnthropicWithStreamedResponse(self)
+
+    @property
+    @override
+    def qs(self) -> Querystring:
+        return Querystring(array_format="comma")
+
+    @property
+    @override
+    def auth_headers(self) -> dict[str, str]:
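+        # API-key auth (`X-Api-Key`) takes precedence; bearer `Authorization`
+        # auth is used only when no API key is configured.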
+        if self._api_key_auth:
+            return self._api_key_auth
+        if self._bearer_auth:
+            return self._bearer_auth
+        return {}
+
+    @property
+    def _api_key_auth(self) -> dict[str, str]:
+        api_key = self.api_key
+        if api_key is None:
+            return {}
+        return {"X-Api-Key": api_key}
+
+    @property
+    def _bearer_auth(self) -> dict[str, str]:
+        auth_token = self.auth_token
+        if auth_token is None:
+            return {}
+        return {"Authorization": f"Bearer {auth_token}"}
+
+    @property
+    @override
+    def default_headers(self) -> dict[str, str | Omit]:
+        return {
+            **super().default_headers,
+            "X-Stainless-Async": f"async:{get_async_library()}",
+            "anthropic-version": "2023-06-01",
+            **self._custom_headers,
+        }
+
+    @override
+    def _validate_headers(self, headers: Headers, custom_headers: Headers) -> None:
+        if self.api_key and headers.get("X-Api-Key"):
+            return
+        if isinstance(custom_headers.get("X-Api-Key"), Omit):
+            return
+
+        if self.auth_token and headers.get("Authorization"):
+            return
+        if isinstance(custom_headers.get("Authorization"), Omit):
+            return
+
+        raise TypeError(
+            "Could not resolve authentication method. Expected either api_key or auth_token to be set. Or for one of the `X-Api-Key` or `Authorization` headers to be explicitly omitted"
+        )
+
+    def copy(
+        self,
+        *,
+        api_key: str | None = None,
+        auth_token: str | None = None,
+        base_url: str | httpx.URL | None = None,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        http_client: httpx.AsyncClient | None = None,
+        max_retries: int | NotGiven = NOT_GIVEN,
+        default_headers: Mapping[str, str] | None = None,
+        set_default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        set_default_query: Mapping[str, object] | None = None,
+        _extra_kwargs: Mapping[str, Any] = {},
+    ) -> Self:
+        """
+        Create a new client instance re-using the same options given to the current client with optional overriding.
+        """
+        if default_headers is not None and set_default_headers is not None:
+            raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive")
+
+        if default_query is not None and set_default_query is not None:
+            raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive")
+
+        headers = self._custom_headers
+        if default_headers is not None:
+            headers = {**headers, **default_headers}
+        elif set_default_headers is not None:
+            headers = set_default_headers
+
+        params = self._custom_query
+        if default_query is not None:
+            params = {**params, **default_query}
+        elif set_default_query is not None:
+            params = set_default_query
+
+        http_client = http_client or self._client
+        return self.__class__(
+            api_key=api_key or self.api_key,
+            auth_token=auth_token or self.auth_token,
+            base_url=base_url or self.base_url,
+            timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
+            http_client=http_client,
+            max_retries=max_retries if is_given(max_retries) else self.max_retries,
+            default_headers=headers,
+            default_query=params,
+            **_extra_kwargs,
+        )
+
+    # Alias for `copy` for nicer inline usage, e.g.
+    # client.with_options(timeout=10).foo.create(...)
+    with_options = copy
+
+    @override
+    def _make_status_error(
+        self,
+        err_msg: str,
+        *,
+        body: object,
+        response: httpx.Response,
+    ) -> APIStatusError:
+        if response.status_code == 400:
+            return _exceptions.BadRequestError(err_msg, response=response, body=body)
+
+        if response.status_code == 401:
+            return _exceptions.AuthenticationError(err_msg, response=response, body=body)
+
+        if response.status_code == 403:
+            return _exceptions.PermissionDeniedError(err_msg, response=response, body=body)
+
+        if response.status_code == 404:
+            return _exceptions.NotFoundError(err_msg, response=response, body=body)
+
+        if response.status_code == 409:
+            return _exceptions.ConflictError(err_msg, response=response, body=body)
+
+        if response.status_code == 413:
+            return _exceptions.RequestTooLargeError(err_msg, response=response, body=body)
+
+        if response.status_code == 422:
+            return _exceptions.UnprocessableEntityError(err_msg, response=response, body=body)
+
+        if response.status_code == 429:
+            return _exceptions.RateLimitError(err_msg, response=response, body=body)
+
+        if response.status_code == 529:
+            return _exceptions.OverloadedError(err_msg, response=response, body=body)
+
+        if response.status_code >= 500:
+            return _exceptions.InternalServerError(err_msg, response=response, body=body)
+        return APIStatusError(err_msg, response=response, body=body)
+
+
+class AnthropicWithRawResponse:
+    def __init__(self, client: Anthropic) -> None:
+        self.completions = completions.CompletionsWithRawResponse(client.completions)
+        self.messages = messages.MessagesWithRawResponse(client.messages)
+        self.models = models.ModelsWithRawResponse(client.models)
+        self.beta = beta.BetaWithRawResponse(client.beta)
+
+
+class AsyncAnthropicWithRawResponse:
+    def __init__(self, client: AsyncAnthropic) -> None:
+        self.completions = completions.AsyncCompletionsWithRawResponse(client.completions)
+        self.messages = messages.AsyncMessagesWithRawResponse(client.messages)
+        self.models = models.AsyncModelsWithRawResponse(client.models)
+        self.beta = beta.AsyncBetaWithRawResponse(client.beta)
+
+
+class AnthropicWithStreamedResponse:
+    def __init__(self, client: Anthropic) -> None:
+        self.completions = completions.CompletionsWithStreamingResponse(client.completions)
+        self.messages = messages.MessagesWithStreamingResponse(client.messages)
+        self.models = models.ModelsWithStreamingResponse(client.models)
+        self.beta = beta.BetaWithStreamingResponse(client.beta)
+
+
+class AsyncAnthropicWithStreamedResponse:
+    def __init__(self, client: AsyncAnthropic) -> None:
+        self.completions = completions.AsyncCompletionsWithStreamingResponse(client.completions)
+        self.messages = messages.AsyncMessagesWithStreamingResponse(client.messages)
+        self.models = models.AsyncModelsWithStreamingResponse(client.models)
+        self.beta = beta.AsyncBetaWithStreamingResponse(client.beta)
+
+
+Client = Anthropic
+
+AsyncClient = AsyncAnthropic
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_compat.py b/.venv/lib/python3.12/site-packages/anthropic/_compat.py
new file mode 100644
index 00000000..92d9ee61
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_compat.py
@@ -0,0 +1,219 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Union, Generic, TypeVar, Callable, cast, overload
+from datetime import date, datetime
+from typing_extensions import Self, Literal
+
+import pydantic
+from pydantic.fields import FieldInfo
+
+from ._types import IncEx, StrBytesIntFloat
+
+_T = TypeVar("_T")
+_ModelT = TypeVar("_ModelT", bound=pydantic.BaseModel)
+
+# --------------- Pydantic v2 compatibility ---------------
+
+# Pyright incorrectly reports some of our functions as overriding a method when they don't
+# pyright: reportIncompatibleMethodOverride=false
+
+PYDANTIC_V2 = pydantic.VERSION.startswith("2.")
+
+# v1 re-exports
+if TYPE_CHECKING:
+
+    def parse_date(value: date | StrBytesIntFloat) -> date:  # noqa: ARG001
+        ...
+
+    def parse_datetime(value: Union[datetime, StrBytesIntFloat]) -> datetime:  # noqa: ARG001
+        ...
+
+    def get_args(t: type[Any]) -> tuple[Any, ...]:  # noqa: ARG001
+        ...
+
+    def is_union(tp: type[Any] | None) -> bool:  # noqa: ARG001
+        ...
+
+    def get_origin(t: type[Any]) -> type[Any] | None:  # noqa: ARG001
+        ...
+
+    def is_literal_type(type_: type[Any]) -> bool:  # noqa: ARG001
+        ...
+
+    def is_typeddict(type_: type[Any]) -> bool:  # noqa: ARG001
+        ...
+
+else:
+    if PYDANTIC_V2:
+        from pydantic.v1.typing import (
+            get_args as get_args,
+            is_union as is_union,
+            get_origin as get_origin,
+            is_typeddict as is_typeddict,
+            is_literal_type as is_literal_type,
+        )
+        from pydantic.v1.datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime
+    else:
+        from pydantic.typing import (
+            get_args as get_args,
+            is_union as is_union,
+            get_origin as get_origin,
+            is_typeddict as is_typeddict,
+            is_literal_type as is_literal_type,
+        )
+        from pydantic.datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime
+
+
+# refactored config
+if TYPE_CHECKING:
+    from pydantic import ConfigDict as ConfigDict
+else:
+    if PYDANTIC_V2:
+        from pydantic import ConfigDict
+    else:
+        # TODO: provide an error message here?
+        ConfigDict = None
+
+
+# renamed methods / properties
+def parse_obj(model: type[_ModelT], value: object) -> _ModelT:
+    if PYDANTIC_V2:
+        return model.model_validate(value)
+    else:
+        return cast(_ModelT, model.parse_obj(value))  # pyright: ignore[reportDeprecated, reportUnnecessaryCast]
+
+
+def field_is_required(field: FieldInfo) -> bool:
+    if PYDANTIC_V2:
+        return field.is_required()
+    return field.required  # type: ignore
+
+
+def field_get_default(field: FieldInfo) -> Any:
+    value = field.get_default()
+    if PYDANTIC_V2:
+        from pydantic_core import PydanticUndefined
+
+        if value is PydanticUndefined:
+            return None
+        return value
+    return value
+
+
+def field_outer_type(field: FieldInfo) -> Any:
+    if PYDANTIC_V2:
+        return field.annotation
+    return field.outer_type_  # type: ignore
+
+
+def get_model_config(model: type[pydantic.BaseModel]) -> Any:
+    if PYDANTIC_V2:
+        return model.model_config
+    return model.__config__  # type: ignore
+
+
+def get_model_fields(model: type[pydantic.BaseModel]) -> dict[str, FieldInfo]:
+    if PYDANTIC_V2:
+        return model.model_fields
+    return model.__fields__  # type: ignore
+
+
+def model_copy(model: _ModelT, *, deep: bool = False) -> _ModelT:
+    if PYDANTIC_V2:
+        return model.model_copy(deep=deep)
+    return model.copy(deep=deep)  # type: ignore
+
+
+def model_json(model: pydantic.BaseModel, *, indent: int | None = None) -> str:
+    if PYDANTIC_V2:
+        return model.model_dump_json(indent=indent)
+    return model.json(indent=indent)  # type: ignore
+
+
+def model_dump(
+    model: pydantic.BaseModel,
+    *,
+    exclude: IncEx | None = None,
+    exclude_unset: bool = False,
+    exclude_defaults: bool = False,
+    warnings: bool = True,
+    mode: Literal["json", "python"] = "python",
+) -> dict[str, Any]:
+    if PYDANTIC_V2 or hasattr(model, "model_dump"):
+        return model.model_dump(
+            mode=mode,
+            exclude=exclude,
+            exclude_unset=exclude_unset,
+            exclude_defaults=exclude_defaults,
+            # warnings are not supported in Pydantic v1
+            warnings=warnings if PYDANTIC_V2 else True,
+        )
+    return cast(
+        "dict[str, Any]",
+        model.dict(  # pyright: ignore[reportDeprecated, reportUnnecessaryCast]
+            exclude=exclude,
+            exclude_unset=exclude_unset,
+            exclude_defaults=exclude_defaults,
+        ),
+    )
+
+
+def model_parse(model: type[_ModelT], data: Any) -> _ModelT:
+    if PYDANTIC_V2:
+        return model.model_validate(data)
+    return model.parse_obj(data)  # pyright: ignore[reportDeprecated]
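+
+# Illustrative sketch of how the version shims above are used
+# (hypothetical `User` model):
+#
+#     class User(pydantic.BaseModel):
+#         name: str
+#
+#     user = model_parse(User, {"name": "ada"})
+#     # Pydantic v2: User.model_validate(...); v1: User.parse_obj(...)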
+
+
+# generic models
+if TYPE_CHECKING:
+
+    class GenericModel(pydantic.BaseModel): ...
+
+else:
+    if PYDANTIC_V2:
+        # there no longer needs to be a distinction in v2 but
+        # we still have to create our own subclass to avoid
+        # inconsistent MRO ordering errors
+        class GenericModel(pydantic.BaseModel): ...
+
+    else:
+        import pydantic.generics
+
+        class GenericModel(pydantic.generics.GenericModel, pydantic.BaseModel): ...
+
+
+# cached properties
+if TYPE_CHECKING:
+    cached_property = property
+
+    # we define a separate type (copied from typeshed)
+    # that represents that `cached_property` is `set`able
+    # at runtime, which differs from `@property`.
+    #
+    # this is a separate type as editors likely special case
+    # `@property` and we don't want to cause issues just to have
+    # more helpful internal types.
+
+    class typed_cached_property(Generic[_T]):
+        func: Callable[[Any], _T]
+        attrname: str | None
+
+        def __init__(self, func: Callable[[Any], _T]) -> None: ...
+
+        @overload
+        def __get__(self, instance: None, owner: type[Any] | None = None) -> Self: ...
+
+        @overload
+        def __get__(self, instance: object, owner: type[Any] | None = None) -> _T: ...
+
+        def __get__(self, instance: object, owner: type[Any] | None = None) -> _T | Self:
+            raise NotImplementedError()
+
+        def __set_name__(self, owner: type[Any], name: str) -> None: ...
+
+        # __set__ is not defined at runtime, but @cached_property is designed to be settable
+        def __set__(self, instance: object, value: _T) -> None: ...
+else:
+    from functools import cached_property as cached_property
+
+    typed_cached_property = cached_property
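+
+# Usage sketch (illustrative): at runtime this is `functools.cached_property`;
+# the TYPE_CHECKING stub above only makes the descriptor appear settable to
+# type checkers:
+#
+#     class Example:
+#         @typed_cached_property
+#         def value(self) -> int:
+#             return 42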
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_constants.py b/.venv/lib/python3.12/site-packages/anthropic/_constants.py
new file mode 100644
index 00000000..617c4b47
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_constants.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+import httpx
+
+RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response"
+OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to"
+
+# default timeout is 10 minutes
+DEFAULT_TIMEOUT = httpx.Timeout(timeout=10 * 60, connect=5.0)
+DEFAULT_MAX_RETRIES = 2
+DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=1000, max_keepalive_connections=100)
+
+INITIAL_RETRY_DELAY = 0.5
+MAX_RETRY_DELAY = 8.0
+
+HUMAN_PROMPT = "\n\nHuman:"
+
+AI_PROMPT = "\n\nAssistant:"
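+
+# Illustrative sketch: these markers frame turns for the legacy Text
+# Completions prompt format, e.g.
+#
+#     prompt = f"{HUMAN_PROMPT} Hello, Claude{AI_PROMPT}"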
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_decoders/jsonl.py b/.venv/lib/python3.12/site-packages/anthropic/_decoders/jsonl.py
new file mode 100644
index 00000000..ac5ac74f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_decoders/jsonl.py
@@ -0,0 +1,123 @@
+from __future__ import annotations
+
+import json
+from typing_extensions import Generic, TypeVar, Iterator, AsyncIterator
+
+import httpx
+
+from .._models import construct_type_unchecked
+
+_T = TypeVar("_T")
+
+
+class JSONLDecoder(Generic[_T]):
+    """A decoder for [JSON Lines](https://jsonlines.org) format.
+
+    This class provides an iterator over a byte-iterator that parses each JSON Line
+    into a given type.
+    """
+
+    http_response: httpx.Response
+    """The HTTP response this decoder was constructed from"""
+
+    def __init__(
+        self,
+        *,
+        raw_iterator: Iterator[bytes],
+        line_type: type[_T],
+        http_response: httpx.Response,
+    ) -> None:
+        super().__init__()
+        self.http_response = http_response
+        self._raw_iterator = raw_iterator
+        self._line_type = line_type
+        self._iterator = self.__decode__()
+
+    def close(self) -> None:
+        """Close the response body stream.
+
+        This is called automatically if you consume the entire stream.
+        """
+        self.http_response.close()
+
+    def __decode__(self) -> Iterator[_T]:
+        buf = b""
+        for chunk in self._raw_iterator:
+            for line in chunk.splitlines(keepends=True):
+                buf += line
+                if buf.endswith((b"\r", b"\n", b"\r\n")):
+                    yield construct_type_unchecked(
+                        value=json.loads(buf),
+                        type_=self._line_type,
+                    )
+                    buf = b""
+
+        # flush
+        if buf:
+            yield construct_type_unchecked(
+                value=json.loads(buf),
+                type_=self._line_type,
+            )
+
+    def __next__(self) -> _T:
+        return self._iterator.__next__()
+
+    def __iter__(self) -> Iterator[_T]:
+        for item in self._iterator:
+            yield item
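+
+# Usage sketch (illustrative; decoders are normally constructed by the
+# response machinery, and `response` here is a hypothetical httpx.Response):
+#
+#     decoder = JSONLDecoder(
+#         raw_iterator=response.iter_bytes(chunk_size=64),
+#         line_type=dict,
+#         http_response=response,
+#     )
+#     for item in decoder:
+#         print(item)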
+
+
+class AsyncJSONLDecoder(Generic[_T]):
+    """A decoder for [JSON Lines](https://jsonlines.org) format.
+
+    This class provides an async iterator over a byte-iterator that parses each JSON Line
+    into a given type.
+    """
+
+    http_response: httpx.Response
+
+    def __init__(
+        self,
+        *,
+        raw_iterator: AsyncIterator[bytes],
+        line_type: type[_T],
+        http_response: httpx.Response,
+    ) -> None:
+        super().__init__()
+        self.http_response = http_response
+        self._raw_iterator = raw_iterator
+        self._line_type = line_type
+        self._iterator = self.__decode__()
+
+    async def close(self) -> None:
+        """Close the response body stream.
+
+        This is called automatically if you consume the entire stream.
+        """
+        await self.http_response.aclose()
+
+    async def __decode__(self) -> AsyncIterator[_T]:
+        buf = b""
+        async for chunk in self._raw_iterator:
+            for line in chunk.splitlines(keepends=True):
+                buf += line
+                if buf.endswith((b"\r", b"\n", b"\r\n")):
+                    yield construct_type_unchecked(
+                        value=json.loads(buf),
+                        type_=self._line_type,
+                    )
+                    buf = b""
+
+        # flush
+        if buf:
+            yield construct_type_unchecked(
+                value=json.loads(buf),
+                type_=self._line_type,
+            )
+
+    async def __anext__(self) -> _T:
+        return await self._iterator.__anext__()
+
+    async def __aiter__(self) -> AsyncIterator[_T]:
+        async for item in self._iterator:
+            yield item
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_exceptions.py b/.venv/lib/python3.12/site-packages/anthropic/_exceptions.py
new file mode 100644
index 00000000..2bf3e81a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_exceptions.py
@@ -0,0 +1,126 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal
+
+import httpx
+
+__all__ = [
+    "BadRequestError",
+    "AuthenticationError",
+    "PermissionDeniedError",
+    "NotFoundError",
+    "ConflictError",
+    "UnprocessableEntityError",
+    "RateLimitError",
+    "InternalServerError",
+]
+
+
+class AnthropicError(Exception):
+    pass
+
+
+class APIError(AnthropicError):
+    message: str
+    request: httpx.Request
+
+    body: object | None
+    """The API response body.
+
+    If the API responded with a valid JSON structure then this property will be the
+    decoded result.
+
+    If it isn't a valid JSON structure then this will be the raw response.
+
+    If there was no response associated with this error then it will be `None`.
+    """
+
+    def __init__(self, message: str, request: httpx.Request, *, body: object | None) -> None:  # noqa: ARG002
+        super().__init__(message)
+        self.request = request
+        self.message = message
+        self.body = body
+
+
+class APIResponseValidationError(APIError):
+    response: httpx.Response
+    status_code: int
+
+    def __init__(self, response: httpx.Response, body: object | None, *, message: str | None = None) -> None:
+        super().__init__(message or "Data returned by API invalid for expected schema.", response.request, body=body)
+        self.response = response
+        self.status_code = response.status_code
+
+
+class APIStatusError(APIError):
+    """Raised when an API response has a status code of 4xx or 5xx."""
+
+    response: httpx.Response
+    status_code: int
+    request_id: str | None
+
+    def __init__(self, message: str, *, response: httpx.Response, body: object | None) -> None:
+        super().__init__(message, response.request, body=body)
+        self.response = response
+        self.status_code = response.status_code
+        self.request_id = response.headers.get("request-id")
+
+
+class APIConnectionError(APIError):
+    def __init__(self, *, message: str = "Connection error.", request: httpx.Request) -> None:
+        super().__init__(message, request, body=None)
+
+
+class APITimeoutError(APIConnectionError):
+    def __init__(self, request: httpx.Request) -> None:
+        super().__init__(message="Request timed out.", request=request)
+
+
+class BadRequestError(APIStatusError):
+    status_code: Literal[400] = 400  # pyright: ignore[reportIncompatibleVariableOverride]
+
+
+class AuthenticationError(APIStatusError):
+    status_code: Literal[401] = 401  # pyright: ignore[reportIncompatibleVariableOverride]
+
+
+class PermissionDeniedError(APIStatusError):
+    status_code: Literal[403] = 403  # pyright: ignore[reportIncompatibleVariableOverride]
+
+
+class NotFoundError(APIStatusError):
+    status_code: Literal[404] = 404  # pyright: ignore[reportIncompatibleVariableOverride]
+
+
+class ConflictError(APIStatusError):
+    status_code: Literal[409] = 409  # pyright: ignore[reportIncompatibleVariableOverride]
+
+
+class RequestTooLargeError(APIStatusError):
+    status_code: Literal[413] = 413  # pyright: ignore[reportIncompatibleVariableOverride]
+
+
+class UnprocessableEntityError(APIStatusError):
+    status_code: Literal[422] = 422  # pyright: ignore[reportIncompatibleVariableOverride]
+
+
+class RateLimitError(APIStatusError):
+    status_code: Literal[429] = 429  # pyright: ignore[reportIncompatibleVariableOverride]
+
+
+class ServiceUnavailableError(APIStatusError):
+    status_code: Literal[503] = 503  # pyright: ignore[reportIncompatibleVariableOverride]
+
+
+class OverloadedError(APIStatusError):
+    status_code: Literal[529] = 529  # pyright: ignore[reportIncompatibleVariableOverride]
+
+
+class DeadlineExceededError(APIStatusError):
+    status_code: Literal[504] = 504  # pyright: ignore[reportIncompatibleVariableOverride]
+
+
+class InternalServerError(APIStatusError):
+    pass
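+
+
+# Handling sketch (illustrative; `client` is a hypothetical Anthropic client):
+#
+#     try:
+#         client.messages.create(...)
+#     except RateLimitError:
+#         ...  # back off and retry
+#     except APIStatusError as err:
+#         print(err.status_code, err.request_id)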
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_files.py b/.venv/lib/python3.12/site-packages/anthropic/_files.py
new file mode 100644
index 00000000..715cc207
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_files.py
@@ -0,0 +1,123 @@
+from __future__ import annotations
+
+import io
+import os
+import pathlib
+from typing import overload
+from typing_extensions import TypeGuard
+
+import anyio
+
+from ._types import (
+    FileTypes,
+    FileContent,
+    RequestFiles,
+    HttpxFileTypes,
+    Base64FileInput,
+    HttpxFileContent,
+    HttpxRequestFiles,
+)
+from ._utils import is_tuple_t, is_mapping_t, is_sequence_t
+
+
+def is_base64_file_input(obj: object) -> TypeGuard[Base64FileInput]:
+    return isinstance(obj, io.IOBase) or isinstance(obj, os.PathLike)
+
+
+def is_file_content(obj: object) -> TypeGuard[FileContent]:
+    return (
+        isinstance(obj, bytes) or isinstance(obj, tuple) or isinstance(obj, io.IOBase) or isinstance(obj, os.PathLike)
+    )
+
+
+def assert_is_file_content(obj: object, *, key: str | None = None) -> None:
+    if not is_file_content(obj):
+        prefix = f"Expected entry at `{key}`" if key is not None else f"Expected file input `{obj!r}`"
+        raise RuntimeError(
+            f"{prefix} to be bytes, an io.IOBase instance, PathLike or a tuple but received {type(obj)} instead."
+        ) from None
+
+
+@overload
+def to_httpx_files(files: None) -> None: ...
+
+
+@overload
+def to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: ...
+
+
+def to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None:
+    if files is None:
+        return None
+
+    if is_mapping_t(files):
+        files = {key: _transform_file(file) for key, file in files.items()}
+    elif is_sequence_t(files):
+        files = [(key, _transform_file(file)) for key, file in files]
+    else:
+        raise TypeError(f"Unexpected file type input {type(files)}, expected mapping or sequence")
+
+    return files
+
+
+def _transform_file(file: FileTypes) -> HttpxFileTypes:
+    if is_file_content(file):
+        if isinstance(file, os.PathLike):
+            path = pathlib.Path(file)
+            return (path.name, path.read_bytes())
+
+        return file
+
+    if is_tuple_t(file):
+        return (file[0], _read_file_content(file[1]), *file[2:])
+
+    raise TypeError(f"Expected file types input to be a FileContent type or to be a tuple")
+
+
+def _read_file_content(file: FileContent) -> HttpxFileContent:
+    if isinstance(file, os.PathLike):
+        return pathlib.Path(file).read_bytes()
+    return file
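+
+# Both input shapes are accepted (illustrative; hypothetical field names):
+#
+#     to_httpx_files({"file": pathlib.Path("data.txt")})
+#     to_httpx_files([("file", ("data.txt", b"contents"))])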
+
+
+@overload
+async def async_to_httpx_files(files: None) -> None: ...
+
+
+@overload
+async def async_to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: ...
+
+
+async def async_to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None:
+    if files is None:
+        return None
+
+    if is_mapping_t(files):
+        files = {key: await _async_transform_file(file) for key, file in files.items()}
+    elif is_sequence_t(files):
+        files = [(key, await _async_transform_file(file)) for key, file in files]
+    else:
+        raise TypeError("Unexpected file type input {type(files)}, expected mapping or sequence")
+
+    return files
+
+
+async def _async_transform_file(file: FileTypes) -> HttpxFileTypes:
+    if is_file_content(file):
+        if isinstance(file, os.PathLike):
+            path = anyio.Path(file)
+            return (path.name, await path.read_bytes())
+
+        return file
+
+    if is_tuple_t(file):
+        return (file[0], await _async_read_file_content(file[1]), *file[2:])
+
+    raise TypeError(f"Expected file types input to be a FileContent type or to be a tuple")
+
+
+async def _async_read_file_content(file: FileContent) -> HttpxFileContent:
+    if isinstance(file, os.PathLike):
+        return await anyio.Path(file).read_bytes()
+
+    return file
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_legacy_response.py b/.venv/lib/python3.12/site-packages/anthropic/_legacy_response.py
new file mode 100644
index 00000000..5703932e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_legacy_response.py
@@ -0,0 +1,511 @@
+from __future__ import annotations
+
+import os
+import inspect
+import logging
+import datetime
+import functools
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Union,
+    Generic,
+    TypeVar,
+    Callable,
+    Iterator,
+    AsyncIterator,
+    cast,
+    overload,
+)
+from typing_extensions import Awaitable, ParamSpec, override, deprecated, get_origin
+
+import anyio
+import httpx
+import pydantic
+
+from ._types import NoneType
+from ._utils import is_given, extract_type_arg, is_annotated_type, is_type_alias_type
+from ._models import BaseModel, is_basemodel, add_request_id
+from ._constants import RAW_RESPONSE_HEADER
+from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type
+from ._exceptions import APIResponseValidationError
+from ._decoders.jsonl import JSONLDecoder, AsyncJSONLDecoder
+
+if TYPE_CHECKING:
+    from ._models import FinalRequestOptions
+    from ._base_client import BaseClient
+
+
+P = ParamSpec("P")
+R = TypeVar("R")
+_T = TypeVar("_T")
+_T_co = TypeVar("_T_co", covariant=True)
+
+log: logging.Logger = logging.getLogger(__name__)
+
+
+class LegacyAPIResponse(Generic[R]):
+    """This is a legacy class as it will be replaced by `APIResponse`
+    and `AsyncAPIResponse` in the `_response.py` file in the next major
+    release.
+
+    For the sync client this will mostly be the same, with the exception
+    that `content` & `text` will be methods instead of properties. In the
+    async client, all methods will be async.
+
+    A migration script will be provided & the migration in general should
+    be smooth.
+    """
+
+    _cast_to: type[R]
+    _client: BaseClient[Any, Any]
+    _parsed_by_type: dict[type[Any], Any]
+    _stream: bool
+    _stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None
+    _options: FinalRequestOptions
+
+    http_response: httpx.Response
+
+    retries_taken: int
+    """The number of retries made. If no retries happened this will be `0`"""
+
+    def __init__(
+        self,
+        *,
+        raw: httpx.Response,
+        cast_to: type[R],
+        client: BaseClient[Any, Any],
+        stream: bool,
+        stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None,
+        options: FinalRequestOptions,
+        retries_taken: int = 0,
+    ) -> None:
+        self._cast_to = cast_to
+        self._client = client
+        self._parsed_by_type = {}
+        self._stream = stream
+        self._stream_cls = stream_cls
+        self._options = options
+        self.http_response = raw
+        self.retries_taken = retries_taken
+
+    @property
+    def request_id(self) -> str | None:
+        return self.http_response.headers.get("request-id")  # type: ignore[no-any-return]
+
+    @overload
+    def parse(self, *, to: type[_T]) -> _T: ...
+
+    @overload
+    def parse(self) -> R: ...
+
+    def parse(self, *, to: type[_T] | None = None) -> R | _T:
+        """Returns the rich python representation of this response's data.
+
+        NOTE: For the async client: this will become a coroutine in the next major version.
+
+        For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.
+
+        You can customise the type that the response is parsed into through
+        the `to` argument, e.g.
+
+        ```py
+        from anthropic import BaseModel
+
+
+        class MyModel(BaseModel):
+            foo: str
+
+
+        obj = response.parse(to=MyModel)
+        print(obj.foo)
+        ```
+
+        We support parsing:
+          - `BaseModel`
+          - `dict`
+          - `list`
+          - `Union`
+          - `str`
+          - `int`
+          - `float`
+          - `httpx.Response`
+        """
+        cache_key = to if to is not None else self._cast_to
+        cached = self._parsed_by_type.get(cache_key)
+        if cached is not None:
+            return cached  # type: ignore[no-any-return]
+
+        parsed = self._parse(to=to)
+        if is_given(self._options.post_parser):
+            parsed = self._options.post_parser(parsed)
+
+        if isinstance(parsed, BaseModel):
+            add_request_id(parsed, self.request_id)
+
+        self._parsed_by_type[cache_key] = parsed
+        return cast(R, parsed)
+
+    @property
+    def headers(self) -> httpx.Headers:
+        return self.http_response.headers
+
+    @property
+    def http_request(self) -> httpx.Request:
+        return self.http_response.request
+
+    @property
+    def status_code(self) -> int:
+        return self.http_response.status_code
+
+    @property
+    def url(self) -> httpx.URL:
+        return self.http_response.url
+
+    @property
+    def method(self) -> str:
+        return self.http_request.method
+
+    @property
+    def content(self) -> bytes:
+        """Return the binary response content.
+
+        NOTE: this will be removed in favour of `.read()` in the
+        next major version.
+        """
+        return self.http_response.content
+
+    @property
+    def text(self) -> str:
+        """Return the decoded response content.
+
+        NOTE: this will be turned into a method in the next major version.
+        """
+        return self.http_response.text
+
+    @property
+    def http_version(self) -> str:
+        return self.http_response.http_version
+
+    @property
+    def is_closed(self) -> bool:
+        return self.http_response.is_closed
+
+    @property
+    def elapsed(self) -> datetime.timedelta:
+        """The time taken for the complete request/response cycle to complete."""
+        return self.http_response.elapsed
+
+    def _parse(self, *, to: type[_T] | None = None) -> R | _T:
+        cast_to = to if to is not None else self._cast_to
+
+        # unwrap `TypeAlias('Name', T)` -> `T`
+        if is_type_alias_type(cast_to):
+            cast_to = cast_to.__value__  # type: ignore[unreachable]
+
+        # unwrap `Annotated[T, ...]` -> `T`
+        if cast_to and is_annotated_type(cast_to):
+            cast_to = extract_type_arg(cast_to, 0)
+
+        origin = get_origin(cast_to) or cast_to
+
+        if inspect.isclass(origin):
+            if issubclass(cast(Any, origin), JSONLDecoder):
+                return cast(
+                    R,
+                    cast("type[JSONLDecoder[Any]]", cast_to)(
+                        raw_iterator=self.http_response.iter_bytes(chunk_size=64),
+                        line_type=extract_type_arg(cast_to, 0),
+                        http_response=self.http_response,
+                    ),
+                )
+
+            if issubclass(cast(Any, origin), AsyncJSONLDecoder):
+                return cast(
+                    R,
+                    cast("type[AsyncJSONLDecoder[Any]]", cast_to)(
+                        raw_iterator=self.http_response.aiter_bytes(chunk_size=64),
+                        line_type=extract_type_arg(cast_to, 0),
+                        http_response=self.http_response,
+                    ),
+                )
+
+        if self._stream:
+            if to:
+                if not is_stream_class_type(to):
+                    raise TypeError(f"Expected custom parse type to be a subclass of {Stream} or {AsyncStream}")
+
+                return cast(
+                    _T,
+                    to(
+                        cast_to=extract_stream_chunk_type(
+                            to,
+                            failure_message="Expected custom stream type to be passed with a type argument, e.g. Stream[ChunkType]",
+                        ),
+                        response=self.http_response,
+                        client=cast(Any, self._client),
+                    ),
+                )
+
+            if self._stream_cls:
+                return cast(
+                    R,
+                    self._stream_cls(
+                        cast_to=extract_stream_chunk_type(self._stream_cls),
+                        response=self.http_response,
+                        client=cast(Any, self._client),
+                    ),
+                )
+
+            stream_cls = cast("type[Stream[Any]] | type[AsyncStream[Any]] | None", self._client._default_stream_cls)
+            if stream_cls is None:
+                raise MissingStreamClassError()
+
+            return cast(
+                R,
+                stream_cls(
+                    cast_to=cast_to,
+                    response=self.http_response,
+                    client=cast(Any, self._client),
+                ),
+            )
+
+        if cast_to is NoneType:
+            return cast(R, None)
+
+        response = self.http_response
+        if cast_to == str:
+            return cast(R, response.text)
+
+        if cast_to == int:
+            return cast(R, int(response.text))
+
+        if cast_to == float:
+            return cast(R, float(response.text))
+
+        if cast_to == bool:
+            return cast(R, response.text.lower() == "true")
+
+        if inspect.isclass(origin) and issubclass(origin, HttpxBinaryResponseContent):
+            return cast(R, cast_to(response))  # type: ignore
+
+        if origin == LegacyAPIResponse:
+            raise RuntimeError("Unexpected state - cast_to is `APIResponse`")
+
+        if inspect.isclass(
+            origin  # pyright: ignore[reportUnknownArgumentType]
+        ) and issubclass(origin, httpx.Response):
+            # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response
+            # and pass that class to our request functions. We cannot change the variance to be either
+            # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct
+            # the response class ourselves but that is something that should be supported directly in httpx
+            # as it would be easy to incorrectly construct the Response object due to the multitude of arguments.
+            if cast_to != httpx.Response:
+                raise ValueError(f"Subclasses of httpx.Response cannot be passed to `cast_to`")
+            return cast(R, response)
+
+        if (
+            inspect.isclass(
+                origin  # pyright: ignore[reportUnknownArgumentType]
+            )
+            and not issubclass(origin, BaseModel)
+            and issubclass(origin, pydantic.BaseModel)
+        ):
+            raise TypeError("Pydantic models must subclass our base model type, e.g. `from anthropic import BaseModel`")
+
+        if (
+            cast_to is not object
+            and origin is not list
+            and origin is not dict
+            and origin is not Union
+            and not issubclass(origin, BaseModel)
+        ):
+            raise RuntimeError(
+                f"Unsupported type, expected {cast_to} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}."
+            )
+
+        # split is required to handle cases where additional information is included
+        # in the response, e.g. application/json; charset=utf-8
+        content_type, *_ = response.headers.get("content-type", "*").split(";")
+        if content_type != "application/json":
+            if is_basemodel(cast_to):
+                try:
+                    data = response.json()
+                except Exception as exc:
+                    log.debug("Could not read JSON from response data due to %s - %s", type(exc), exc)
+                else:
+                    return self._client._process_response_data(
+                        data=data,
+                        cast_to=cast_to,  # type: ignore
+                        response=response,
+                    )
+
+            if self._client._strict_response_validation:
+                raise APIResponseValidationError(
+                    response=response,
+                    message=f"Expected Content-Type response header to be `application/json` but received `{content_type}` instead.",
+                    body=response.text,
+                )
+
+            # If the API responds with content that isn't JSON then we just return
+            # the (decoded) text without performing any parsing so that you can still
+            # handle the response however you need to.
+            return response.text  # type: ignore
+
+        data = response.json()
+
+        return self._client._process_response_data(
+            data=data,
+            cast_to=cast_to,  # type: ignore
+            response=response,
+        )
+
+    @override
+    def __repr__(self) -> str:
+        return f"<APIResponse [{self.status_code} {self.http_response.reason_phrase}] type={self._cast_to}>"
+
+
+class MissingStreamClassError(TypeError):
+    def __init__(self) -> None:
+        super().__init__(
+            "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `anthropic._streaming` for reference",
+        )
+
+
+def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, LegacyAPIResponse[R]]:
+    """Higher order function that takes one of our bound API methods and wraps it
+    to support returning the raw `APIResponse` object directly.
+    """
+
+    @functools.wraps(func)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> LegacyAPIResponse[R]:
+        extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "true"
+
+        kwargs["extra_headers"] = extra_headers
+
+        return cast(LegacyAPIResponse[R], func(*args, **kwargs))
+
+    return wrapped
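+
+# In practice this wrapper backs the `client.with_raw_response` surface, e.g.
+# (illustrative sketch):
+#
+#     response = client.with_raw_response.messages.create(...)
+#     message = response.parse()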
+
+
+def async_to_raw_response_wrapper(func: Callable[P, Awaitable[R]]) -> Callable[P, Awaitable[LegacyAPIResponse[R]]]:
+    """Higher order function that takes one of our bound API methods and wraps it
+    to support returning the raw `APIResponse` object directly.
+    """
+
+    @functools.wraps(func)
+    async def wrapped(*args: P.args, **kwargs: P.kwargs) -> LegacyAPIResponse[R]:
+        extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "true"
+
+        kwargs["extra_headers"] = extra_headers
+
+        return cast(LegacyAPIResponse[R], await func(*args, **kwargs))
+
+    return wrapped
+
+
+class HttpxBinaryResponseContent:
+    response: httpx.Response
+
+    def __init__(self, response: httpx.Response) -> None:
+        self.response = response
+
+    @property
+    def content(self) -> bytes:
+        return self.response.content
+
+    @property
+    def text(self) -> str:
+        return self.response.text
+
+    @property
+    def encoding(self) -> str | None:
+        return self.response.encoding
+
+    @property
+    def charset_encoding(self) -> str | None:
+        return self.response.charset_encoding
+
+    def json(self, **kwargs: Any) -> Any:
+        return self.response.json(**kwargs)
+
+    def read(self) -> bytes:
+        return self.response.read()
+
+    def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]:
+        return self.response.iter_bytes(chunk_size)
+
+    def iter_text(self, chunk_size: int | None = None) -> Iterator[str]:
+        return self.response.iter_text(chunk_size)
+
+    def iter_lines(self) -> Iterator[str]:
+        return self.response.iter_lines()
+
+    def iter_raw(self, chunk_size: int | None = None) -> Iterator[bytes]:
+        return self.response.iter_raw(chunk_size)
+
+    def write_to_file(
+        self,
+        file: str | os.PathLike[str],
+    ) -> None:
+        """Write the output to the given file.
+
+        Accepts a filename or any path-like object, e.g. pathlib.Path
+
+        Note: if you want to stream the data to the file instead of writing
+        all at once then you should use `.with_streaming_response` when making
+        the API request, e.g. `client.with_streaming_response.foo().stream_to_file('my_filename.txt')`
+        """
+        with open(file, mode="wb") as f:
+            for data in self.response.iter_bytes():
+                f.write(data)
+
+    @deprecated(
+        "Due to a bug, this method doesn't actually stream the response content, `.with_streaming_response.method()` should be used instead"
+    )
+    def stream_to_file(
+        self,
+        file: str | os.PathLike[str],
+        *,
+        chunk_size: int | None = None,
+    ) -> None:
+        with open(file, mode="wb") as f:
+            for data in self.response.iter_bytes(chunk_size):
+                f.write(data)
+
+    def close(self) -> None:
+        return self.response.close()
+
+    async def aread(self) -> bytes:
+        return await self.response.aread()
+
+    async def aiter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]:
+        return self.response.aiter_bytes(chunk_size)
+
+    async def aiter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]:
+        return self.response.aiter_text(chunk_size)
+
+    async def aiter_lines(self) -> AsyncIterator[str]:
+        return self.response.aiter_lines()
+
+    async def aiter_raw(self, chunk_size: int | None = None) -> AsyncIterator[bytes]:
+        return self.response.aiter_raw(chunk_size)
+
+    @deprecated(
+        "Due to a bug, this method doesn't actually stream the response content, `.with_streaming_response.method()` should be used instead"
+    )
+    async def astream_to_file(
+        self,
+        file: str | os.PathLike[str],
+        *,
+        chunk_size: int | None = None,
+    ) -> None:
+        path = anyio.Path(file)
+        async with await path.open(mode="wb") as f:
+            async for data in self.response.aiter_bytes(chunk_size):
+                await f.write(data)
+
+    async def aclose(self) -> None:
+        return await self.response.aclose()
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_models.py b/.venv/lib/python3.12/site-packages/anthropic/_models.py
new file mode 100644
index 00000000..dad8df9e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_models.py
@@ -0,0 +1,832 @@
+from __future__ import annotations
+
+import os
+import inspect
+from typing import TYPE_CHECKING, Any, Type, Union, Generic, TypeVar, Callable, Optional, cast
+from datetime import date, datetime
+from typing_extensions import (
+    Unpack,
+    Literal,
+    ClassVar,
+    Protocol,
+    Required,
+    ParamSpec,
+    TypedDict,
+    TypeGuard,
+    final,
+    override,
+    runtime_checkable,
+)
+
+import pydantic
+import pydantic.generics
+from pydantic.fields import FieldInfo
+
+from ._types import (
+    Body,
+    IncEx,
+    Query,
+    ModelT,
+    Headers,
+    Timeout,
+    NotGiven,
+    AnyMapping,
+    HttpxRequestFiles,
+)
+from ._utils import (
+    PropertyInfo,
+    is_list,
+    is_given,
+    json_safe,
+    lru_cache,
+    is_mapping,
+    parse_date,
+    coerce_boolean,
+    parse_datetime,
+    strip_not_given,
+    extract_type_arg,
+    is_annotated_type,
+    is_type_alias_type,
+    strip_annotated_type,
+)
+from ._compat import (
+    PYDANTIC_V2,
+    ConfigDict,
+    GenericModel as BaseGenericModel,
+    get_args,
+    is_union,
+    parse_obj,
+    get_origin,
+    is_literal_type,
+    get_model_config,
+    get_model_fields,
+    field_get_default,
+)
+from ._constants import RAW_RESPONSE_HEADER
+
+if TYPE_CHECKING:
+    from pydantic_core.core_schema import ModelField, LiteralSchema, ModelFieldsSchema
+
+__all__ = ["BaseModel", "GenericModel"]
+
+_T = TypeVar("_T")
+_BaseModelT = TypeVar("_BaseModelT", bound="BaseModel")
+
+P = ParamSpec("P")
+
+
+@runtime_checkable
+class _ConfigProtocol(Protocol):
+    allow_population_by_field_name: bool
+
+
+class BaseModel(pydantic.BaseModel):
+    if PYDANTIC_V2:
+        model_config: ClassVar[ConfigDict] = ConfigDict(
+            extra="allow", defer_build=coerce_boolean(os.environ.get("DEFER_PYDANTIC_BUILD", "true"))
+        )
+    else:
+
+        @property
+        @override
+        def model_fields_set(self) -> set[str]:
+            # a forwards-compat shim for pydantic v2
+            return self.__fields_set__  # type: ignore
+
+        class Config(pydantic.BaseConfig):  # pyright: ignore[reportDeprecated]
+            extra: Any = pydantic.Extra.allow  # type: ignore
+
+    if TYPE_CHECKING:
+        _request_id: Optional[str] = None
+        """The ID of the request, returned via the `request-id` header. Useful for debugging requests and reporting issues to Anthropic.
+        This will **only** be set for the top-level response object; it will not be defined for nested objects. For example:
+
+        ```py
+        message = await client.messages.create(...)
+        message._request_id  # req_xxx
+        message.usage._request_id  # raises `AttributeError`
+        ```
+
+        Note: unlike other properties that use an `_` prefix, this property
+        *is* public. Unless documented otherwise, all other `_` prefix properties,
+        methods and modules are *private*.
+        """
+
+    def to_dict(
+        self,
+        *,
+        mode: Literal["json", "python"] = "python",
+        use_api_names: bool = True,
+        exclude_unset: bool = True,
+        exclude_defaults: bool = False,
+        exclude_none: bool = False,
+        warnings: bool = True,
+    ) -> dict[str, object]:
+        """Recursively generate a dictionary representation of the model, optionally specifying which fields to include or exclude.
+
+        By default, fields that were not set by the API will not be included,
+        and keys will match the API response, *not* the property names from the model.
+
+        For example, if the API responds with `"fooBar": true` but we've defined a `foo_bar: bool` property,
+        the output will use the `"fooBar"` key (unless `use_api_names=False` is passed).
+
+        Args:
+            mode:
+                If mode is 'json', the dictionary will only contain JSON serializable types, e.g. `datetime` will be turned into a string such as `"2024-03-22T18:11:19.117000Z"`.
+                If mode is 'python', the dictionary may contain any Python objects. e.g. `datetime(2024, 3, 22)`
+
+            use_api_names: Whether to use the key that the API responded with or the property name. Defaults to `True`.
+            exclude_unset: Whether to exclude fields that have not been explicitly set.
+            exclude_defaults: Whether to exclude fields that are set to their default value from the output.
+            exclude_none: Whether to exclude fields that have a value of `None` from the output.
+            warnings: Whether to log warnings when invalid fields are encountered. This is only supported in Pydantic v2.
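+
+        Example (illustrative; `message` is any instance of this model):
+
+        ```py
+        data = message.to_dict(mode="json")  # JSON-safe dict, API field names
+        ```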
+        """
+        return self.model_dump(
+            mode=mode,
+            by_alias=use_api_names,
+            exclude_unset=exclude_unset,
+            exclude_defaults=exclude_defaults,
+            exclude_none=exclude_none,
+            warnings=warnings,
+        )
+
+    def to_json(
+        self,
+        *,
+        indent: int | None = 2,
+        use_api_names: bool = True,
+        exclude_unset: bool = True,
+        exclude_defaults: bool = False,
+        exclude_none: bool = False,
+        warnings: bool = True,
+    ) -> str:
+        """Generates a JSON string representing this model as it would be received from or sent to the API (but with indentation).
+
+        By default, fields that were not set by the API will not be included,
+        and keys will match the API response, *not* the property names from the model.
+
+        For example, if the API responds with `"fooBar": true` but we've defined a `foo_bar: bool` property,
+        the output will use the `"fooBar"` key (unless `use_api_names=False` is passed).
+
+        Args:
+            indent: Indentation to use in the JSON output. If `None` is passed, the output will be compact. Defaults to `2`
+            use_api_names: Whether to use the key that the API responded with or the property name. Defaults to `True`.
+            exclude_unset: Whether to exclude fields that have not been explicitly set.
+            exclude_defaults: Whether to exclude fields that have the default value.
+            exclude_none: Whether to exclude fields that have a value of `None`.
+            warnings: Whether to show any warnings that occurred during serialization. This is only supported in Pydantic v2.
+        """
+        return self.model_dump_json(
+            indent=indent,
+            by_alias=use_api_names,
+            exclude_unset=exclude_unset,
+            exclude_defaults=exclude_defaults,
+            exclude_none=exclude_none,
+            warnings=warnings,
+        )
+
+    @override
+    def __str__(self) -> str:
+        # mypy complains about an invalid self arg
+        return f"{self.__repr_name__()}({self.__repr_str__(', ')})"  # type: ignore[misc]
+
+    # Override the 'construct' method in a way that supports recursive parsing without validation.
+    # Based on https://github.com/samuelcolvin/pydantic/issues/1168#issuecomment-817742836.
+    @classmethod
+    @override
+    def construct(  # pyright: ignore[reportIncompatibleMethodOverride]
+        __cls: Type[ModelT],
+        _fields_set: set[str] | None = None,
+        **values: object,
+    ) -> ModelT:
+        m = __cls.__new__(__cls)
+        fields_values: dict[str, object] = {}
+
+        config = get_model_config(__cls)
+        populate_by_name = (
+            config.allow_population_by_field_name
+            if isinstance(config, _ConfigProtocol)
+            else config.get("populate_by_name")
+        )
+
+        if _fields_set is None:
+            _fields_set = set()
+
+        model_fields = get_model_fields(__cls)
+        for name, field in model_fields.items():
+            key = field.alias
+            if key is None or (key not in values and populate_by_name):
+                key = name
+
+            if key in values:
+                fields_values[name] = _construct_field(value=values[key], field=field, key=key)
+                _fields_set.add(name)
+            else:
+                fields_values[name] = field_get_default(field)
+
+        _extra = {}
+        for key, value in values.items():
+            if key not in model_fields:
+                if PYDANTIC_V2:
+                    _extra[key] = value
+                else:
+                    _fields_set.add(key)
+                    fields_values[key] = value
+
+        object.__setattr__(m, "__dict__", fields_values)
+
+        if PYDANTIC_V2:
+            # these properties are copied from Pydantic's `model_construct()` method
+            object.__setattr__(m, "__pydantic_private__", None)
+            object.__setattr__(m, "__pydantic_extra__", _extra)
+            object.__setattr__(m, "__pydantic_fields_set__", _fields_set)
+        else:
+            # init_private_attributes() does not exist in v2
+            m._init_private_attributes()  # type: ignore
+
+            # copied from Pydantic v1's `construct()` method
+            object.__setattr__(m, "__fields_set__", _fields_set)
+
+        return m
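+
+    # NOTE: `construct` intentionally skips validation; nested values are
+    # coerced via `_construct_field` and unrecognised keys are preserved
+    # (as `__pydantic_extra__` under Pydantic v2).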
+
+    if not TYPE_CHECKING:
+        # type checkers incorrectly complain about this assignment
+        # because the type signatures are technically different
+        # although not in practice
+        model_construct = construct
+
+    if not PYDANTIC_V2:
+        # we define aliases for some of the new pydantic v2 methods so
+        # that we can just document these methods without having to specify
+        # a specific pydantic version as some users may not know which
+        # pydantic version they are currently using
+
+        @override
+        def model_dump(
+            self,
+            *,
+            mode: Literal["json", "python"] | str = "python",
+            include: IncEx | None = None,
+            exclude: IncEx | None = None,
+            by_alias: bool = False,
+            exclude_unset: bool = False,
+            exclude_defaults: bool = False,
+            exclude_none: bool = False,
+            round_trip: bool = False,
+            warnings: bool | Literal["none", "warn", "error"] = True,
+            context: dict[str, Any] | None = None,
+            serialize_as_any: bool = False,
+        ) -> dict[str, Any]:
+            """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump
+
+            Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.
+
+            Args:
+                mode: The mode in which `to_python` should run.
+                    If mode is 'json', the dictionary will only contain JSON serializable types.
+                    If mode is 'python', the dictionary may contain any Python objects.
+                include: A list of fields to include in the output.
+                exclude: A list of fields to exclude from the output.
+                by_alias: Whether to use the field's alias in the dictionary key if defined.
+                exclude_unset: Whether to exclude fields that are unset or None from the output.
+                exclude_defaults: Whether to exclude fields that are set to their default value from the output.
+                exclude_none: Whether to exclude fields that have a value of `None` from the output.
+                round_trip: Whether to enable serialization and deserialization round-trip support.
+                warnings: Whether to log warnings when invalid fields are encountered.
+
+            Returns:
+                A dictionary representation of the model.
+            """
+            if mode not in {"json", "python"}:
+                raise ValueError("mode must be either 'json' or 'python'")
+            if round_trip is not False:
+                raise ValueError("round_trip is only supported in Pydantic v2")
+            if warnings is not True:
+                raise ValueError("warnings is only supported in Pydantic v2")
+            if context is not None:
+                raise ValueError("context is only supported in Pydantic v2")
+            if serialize_as_any is not False:
+                raise ValueError("serialize_as_any is only supported in Pydantic v2")
+            dumped = super().dict(  # pyright: ignore[reportDeprecated]
+                include=include,
+                exclude=exclude,
+                by_alias=by_alias,
+                exclude_unset=exclude_unset,
+                exclude_defaults=exclude_defaults,
+                exclude_none=exclude_none,
+            )
+
+            return cast(dict[str, Any], json_safe(dumped)) if mode == "json" else dumped
+
+        @override
+        def model_dump_json(
+            self,
+            *,
+            indent: int | None = None,
+            include: IncEx | None = None,
+            exclude: IncEx | None = None,
+            by_alias: bool = False,
+            exclude_unset: bool = False,
+            exclude_defaults: bool = False,
+            exclude_none: bool = False,
+            round_trip: bool = False,
+            warnings: bool | Literal["none", "warn", "error"] = True,
+            context: dict[str, Any] | None = None,
+            serialize_as_any: bool = False,
+        ) -> str:
+            """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump_json
+
+            Generates a JSON representation of the model using Pydantic's `to_json` method.
+
+            Args:
+                indent: Indentation to use in the JSON output. If None is passed, the output will be compact.
+                include: Field(s) to include in the JSON output. Can take either a string or set of strings.
+                exclude: Field(s) to exclude from the JSON output. Can take either a string or set of strings.
+                by_alias: Whether to serialize using field aliases.
+                exclude_unset: Whether to exclude fields that have not been explicitly set.
+                exclude_defaults: Whether to exclude fields that have the default value.
+                exclude_none: Whether to exclude fields that have a value of `None`.
+                round_trip: Whether to use serialization/deserialization between JSON and class instance.
+                warnings: Whether to show any warnings that occurred during serialization.
+
+            Returns:
+                A JSON string representation of the model.
+            """
+            if round_trip is not False:
+                raise ValueError("round_trip is only supported in Pydantic v2")
+            if warnings is not True:
+                raise ValueError("warnings is only supported in Pydantic v2")
+            if context is not None:
+                raise ValueError("context is only supported in Pydantic v2")
+            if serialize_as_any is not False:
+                raise ValueError("serialize_as_any is only supported in Pydantic v2")
+            return super().json(  # type: ignore[reportDeprecated]
+                indent=indent,
+                include=include,
+                exclude=exclude,
+                by_alias=by_alias,
+                exclude_unset=exclude_unset,
+                exclude_defaults=exclude_defaults,
+                exclude_none=exclude_none,
+            )
+
+
+def _construct_field(value: object, field: FieldInfo, key: str) -> object:
+    if value is None:
+        return field_get_default(field)
+
+    if PYDANTIC_V2:
+        type_ = field.annotation
+    else:
+        type_ = cast(type, field.outer_type_)  # type: ignore
+
+    if type_ is None:
+        raise RuntimeError(f"Unexpected field type is None for {key}")
+
+    return construct_type(value=value, type_=type_)
+
+
+def is_basemodel(type_: type) -> bool:
+    """Returns whether or not the given type is either a `BaseModel` or a union of `BaseModel`"""
+    if is_union(type_):
+        for variant in get_args(type_):
+            if is_basemodel(variant):
+                return True
+
+        return False
+
+    return is_basemodel_type(type_)
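+
+# hedged examples for `is_basemodel` (the `Foo` model is illustrative only):
+#
+#     is_basemodel(Foo)              # True
+#     is_basemodel(Optional[Foo])    # True -- a union containing a BaseModel
+#     is_basemodel(int)              # False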
+
+
+def is_basemodel_type(type_: type) -> TypeGuard[type[BaseModel] | type[GenericModel]]:
+    origin = get_origin(type_) or type_
+    if not inspect.isclass(origin):
+        return False
+    return issubclass(origin, BaseModel) or issubclass(origin, GenericModel)
+
+
+def build(
+    base_model_cls: Callable[P, _BaseModelT],
+    *args: P.args,
+    **kwargs: P.kwargs,
+) -> _BaseModelT:
+    """Construct a BaseModel class without validation.
+
+    This is useful for cases where you need to instantiate a `BaseModel`
+    from an API response, as this provides type-safe params, which isn't
+    supported by helpers like `construct_type()`.
+
+    ```py
+    build(MyModel, my_field_a="foo", my_field_b=123)
+    ```
+    """
+    if args:
+        raise TypeError(
+            "Received positional arguments which are not supported; Keyword arguments must be used instead",
+        )
+
+    return cast(_BaseModelT, construct_type(type_=base_model_cls, value=kwargs))
+
+
+def construct_type_unchecked(*, value: object, type_: type[_T]) -> _T:
+    """Loose coercion to the expected type with construction of nested values.
+
+    Note: the returned value from this function is not guaranteed to match the
+    given type.
+    """
+    return cast(_T, construct_type(value=value, type_=type_))
+
+
+def construct_type(*, value: object, type_: object) -> object:
+    """Loose coercion to the expected type with construction of nested values.
+
+    If the given value does not match the expected type then it is returned as-is.
+    """
+
+    # store a reference to the original type we were given before we extract any inner
+    # types so that we can properly resolve forward references in `TypeAliasType` annotations
+    original_type = None
+
+    # we allow `object` as the input type because otherwise, passing things like
+    # `Literal['value']` will be reported as a type error by type checkers
+    type_ = cast("type[object]", type_)
+    if is_type_alias_type(type_):
+        original_type = type_  # type: ignore[unreachable]
+        type_ = type_.__value__  # type: ignore[unreachable]
+
+    # unwrap `Annotated[T, ...]` -> `T`
+    if is_annotated_type(type_):
+        meta: tuple[Any, ...] = get_args(type_)[1:]
+        type_ = extract_type_arg(type_, 0)
+    else:
+        meta = tuple()
+
+    # we need to use the origin class for any types that are subscripted generics
+    # e.g. Dict[str, object]
+    origin = get_origin(type_) or type_
+    args = get_args(type_)
+
+    if is_union(origin):
+        try:
+            return validate_type(type_=cast("type[object]", original_type or type_), value=value)
+        except Exception:
+            pass
+
+        # if the type is a discriminated union then we want to construct the right variant
+        # in the union, even if the data doesn't match exactly, otherwise we'd break code
+        # that relies on the constructed class types, e.g.
+        #
+        # class FooType:
+        #   kind: Literal['foo']
+        #   value: str
+        #
+        # class BarType:
+        #   kind: Literal['bar']
+        #   value: int
+        #
+        # without this block, if the data we get is something like `{'kind': 'bar', 'value': 'foo'}` then
+        # we'd end up constructing `FooType` when it should be `BarType`.
+        discriminator = _build_discriminated_union_meta(union=type_, meta_annotations=meta)
+        if discriminator and is_mapping(value):
+            variant_value = value.get(discriminator.field_alias_from or discriminator.field_name)
+            if variant_value and isinstance(variant_value, str):
+                variant_type = discriminator.mapping.get(variant_value)
+                if variant_type:
+                    return construct_type(type_=variant_type, value=value)
+
+        # if the data is not valid, use the first variant that doesn't fail while deserializing
+        for variant in args:
+            try:
+                return construct_type(value=value, type_=variant)
+            except Exception:
+                continue
+
+        raise RuntimeError(f"Could not convert data into a valid instance of {type_}")
+
+    if origin == dict:
+        if not is_mapping(value):
+            return value
+
+        _, items_type = get_args(type_)  # Dict[_, items_type]
+        return {key: construct_type(value=item, type_=items_type) for key, item in value.items()}
+
+    if (
+        not is_literal_type(type_)
+        and inspect.isclass(origin)
+        and (issubclass(origin, BaseModel) or issubclass(origin, GenericModel))
+    ):
+        if is_list(value):
+            return [cast(Any, type_).construct(**entry) if is_mapping(entry) else entry for entry in value]
+
+        if is_mapping(value):
+            if issubclass(type_, BaseModel):
+                return type_.construct(**value)  # type: ignore[arg-type]
+
+            return cast(Any, type_).construct(**value)
+
+    if origin == list:
+        if not is_list(value):
+            return value
+
+        inner_type = args[0]  # List[inner_type]
+        return [construct_type(value=entry, type_=inner_type) for entry in value]
+
+    if origin == float:
+        if isinstance(value, int):
+            coerced = float(value)
+            if coerced != value:
+                # the int cannot be represented exactly as a float
+                # (e.g. very large ints), so keep the original value
+                return value
+            return coerced
+
+        return value
+
+    if type_ == datetime:
+        try:
+            return parse_datetime(value)  # type: ignore
+        except Exception:
+            return value
+
+    if type_ == date:
+        try:
+            return parse_date(value)  # type: ignore
+        except Exception:
+            return value
+
+    return value
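+
+# hedged examples of the loose coercion above:
+#
+#     construct_type(value="2024-01-01", type_=date)  # -> datetime.date(2024, 1, 1)
+#     construct_type(value="not-a-date", type_=date)  # -> "not-a-date", returned as-is
+#     construct_type(value={"a": 1}, type_=Dict[str, float])  # -> {"a": 1.0}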
+
+
+@runtime_checkable
+class CachedDiscriminatorType(Protocol):
+    __discriminator__: DiscriminatorDetails
+
+
+class DiscriminatorDetails:
+    field_name: str
+    """The name of the discriminator field in the variant class, e.g.
+
+    ```py
+    class Foo(BaseModel):
+        type: Literal['foo']
+    ```
+
+    Will result in field_name='type'
+    """
+
+    field_alias_from: str | None
+    """The name of the discriminator field in the API response, e.g.
+
+    ```py
+    class Foo(BaseModel):
+        type: Literal['foo'] = Field(alias='type_from_api')
+    ```
+
+    Will result in field_alias_from='type_from_api'
+    """
+
+    mapping: dict[str, type]
+    """Mapping of discriminator value to variant type, e.g.
+
+    {'foo': FooVariant, 'bar': BarVariant}
+    """
+
+    def __init__(
+        self,
+        *,
+        mapping: dict[str, type],
+        discriminator_field: str,
+        discriminator_alias: str | None,
+    ) -> None:
+        self.mapping = mapping
+        self.field_name = discriminator_field
+        self.field_alias_from = discriminator_alias
+
+
+def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any, ...]) -> DiscriminatorDetails | None:
+    if isinstance(union, CachedDiscriminatorType):
+        return union.__discriminator__
+
+    discriminator_field_name: str | None = None
+
+    for annotation in meta_annotations:
+        if isinstance(annotation, PropertyInfo) and annotation.discriminator is not None:
+            discriminator_field_name = annotation.discriminator
+            break
+
+    if not discriminator_field_name:
+        return None
+
+    mapping: dict[str, type] = {}
+    discriminator_alias: str | None = None
+
+    for variant in get_args(union):
+        variant = strip_annotated_type(variant)
+        if is_basemodel_type(variant):
+            if PYDANTIC_V2:
+                field = _extract_field_schema_pv2(variant, discriminator_field_name)
+                if not field:
+                    continue
+
+                # Note: if one variant defines an alias then they all should
+                discriminator_alias = field.get("serialization_alias")
+
+                field_schema = field["schema"]
+
+                if field_schema["type"] == "literal":
+                    for entry in cast("LiteralSchema", field_schema)["expected"]:
+                        if isinstance(entry, str):
+                            mapping[entry] = variant
+            else:
+                field_info = cast("dict[str, FieldInfo]", variant.__fields__).get(discriminator_field_name)  # pyright: ignore[reportDeprecated, reportUnnecessaryCast]
+                if not field_info:
+                    continue
+
+                # Note: if one variant defines an alias then they all should
+                discriminator_alias = field_info.alias
+
+                if field_info.annotation and is_literal_type(field_info.annotation):
+                    for entry in get_args(field_info.annotation):
+                        if isinstance(entry, str):
+                            mapping[entry] = variant
+
+    if not mapping:
+        return None
+
+    details = DiscriminatorDetails(
+        mapping=mapping,
+        discriminator_field=discriminator_field_name,
+        discriminator_alias=discriminator_alias,
+    )
+    cast(CachedDiscriminatorType, union).__discriminator__ = details
+    return details
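+
+# hedged sketch of the annotation shape this expects (the variant types are
+# illustrative only):
+#
+#     Shape = Annotated[Union[Circle, Square], PropertyInfo(discriminator="kind")]
+#     construct_type(value={"kind": "square", ...}, type_=Shape)  # -> builds `Square`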
+
+
+def _extract_field_schema_pv2(model: type[BaseModel], field_name: str) -> ModelField | None:
+    schema = model.__pydantic_core_schema__
+    if schema["type"] != "model":
+        return None
+
+    fields_schema = schema["schema"]
+    if fields_schema["type"] != "model-fields":
+        return None
+
+    fields_schema = cast("ModelFieldsSchema", fields_schema)
+
+    field = fields_schema["fields"].get(field_name)
+    if not field:
+        return None
+
+    return cast("ModelField", field)  # pyright: ignore[reportUnnecessaryCast]
+
+
+def validate_type(*, type_: type[_T], value: object) -> _T:
+    """Strict validation that the given value matches the expected type"""
+    if inspect.isclass(type_) and issubclass(type_, pydantic.BaseModel):
+        return cast(_T, parse_obj(type_, value))
+
+    return cast(_T, _validate_non_model_type(type_=type_, value=value))
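+
+# hedged examples: unlike `construct_type`, `validate_type` raises on mismatch:
+#
+#     validate_type(type_=int, value="5")      # -> 5 (numeric strings are coerced)
+#     validate_type(type_=int, value="hello")  # raises a validation error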
+
+
+def set_pydantic_config(typ: Any, config: pydantic.ConfigDict) -> None:
+    """Add a pydantic config for the given type.
+
+    Note: this is a no-op on Pydantic v1.
+    """
+    setattr(typ, "__pydantic_config__", config)  # noqa: B010
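+
+# hedged usage sketch (`MyParams` is an illustrative TypedDict):
+#
+#     set_pydantic_config(MyParams, pydantic.ConfigDict(arbitrary_types_allowed=True))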
+
+
+def add_request_id(obj: BaseModel, request_id: str | None) -> None:
+    obj._request_id = request_id
+
+    # in Pydantic v1, using setattr like we do above causes the attribute
+    # to be included when serializing the model which we don't want in this
+    # case so we need to explicitly exclude it
+    if not PYDANTIC_V2:
+        try:
+            exclude_fields = obj.__exclude_fields__  # type: ignore
+        except AttributeError:
+            cast(Any, obj).__exclude_fields__ = {"_request_id", "__exclude_fields__"}
+        else:
+            cast(Any, obj).__exclude_fields__ = {*(exclude_fields or {}), "_request_id", "__exclude_fields__"}
+
+
+# our use of subclassing here causes weirdness for type checkers,
+# so we just pretend that we don't subclass
+if TYPE_CHECKING:
+    GenericModel = BaseModel
+else:
+
+    class GenericModel(BaseGenericModel, BaseModel):
+        pass
+
+
+if PYDANTIC_V2:
+    from pydantic import TypeAdapter as _TypeAdapter
+
+    _CachedTypeAdapter = cast("TypeAdapter[object]", lru_cache(maxsize=None)(_TypeAdapter))
+
+    if TYPE_CHECKING:
+        from pydantic import TypeAdapter
+    else:
+        TypeAdapter = _CachedTypeAdapter
+
+    def _validate_non_model_type(*, type_: type[_T], value: object) -> _T:
+        return TypeAdapter(type_).validate_python(value)
+
+elif not TYPE_CHECKING:  # TODO: condition is weird
+
+    class RootModel(GenericModel, Generic[_T]):
+        """Used as a placeholder to easily convert runtime types to a Pydantic format
+        to provide validation.
+
+        For example:
+        ```py
+        validated = RootModel[int](__root__="5").__root__
+        # validated: 5
+        ```
+        """
+
+        __root__: _T
+
+    def _validate_non_model_type(*, type_: type[_T], value: object) -> _T:
+        model = _create_pydantic_model(type_).validate(value)
+        return cast(_T, model.__root__)
+
+    def _create_pydantic_model(type_: _T) -> Type[RootModel[_T]]:
+        return RootModel[type_]  # type: ignore
+
+
+class FinalRequestOptionsInput(TypedDict, total=False):
+    method: Required[str]
+    url: Required[str]
+    params: Query
+    headers: Headers
+    max_retries: int
+    timeout: float | Timeout | None
+    files: HttpxRequestFiles | None
+    idempotency_key: str
+    json_data: Body
+    extra_json: AnyMapping
+
+
+@final
+class FinalRequestOptions(pydantic.BaseModel):
+    method: str
+    url: str
+    params: Query = {}
+    headers: Union[Headers, NotGiven] = NotGiven()
+    max_retries: Union[int, NotGiven] = NotGiven()
+    timeout: Union[float, Timeout, None, NotGiven] = NotGiven()
+    files: Union[HttpxRequestFiles, None] = None
+    idempotency_key: Union[str, None] = None
+    post_parser: Union[Callable[[Any], Any], NotGiven] = NotGiven()
+
+    # It should be noted that we cannot use `json` here as that would override
+    # a BaseModel method in an incompatible fashion.
+    json_data: Union[Body, None] = None
+    extra_json: Union[AnyMapping, None] = None
+
+    if PYDANTIC_V2:
+        model_config: ClassVar[ConfigDict] = ConfigDict(arbitrary_types_allowed=True)
+    else:
+
+        class Config(pydantic.BaseConfig):  # pyright: ignore[reportDeprecated]
+            arbitrary_types_allowed: bool = True
+
+    def get_max_retries(self, max_retries: int) -> int:
+        if isinstance(self.max_retries, NotGiven):
+            return max_retries
+        return self.max_retries
+
+    def _strip_raw_response_header(self) -> None:
+        if not is_given(self.headers):
+            return
+
+        if self.headers.get(RAW_RESPONSE_HEADER):
+            self.headers = {**self.headers}
+            self.headers.pop(RAW_RESPONSE_HEADER)
+
+    # override the `construct` method so that we can run custom transformations.
+    # this is necessary as we don't want to do any actual runtime type checking
+    # (which means we can't use validators) but we do want to ensure that `NotGiven`
+    # values are not present
+    #
+    # type ignore required because we're adding explicit types to `**values`
+    @classmethod
+    def construct(  # type: ignore
+        cls,
+        _fields_set: set[str] | None = None,
+        **values: Unpack[FinalRequestOptionsInput],
+    ) -> FinalRequestOptions:
+        kwargs: dict[str, Any] = {
+            # we unconditionally call `strip_not_given` on any value
+            # as it will just ignore any non-mapping types
+            key: strip_not_given(value)
+            for key, value in values.items()
+        }
+        if PYDANTIC_V2:
+            return super().model_construct(_fields_set, **kwargs)
+        return cast(FinalRequestOptions, super().construct(_fields_set, **kwargs))  # pyright: ignore[reportDeprecated]
+
+    if not TYPE_CHECKING:
+        # type checkers incorrectly complain about this assignment
+        model_construct = construct
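+
+# hedged sketch: fields left unspecified keep their `NotGiven` defaults, which
+# the accessor helpers then resolve:
+#
+#     options = FinalRequestOptions.construct(method="post", url="/v1/messages")
+#     options.get_max_retries(2)  # -> 2, since `max_retries` was not given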
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_qs.py b/.venv/lib/python3.12/site-packages/anthropic/_qs.py
new file mode 100644
index 00000000..274320ca
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_qs.py
@@ -0,0 +1,150 @@
+from __future__ import annotations
+
+from typing import Any, List, Tuple, Union, Mapping, TypeVar
+from urllib.parse import parse_qs, urlencode
+from typing_extensions import Literal, get_args
+
+from ._types import NOT_GIVEN, NotGiven, NotGivenOr
+from ._utils import flatten
+
+_T = TypeVar("_T")
+
+
+ArrayFormat = Literal["comma", "repeat", "indices", "brackets"]
+NestedFormat = Literal["dots", "brackets"]
+
+PrimitiveData = Union[str, int, float, bool, None]
+# this should be Data = Union[PrimitiveData, "List[Data]", "Tuple[Data]", "Mapping[str, Data]"]
+# https://github.com/microsoft/pyright/issues/3555
+Data = Union[PrimitiveData, List[Any], Tuple[Any], "Mapping[str, Any]"]
+Params = Mapping[str, Data]
+
+
+class Querystring:
+    array_format: ArrayFormat
+    nested_format: NestedFormat
+
+    def __init__(
+        self,
+        *,
+        array_format: ArrayFormat = "repeat",
+        nested_format: NestedFormat = "brackets",
+    ) -> None:
+        self.array_format = array_format
+        self.nested_format = nested_format
+
+    def parse(self, query: str) -> Mapping[str, object]:
+        # Note: custom format syntax is not supported yet
+        return parse_qs(query)
+
+    def stringify(
+        self,
+        params: Params,
+        *,
+        array_format: NotGivenOr[ArrayFormat] = NOT_GIVEN,
+        nested_format: NotGivenOr[NestedFormat] = NOT_GIVEN,
+    ) -> str:
+        return urlencode(
+            self.stringify_items(
+                params,
+                array_format=array_format,
+                nested_format=nested_format,
+            )
+        )
+
+    def stringify_items(
+        self,
+        params: Params,
+        *,
+        array_format: NotGivenOr[ArrayFormat] = NOT_GIVEN,
+        nested_format: NotGivenOr[NestedFormat] = NOT_GIVEN,
+    ) -> list[tuple[str, str]]:
+        opts = Options(
+            qs=self,
+            array_format=array_format,
+            nested_format=nested_format,
+        )
+        return flatten([self._stringify_item(key, value, opts) for key, value in params.items()])
+
+    def _stringify_item(
+        self,
+        key: str,
+        value: Data,
+        opts: Options,
+    ) -> list[tuple[str, str]]:
+        if isinstance(value, Mapping):
+            items: list[tuple[str, str]] = []
+            nested_format = opts.nested_format
+            for subkey, subvalue in value.items():
+                items.extend(
+                    self._stringify_item(
+                        # TODO: error if unknown format
+                        f"{key}.{subkey}" if nested_format == "dots" else f"{key}[{subkey}]",
+                        subvalue,
+                        opts,
+                    )
+                )
+            return items
+
+        if isinstance(value, (list, tuple)):
+            array_format = opts.array_format
+            if array_format == "comma":
+                return [
+                    (
+                        key,
+                        ",".join(self._primitive_value_to_str(item) for item in value if item is not None),
+                    ),
+                ]
+            elif array_format == "repeat":
+                items = []
+                for item in value:
+                    items.extend(self._stringify_item(key, item, opts))
+                return items
+            elif array_format == "indices":
+                raise NotImplementedError("The array indices format is not supported yet")
+            elif array_format == "brackets":
+                items = []
+                key = key + "[]"
+                for item in value:
+                    items.extend(self._stringify_item(key, item, opts))
+                return items
+            else:
+                raise NotImplementedError(
+                    f"Unknown array_format value: {array_format}, choose from {', '.join(get_args(ArrayFormat))}"
+                )
+
+        serialised = self._primitive_value_to_str(value)
+        if not serialised:
+            return []
+        return [(key, serialised)]
+
+    def _primitive_value_to_str(self, value: PrimitiveData) -> str:
+        # copied from httpx
+        if value is True:
+            return "true"
+        elif value is False:
+            return "false"
+        elif value is None:
+            return ""
+        return str(value)
+
+
+_qs = Querystring()
+parse = _qs.parse
+stringify = _qs.stringify
+stringify_items = _qs.stringify_items
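+
+# hedged examples using the default formats (array_format="repeat",
+# nested_format="brackets"); `urlencode` percent-encodes the brackets:
+#
+#     stringify({"tags": ["a", "b"]})  # -> "tags=a&tags=b"
+#     stringify({"user": {"id": 1}})   # -> "user%5Bid%5D=1", i.e. user[id]=1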
+
+
+class Options:
+    array_format: ArrayFormat
+    nested_format: NestedFormat
+
+    def __init__(
+        self,
+        qs: Querystring = _qs,
+        *,
+        array_format: NotGivenOr[ArrayFormat] = NOT_GIVEN,
+        nested_format: NotGivenOr[NestedFormat] = NOT_GIVEN,
+    ) -> None:
+        self.array_format = qs.array_format if isinstance(array_format, NotGiven) else array_format
+        self.nested_format = qs.nested_format if isinstance(nested_format, NotGiven) else nested_format
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_resource.py b/.venv/lib/python3.12/site-packages/anthropic/_resource.py
new file mode 100644
index 00000000..f62cc2ba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_resource.py
@@ -0,0 +1,41 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import time
+
+import anyio
+
+from ._base_client import SyncAPIClient, AsyncAPIClient
+
+
+class SyncAPIResource:
+    _client: SyncAPIClient
+
+    def __init__(self, client: SyncAPIClient) -> None:
+        self._client = client
+        self._get = client.get
+        self._post = client.post
+        self._patch = client.patch
+        self._put = client.put
+        self._delete = client.delete
+        self._get_api_list = client.get_api_list
+
+    def _sleep(self, seconds: float) -> None:
+        time.sleep(seconds)
+
+
+class AsyncAPIResource:
+    _client: AsyncAPIClient
+
+    def __init__(self, client: AsyncAPIClient) -> None:
+        self._client = client
+        self._get = client.get
+        self._post = client.post
+        self._patch = client.patch
+        self._put = client.put
+        self._delete = client.delete
+        self._get_api_list = client.get_api_list
+
+    async def _sleep(self, seconds: float) -> None:
+        await anyio.sleep(seconds)
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_response.py b/.venv/lib/python3.12/site-packages/anthropic/_response.py
new file mode 100644
index 00000000..64a3f158
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_response.py
@@ -0,0 +1,872 @@
+from __future__ import annotations
+
+import os
+import inspect
+import logging
+import datetime
+import functools
+from types import TracebackType
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Union,
+    Generic,
+    TypeVar,
+    Callable,
+    Iterator,
+    AsyncIterator,
+    cast,
+    overload,
+)
+from typing_extensions import Awaitable, ParamSpec, override, get_origin
+
+import anyio
+import httpx
+import pydantic
+
+from ._types import NoneType
+from ._utils import is_given, extract_type_arg, is_annotated_type, is_type_alias_type, extract_type_var_from_base
+from ._models import BaseModel, is_basemodel, add_request_id
+from ._constants import RAW_RESPONSE_HEADER, OVERRIDE_CAST_TO_HEADER
+from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type
+from ._exceptions import AnthropicError, APIResponseValidationError
+from ._decoders.jsonl import JSONLDecoder, AsyncJSONLDecoder
+
+if TYPE_CHECKING:
+    from ._models import FinalRequestOptions
+    from ._base_client import BaseClient
+
+
+P = ParamSpec("P")
+R = TypeVar("R")
+_T = TypeVar("_T")
+_APIResponseT = TypeVar("_APIResponseT", bound="APIResponse[Any]")
+_AsyncAPIResponseT = TypeVar("_AsyncAPIResponseT", bound="AsyncAPIResponse[Any]")
+
+log: logging.Logger = logging.getLogger(__name__)
+
+
+class BaseAPIResponse(Generic[R]):
+    _cast_to: type[R]
+    _client: BaseClient[Any, Any]
+    _parsed_by_type: dict[type[Any], Any]
+    _is_sse_stream: bool
+    _stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None
+    _options: FinalRequestOptions
+
+    http_response: httpx.Response
+
+    retries_taken: int
+    """The number of retries made. If no retries happened this will be `0`"""
+
+    def __init__(
+        self,
+        *,
+        raw: httpx.Response,
+        cast_to: type[R],
+        client: BaseClient[Any, Any],
+        stream: bool,
+        stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None,
+        options: FinalRequestOptions,
+        retries_taken: int = 0,
+    ) -> None:
+        self._cast_to = cast_to
+        self._client = client
+        self._parsed_by_type = {}
+        self._is_sse_stream = stream
+        self._stream_cls = stream_cls
+        self._options = options
+        self.http_response = raw
+        self.retries_taken = retries_taken
+
+    @property
+    def headers(self) -> httpx.Headers:
+        return self.http_response.headers
+
+    @property
+    def http_request(self) -> httpx.Request:
+        """Returns the httpx Request instance associated with the current response."""
+        return self.http_response.request
+
+    @property
+    def status_code(self) -> int:
+        return self.http_response.status_code
+
+    @property
+    def url(self) -> httpx.URL:
+        """Returns the URL for which the request was made."""
+        return self.http_response.url
+
+    @property
+    def method(self) -> str:
+        return self.http_request.method
+
+    @property
+    def http_version(self) -> str:
+        return self.http_response.http_version
+
+    @property
+    def elapsed(self) -> datetime.timedelta:
+        """The time taken for the complete request/response cycle to complete."""
+        return self.http_response.elapsed
+
+    @property
+    def is_closed(self) -> bool:
+        """Whether or not the response body has been closed.
+
+        If this is False then there is response data that has not been read yet.
+        You must either fully consume the response body or call `.close()`
+        before discarding the response to prevent resource leaks.
+        """
+        return self.http_response.is_closed
+
+    @override
+    def __repr__(self) -> str:
+        return (
+            f"<{self.__class__.__name__} [{self.status_code} {self.http_response.reason_phrase}] type={self._cast_to}>"
+        )
+
+    def _parse(self, *, to: type[_T] | None = None) -> R | _T:
+        cast_to = to if to is not None else self._cast_to
+
+        # unwrap `TypeAlias('Name', T)` -> `T`
+        if is_type_alias_type(cast_to):
+            cast_to = cast_to.__value__  # type: ignore[unreachable]
+
+        # unwrap `Annotated[T, ...]` -> `T`
+        if cast_to and is_annotated_type(cast_to):
+            cast_to = extract_type_arg(cast_to, 0)
+
+        origin = get_origin(cast_to) or cast_to
+
+        if inspect.isclass(origin):
+            if issubclass(cast(Any, origin), JSONLDecoder):
+                return cast(
+                    R,
+                    cast("type[JSONLDecoder[Any]]", cast_to)(
+                        raw_iterator=self.http_response.iter_bytes(chunk_size=64),
+                        line_type=extract_type_arg(cast_to, 0),
+                        http_response=self.http_response,
+                    ),
+                )
+
+            if issubclass(cast(Any, origin), AsyncJSONLDecoder):
+                return cast(
+                    R,
+                    cast("type[AsyncJSONLDecoder[Any]]", cast_to)(
+                        raw_iterator=self.http_response.aiter_bytes(chunk_size=64),
+                        line_type=extract_type_arg(cast_to, 0),
+                        http_response=self.http_response,
+                    ),
+                )
+
+        if self._is_sse_stream:
+            if to:
+                if not is_stream_class_type(to):
+                    raise TypeError(f"Expected custom parse type to be a subclass of {Stream} or {AsyncStream}")
+
+                return cast(
+                    _T,
+                    to(
+                        cast_to=extract_stream_chunk_type(
+                            to,
+                            failure_message="Expected custom stream type to be passed with a type argument, e.g. Stream[ChunkType]",
+                        ),
+                        response=self.http_response,
+                        client=cast(Any, self._client),
+                    ),
+                )
+
+            if self._stream_cls:
+                return cast(
+                    R,
+                    self._stream_cls(
+                        cast_to=extract_stream_chunk_type(self._stream_cls),
+                        response=self.http_response,
+                        client=cast(Any, self._client),
+                    ),
+                )
+
+            stream_cls = cast("type[Stream[Any]] | type[AsyncStream[Any]] | None", self._client._default_stream_cls)
+            if stream_cls is None:
+                raise MissingStreamClassError()
+
+            return cast(
+                R,
+                stream_cls(
+                    cast_to=cast_to,
+                    response=self.http_response,
+                    client=cast(Any, self._client),
+                ),
+            )
+
+        if cast_to is NoneType:
+            return cast(R, None)
+
+        response = self.http_response
+        if cast_to == str:
+            return cast(R, response.text)
+
+        if cast_to == bytes:
+            return cast(R, response.content)
+
+        if cast_to == int:
+            return cast(R, int(response.text))
+
+        if cast_to == float:
+            return cast(R, float(response.text))
+
+        if cast_to == bool:
+            return cast(R, response.text.lower() == "true")
+
+        # handle the legacy binary response case
+        if inspect.isclass(cast_to) and cast_to.__name__ == "HttpxBinaryResponseContent":
+            return cast(R, cast_to(response))  # type: ignore
+
+        if origin == APIResponse:
+            raise RuntimeError("Unexpected state - cast_to is `APIResponse`")
+
+        if inspect.isclass(
+            origin  # pyright: ignore[reportUnknownArgumentType]
+        ) and issubclass(origin, httpx.Response):
+            # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response
+            # and pass that class to our request functions. We cannot change the variance to be either
+            # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct
+            # the response class ourselves but that is something that should be supported directly in httpx
+            # as it would be easy to incorrectly construct the Response object due to the multitude of arguments.
+            if cast_to != httpx.Response:
+                raise ValueError(f"Subclasses of httpx.Response cannot be passed to `cast_to`")
+            return cast(R, response)
+
+        if (
+            inspect.isclass(
+                origin  # pyright: ignore[reportUnknownArgumentType]
+            )
+            and not issubclass(origin, BaseModel)
+            and issubclass(origin, pydantic.BaseModel)
+        ):
+            raise TypeError("Pydantic models must subclass our base model type, e.g. `from anthropic import BaseModel`")
+
+        if (
+            cast_to is not object
+            and origin is not list
+            and origin is not dict
+            and origin is not Union
+            and not issubclass(origin, BaseModel)
+        ):
+            raise RuntimeError(
+                f"Unsupported type, expected {cast_to} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}."
+            )
+
+        # split is required to handle cases where additional information is included
+        # in the response, e.g. application/json; charset=utf-8
+        content_type, *_ = response.headers.get("content-type", "*").split(";")
+        if content_type != "application/json":
+            if is_basemodel(cast_to):
+                try:
+                    data = response.json()
+                except Exception as exc:
+                    log.debug("Could not read JSON from response data due to %s - %s", type(exc), exc)
+                else:
+                    return self._client._process_response_data(
+                        data=data,
+                        cast_to=cast_to,  # type: ignore
+                        response=response,
+                    )
+
+            if self._client._strict_response_validation:
+                raise APIResponseValidationError(
+                    response=response,
+                    message=f"Expected Content-Type response header to be `application/json` but received `{content_type}` instead.",
+                    body=response.text,
+                )
+
+            # If the API responds with content that isn't JSON then we just return
+            # the (decoded) text without performing any parsing so that you can still
+            # handle the response however you need to.
+            return response.text  # type: ignore
+
+        data = response.json()
+
+        return self._client._process_response_data(
+            data=data,
+            cast_to=cast_to,  # type: ignore
+            response=response,
+        )
+
+
+class APIResponse(BaseAPIResponse[R]):
+    @property
+    def request_id(self) -> str | None:
+        return self.http_response.headers.get("request-id")  # type: ignore[no-any-return]
+
+    @overload
+    def parse(self, *, to: type[_T]) -> _T: ...
+
+    @overload
+    def parse(self) -> R: ...
+
+    def parse(self, *, to: type[_T] | None = None) -> R | _T:
+        """Returns the rich python representation of this response's data.
+
+        For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.
+
+        You can customise the type that the response is parsed into through
+        the `to` argument, e.g.
+
+        ```py
+        from anthropic import BaseModel
+
+
+        class MyModel(BaseModel):
+            foo: str
+
+
+        obj = response.parse(to=MyModel)
+        print(obj.foo)
+        ```
+
+        We support parsing:
+          - `BaseModel`
+          - `dict`
+          - `list`
+          - `Union`
+          - `str`
+          - `int`
+          - `float`
+          - `httpx.Response`
+        """
+        cache_key = to if to is not None else self._cast_to
+        cached = self._parsed_by_type.get(cache_key)
+        if cached is not None:
+            return cached  # type: ignore[no-any-return]
+
+        if not self._is_sse_stream:
+            self.read()
+
+        parsed = self._parse(to=to)
+        if is_given(self._options.post_parser):
+            parsed = self._options.post_parser(parsed)
+
+        if isinstance(parsed, BaseModel):
+            add_request_id(parsed, self.request_id)
+
+        self._parsed_by_type[cache_key] = parsed
+        return cast(R, parsed)
+
+    def read(self) -> bytes:
+        """Read and return the binary response content."""
+        try:
+            return self.http_response.read()
+        except httpx.StreamConsumed as exc:
+            # The default error raised by httpx isn't very
+            # helpful in our case so we re-raise it with
+            # a different error message.
+            raise StreamAlreadyConsumed() from exc
+
+    def text(self) -> str:
+        """Read and decode the response content into a string."""
+        self.read()
+        return self.http_response.text
+
+    def json(self) -> object:
+        """Read and decode the JSON response content."""
+        self.read()
+        return self.http_response.json()
+
+    def close(self) -> None:
+        """Close the response and release the connection.
+
+        Automatically called if the response body is read to completion.
+        """
+        self.http_response.close()
+
+    def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]:
+        """
+        A byte-iterator over the decoded response content.
+
+        This automatically handles gzip, deflate and brotli encoded responses.
+        """
+        for chunk in self.http_response.iter_bytes(chunk_size):
+            yield chunk
+
+    def iter_text(self, chunk_size: int | None = None) -> Iterator[str]:
+        """A str-iterator over the decoded response content
+        that handles gzip, deflate, etc. and also detects the content's
+        string encoding.
+        """
+        for chunk in self.http_response.iter_text(chunk_size):
+            yield chunk
+
+    def iter_lines(self) -> Iterator[str]:
+        """Like `iter_text()` but will only yield chunks for each line"""
+        for chunk in self.http_response.iter_lines():
+            yield chunk
+
+
+class AsyncAPIResponse(BaseAPIResponse[R]):
+    @property
+    def request_id(self) -> str | None:
+        return self.http_response.headers.get("request-id")  # type: ignore[no-any-return]
+
+    @overload
+    async def parse(self, *, to: type[_T]) -> _T: ...
+
+    @overload
+    async def parse(self) -> R: ...
+
+    async def parse(self, *, to: type[_T] | None = None) -> R | _T:
+        """Returns the rich python representation of this response's data.
+
+        For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.
+
+        You can customise the type that the response is parsed into through
+        the `to` argument, e.g.
+
+        ```py
+        from anthropic import BaseModel
+
+
+        class MyModel(BaseModel):
+            foo: str
+
+
+        obj = response.parse(to=MyModel)
+        print(obj.foo)
+        ```
+
+        We support parsing:
+          - `BaseModel`
+          - `dict`
+          - `list`
+          - `Union`
+          - `str`
+          - `httpx.Response`
+        """
+        cache_key = to if to is not None else self._cast_to
+        cached = self._parsed_by_type.get(cache_key)
+        if cached is not None:
+            return cached  # type: ignore[no-any-return]
+
+        if not self._is_sse_stream:
+            await self.read()
+
+        parsed = self._parse(to=to)
+        if is_given(self._options.post_parser):
+            parsed = self._options.post_parser(parsed)
+
+        if isinstance(parsed, BaseModel):
+            add_request_id(parsed, self.request_id)
+
+        self._parsed_by_type[cache_key] = parsed
+        return cast(R, parsed)
+
+    async def read(self) -> bytes:
+        """Read and return the binary response content."""
+        try:
+            return await self.http_response.aread()
+        except httpx.StreamConsumed as exc:
+            # the default error raised by httpx isn't very
+            # helpful in our case so we re-raise it with
+            # a different error message
+            raise StreamAlreadyConsumed() from exc
+
+    async def text(self) -> str:
+        """Read and decode the response content into a string."""
+        await self.read()
+        return self.http_response.text
+
+    async def json(self) -> object:
+        """Read and decode the JSON response content."""
+        await self.read()
+        return self.http_response.json()
+
+    async def close(self) -> None:
+        """Close the response and release the connection.
+
+        Automatically called if the response body is read to completion.
+        """
+        await self.http_response.aclose()
+
+    async def iter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]:
+        """
+        A byte-iterator over the decoded response content.
+
+        This automatically handles gzip, deflate and brotli encoded responses.
+        """
+        async for chunk in self.http_response.aiter_bytes(chunk_size):
+            yield chunk
+
+    async def iter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]:
+        """A str-iterator over the decoded response content
+        that handles gzip, deflate, etc. and also detects the content's
+        string encoding.
+        """
+        async for chunk in self.http_response.aiter_text(chunk_size):
+            yield chunk
+
+    async def iter_lines(self) -> AsyncIterator[str]:
+        """Like `iter_text()` but will only yield chunks for each line"""
+        async for chunk in self.http_response.aiter_lines():
+            yield chunk
+
+
+class BinaryAPIResponse(APIResponse[bytes]):
+    """Subclass of APIResponse providing helpers for dealing with binary data.
+
+    Note: If you want to stream the response data instead of eagerly reading it
+    all at once then you should use `.with_streaming_response` when making
+    the API request, e.g. `.with_streaming_response.get_binary_response()`
+    """
+
+    def write_to_file(
+        self,
+        file: str | os.PathLike[str],
+    ) -> None:
+        """Write the output to the given file.
+
+        Accepts a filename or any path-like object, e.g. pathlib.Path
+
+        Note: if you want to stream the data to the file instead of writing
+        all at once then you should use `.with_streaming_response` when making
+        the API request, e.g. `.with_streaming_response.get_binary_response()`
+        """
+        with open(file, mode="wb") as f:
+            for data in self.iter_bytes():
+                f.write(data)
+
+
+class AsyncBinaryAPIResponse(AsyncAPIResponse[bytes]):
+    """Subclass of APIResponse providing helpers for dealing with binary data.
+
+    Note: If you want to stream the response data instead of eagerly reading it
+    all at once then you should use `.with_streaming_response` when making
+    the API request, e.g. `.with_streaming_response.get_binary_response()`
+    """
+
+    async def write_to_file(
+        self,
+        file: str | os.PathLike[str],
+    ) -> None:
+        """Write the output to the given file.
+
+        Accepts a filename or any path-like object, e.g. pathlib.Path
+
+        Note: if you want to stream the data to the file instead of writing
+        all at once then you should use `.with_streaming_response` when making
+        the API request, e.g. `.with_streaming_response.get_binary_response()`
+        """
+        path = anyio.Path(file)
+        async with await path.open(mode="wb") as f:
+            async for data in self.iter_bytes():
+                await f.write(data)
+
+
+class StreamedBinaryAPIResponse(APIResponse[bytes]):
+    def stream_to_file(
+        self,
+        file: str | os.PathLike[str],
+        *,
+        chunk_size: int | None = None,
+    ) -> None:
+        """Streams the output to the given file.
+
+        Accepts a filename or any path-like object, e.g. pathlib.Path
+        """
+        with open(file, mode="wb") as f:
+            for data in self.iter_bytes(chunk_size):
+                f.write(data)
+
+
+class AsyncStreamedBinaryAPIResponse(AsyncAPIResponse[bytes]):
+    async def stream_to_file(
+        self,
+        file: str | os.PathLike[str],
+        *,
+        chunk_size: int | None = None,
+    ) -> None:
+        """Streams the output to the given file.
+
+        Accepts a filename or any path-like object, e.g. pathlib.Path
+        """
+        path = anyio.Path(file)
+        async with await path.open(mode="wb") as f:
+            async for data in self.iter_bytes(chunk_size):
+                await f.write(data)
+
+
+class MissingStreamClassError(TypeError):
+    def __init__(self) -> None:
+        super().__init__(
+            "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `anthropic._streaming` for reference",
+        )
+
+
+class StreamAlreadyConsumed(AnthropicError):
+    """
+    Attempted to read or stream content, but the content has already
+    been streamed.
+
+    This can happen if you use a method like `.iter_lines()` and then attempt
+    to read the entire response body afterwards, e.g.
+
+    ```py
+    response = await client.post(...)
+    async for line in response.iter_lines():
+        ...  # do something with `line`
+
+    content = await response.read()
+    # ^ error
+    ```
+
+    If you want this behaviour you'll need to either manually accumulate the response
+    content or call `await response.read()` before iterating over the stream.
+    """
+
+    def __init__(self) -> None:
+        message = (
+            "Attempted to read or stream some content, but the content has "
+            "already been streamed. "
+            "This could be due to attempting to stream the response "
+            "content more than once."
+            "\n\n"
+            "You can fix this by manually accumulating the response content while streaming "
+            "or by calling `.read()` before starting to stream."
+        )
+        super().__init__(message)
+
+
+class ResponseContextManager(Generic[_APIResponseT]):
+    """Context manager for ensuring that a request is not made
+    until it is entered and that the response will always be closed
+    when the context manager exits
+    """
+
+    def __init__(self, request_func: Callable[[], _APIResponseT]) -> None:
+        self._request_func = request_func
+        self.__response: _APIResponseT | None = None
+
+    def __enter__(self) -> _APIResponseT:
+        self.__response = self._request_func()
+        return self.__response
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if self.__response is not None:
+            self.__response.close()
+
+
+class AsyncResponseContextManager(Generic[_AsyncAPIResponseT]):
+    """Context manager for ensuring that a request is not made
+    until it is entered and that the response will always be closed
+    when the context manager exits
+    """
+
+    def __init__(self, api_request: Awaitable[_AsyncAPIResponseT]) -> None:
+        self._api_request = api_request
+        self.__response: _AsyncAPIResponseT | None = None
+
+    async def __aenter__(self) -> _AsyncAPIResponseT:
+        self.__response = await self._api_request
+        return self.__response
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if self.__response is not None:
+            await self.__response.close()
+
+
+def to_streamed_response_wrapper(func: Callable[P, R]) -> Callable[P, ResponseContextManager[APIResponse[R]]]:
+    """Higher order function that takes one of our bound API methods and wraps it
+    to support streaming and returning the raw `APIResponse` object directly.
+    """
+
+    @functools.wraps(func)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> ResponseContextManager[APIResponse[R]]:
+        extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "stream"
+
+        kwargs["extra_headers"] = extra_headers
+
+        make_request = functools.partial(func, *args, **kwargs)
+
+        return ResponseContextManager(cast(Callable[[], APIResponse[R]], make_request))
+
+    return wrapped
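+
+# hedged usage sketch (the bound method shown is illustrative):
+#
+#     wrapped = to_streamed_response_wrapper(client.messages.create)
+#     with wrapped(...) as response:  # the request is only sent on __enter__
+#         for line in response.iter_lines():
+#             ...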
+
+
+def async_to_streamed_response_wrapper(
+    func: Callable[P, Awaitable[R]],
+) -> Callable[P, AsyncResponseContextManager[AsyncAPIResponse[R]]]:
+    """Higher order function that takes one of our bound API methods and wraps it
+    to support streaming and returning the raw `APIResponse` object directly.
+    """
+
+    @functools.wraps(func)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncResponseContextManager[AsyncAPIResponse[R]]:
+        extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "stream"
+
+        kwargs["extra_headers"] = extra_headers
+
+        make_request = func(*args, **kwargs)
+
+        return AsyncResponseContextManager(cast(Awaitable[AsyncAPIResponse[R]], make_request))
+
+    return wrapped
+
+
+def to_custom_streamed_response_wrapper(
+    func: Callable[P, object],
+    response_cls: type[_APIResponseT],
+) -> Callable[P, ResponseContextManager[_APIResponseT]]:
+    """Higher order function that takes one of our bound API methods and an `APIResponse` class
+    and wraps the method to support streaming and returning the given response class directly.
+
+    Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])`
+    """
+
+    @functools.wraps(func)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> ResponseContextManager[_APIResponseT]:
+        extra_headers: dict[str, Any] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "stream"
+        extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls
+
+        kwargs["extra_headers"] = extra_headers
+
+        make_request = functools.partial(func, *args, **kwargs)
+
+        return ResponseContextManager(cast(Callable[[], _APIResponseT], make_request))
+
+    return wrapped
+
+
+def async_to_custom_streamed_response_wrapper(
+    func: Callable[P, Awaitable[object]],
+    response_cls: type[_AsyncAPIResponseT],
+) -> Callable[P, AsyncResponseContextManager[_AsyncAPIResponseT]]:
+    """Higher order function that takes one of our bound API methods and an `APIResponse` class
+    and wraps the method to support streaming and returning the given response class directly.
+
+    Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])`
+    """
+
+    @functools.wraps(func)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncResponseContextManager[_AsyncAPIResponseT]:
+        extra_headers: dict[str, Any] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "stream"
+        extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls
+
+        kwargs["extra_headers"] = extra_headers
+
+        make_request = func(*args, **kwargs)
+
+        return AsyncResponseContextManager(cast(Awaitable[_AsyncAPIResponseT], make_request))
+
+    return wrapped
+
+
+def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, APIResponse[R]]:
+    """Higher order function that takes one of our bound API methods and wraps it
+    to support returning the raw `APIResponse` object directly.
+    """
+
+    @functools.wraps(func)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> APIResponse[R]:
+        extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "raw"
+
+        kwargs["extra_headers"] = extra_headers
+
+        return cast(APIResponse[R], func(*args, **kwargs))
+
+    return wrapped
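+
+# hedged usage sketch (the bound method shown is illustrative):
+#
+#     wrapped = to_raw_response_wrapper(client.messages.create)
+#     response = wrapped(...)     # -> the raw APIResponse object
+#     parsed = response.parse()   # the typed model is still recoverable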
+
+
+def async_to_raw_response_wrapper(func: Callable[P, Awaitable[R]]) -> Callable[P, Awaitable[AsyncAPIResponse[R]]]:
+    """Higher order function that takes one of our bound API methods and wraps it
+    to support returning the raw `APIResponse` object directly.
+    """
+
+    @functools.wraps(func)
+    async def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncAPIResponse[R]:
+        extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "raw"
+
+        kwargs["extra_headers"] = extra_headers
+
+        return cast(AsyncAPIResponse[R], await func(*args, **kwargs))
+
+    return wrapped
+
+
+def to_custom_raw_response_wrapper(
+    func: Callable[P, object],
+    response_cls: type[_APIResponseT],
+) -> Callable[P, _APIResponseT]:
+    """Higher order function that takes one of our bound API methods and an `APIResponse` class
+    and wraps the method to support returning the given response class directly.
+
+    Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])`
+    """
+
+    @functools.wraps(func)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> _APIResponseT:
+        extra_headers: dict[str, Any] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "raw"
+        extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls
+
+        kwargs["extra_headers"] = extra_headers
+
+        return cast(_APIResponseT, func(*args, **kwargs))
+
+    return wrapped
+
+
+def async_to_custom_raw_response_wrapper(
+    func: Callable[P, Awaitable[object]],
+    response_cls: type[_AsyncAPIResponseT],
+) -> Callable[P, Awaitable[_AsyncAPIResponseT]]:
+    """Higher order function that takes one of our bound API methods and an `APIResponse` class
+    and wraps the method to support returning the given response class directly.
+
+    Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])`
+    """
+
+    @functools.wraps(func)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> Awaitable[_AsyncAPIResponseT]:
+        extra_headers: dict[str, Any] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "raw"
+        extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls
+
+        kwargs["extra_headers"] = extra_headers
+
+        return cast(Awaitable[_AsyncAPIResponseT], func(*args, **kwargs))
+
+    return wrapped
+
+
+def extract_response_type(typ: type[BaseAPIResponse[Any]]) -> type:
+    """Given a type like `APIResponse[T]`, returns the generic type variable `T`.
+
+    This also handles the case where a concrete subclass is given, e.g.
+    ```py
+    class MyResponse(APIResponse[bytes]):
+        ...
+
+    extract_response_type(MyResponse) -> bytes
+    ```
+    """
+    return extract_type_var_from_base(
+        typ,
+        generic_bases=cast("tuple[type, ...]", (BaseAPIResponse, APIResponse, AsyncAPIResponse)),
+        index=0,
+    )
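+
+
+# --- Illustrative sketch (editor's addition, not part of the upstream file) --
+# Demonstrates `extract_response_type` on a concrete subclass; `TextResponse`
+# is an invented name used purely for illustration.
+def _example_extract_response_type() -> None:
+    class TextResponse(APIResponse[str]):
+        ...
+
+    assert extract_response_type(TextResponse) is str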
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_streaming.py b/.venv/lib/python3.12/site-packages/anthropic/_streaming.py
new file mode 100644
index 00000000..d43e2e6a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_streaming.py
@@ -0,0 +1,443 @@
+# Note: initially copied from https://github.com/florimondmanca/httpx-sse/blob/master/src/httpx_sse/_decoders.py
+from __future__ import annotations
+
+import abc
+import json
+import inspect
+import warnings
+from types import TracebackType
+from typing import TYPE_CHECKING, Any, Generic, TypeVar, Iterator, AsyncIterator, cast
+from typing_extensions import Self, Protocol, TypeGuard, override, get_origin, runtime_checkable
+
+import httpx
+
+from ._utils import is_dict, extract_type_var_from_base
+
+if TYPE_CHECKING:
+    from ._client import Anthropic, AsyncAnthropic
+
+
+_T = TypeVar("_T")
+
+
+class _SyncStreamMeta(abc.ABCMeta):
+    @override
+    def __instancecheck__(self, instance: Any) -> bool:
+        # we override the `isinstance()` check for `Stream`
+        # as a previous version of the `MessageStream` class
+        # inherited from `Stream` & without this workaround,
+        # changing it to not inherit would be a breaking change.
+
+        from .lib.streaming import MessageStream
+
+        if isinstance(instance, MessageStream):
+            warnings.warn(
+                "Using `isinstance()` to check if a `MessageStream` object is an instance of `Stream` is deprecated & will be removed in the next major version",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            return True
+
+        return False
+
+
+class Stream(Generic[_T], metaclass=_SyncStreamMeta):
+    """Provides the core interface to iterate over a synchronous stream response."""
+
+    response: httpx.Response
+
+    _decoder: SSEBytesDecoder
+
+    def __init__(
+        self,
+        *,
+        cast_to: type[_T],
+        response: httpx.Response,
+        client: Anthropic,
+    ) -> None:
+        self.response = response
+        self._cast_to = cast_to
+        self._client = client
+        self._decoder = client._make_sse_decoder()
+        self._iterator = self.__stream__()
+
+    def __next__(self) -> _T:
+        return self._iterator.__next__()
+
+    def __iter__(self) -> Iterator[_T]:
+        for item in self._iterator:
+            yield item
+
+    def _iter_events(self) -> Iterator[ServerSentEvent]:
+        yield from self._decoder.iter_bytes(self.response.iter_bytes())
+
+    def __stream__(self) -> Iterator[_T]:
+        cast_to = cast(Any, self._cast_to)
+        response = self.response
+        process_data = self._client._process_response_data
+        iterator = self._iter_events()
+
+        for sse in iterator:
+            if sse.event == "completion":
+                yield process_data(data=sse.json(), cast_to=cast_to, response=response)
+
+            if (
+                sse.event == "message_start"
+                or sse.event == "message_delta"
+                or sse.event == "message_stop"
+                or sse.event == "content_block_start"
+                or sse.event == "content_block_delta"
+                or sse.event == "content_block_stop"
+            ):
+                data = sse.json()
+                if is_dict(data) and "type" not in data:
+                    data["type"] = sse.event
+
+                yield process_data(data=data, cast_to=cast_to, response=response)
+
+            if sse.event == "ping":
+                continue
+
+            if sse.event == "error":
+                body = sse.data
+
+                try:
+                    body = sse.json()
+                    err_msg = f"{body}"
+                except Exception:
+                    err_msg = sse.data or f"Error code: {response.status_code}"
+
+                raise self._client._make_status_error(
+                    err_msg,
+                    body=body,
+                    response=self.response,
+                )
+
+        # Ensure the entire stream is consumed
+        for _sse in iterator:
+            ...
+
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        self.close()
+
+    def close(self) -> None:
+        """
+        Close the response and release the connection.
+
+        Automatically called if the response body is read to completion.
+        """
+        self.response.close()
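+
+
+# --- Illustrative sketch (editor's addition, not part of the upstream file) --
+# A minimal consumer for a `Stream`, assuming the caller already obtained one
+# from a streaming API call. The `with` block guarantees the underlying
+# connection is released even if iteration stops early.
+def _example_consume_stream(stream: Stream[object]) -> list[object]:
+    events: list[object] = []
+    with stream:
+        for event in stream:
+            events.append(event)
+    return events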
+
+
+class _AsyncStreamMeta(abc.ABCMeta):
+    @override
+    def __instancecheck__(self, instance: Any) -> bool:
+        # we override the `isinstance()` check for `AsyncStream`
+        # as a previous version of the `AsyncMessageStream` class
+        # inherited from `AsyncStream` & without this workaround,
+        # changing it to not inherit would be a breaking change.
+
+        from .lib.streaming import AsyncMessageStream
+
+        if isinstance(instance, AsyncMessageStream):
+            warnings.warn(
+                "Using `isinstance()` to check if a `AsyncMessageStream` object is an instance of `AsyncStream` is deprecated & will be removed in the next major version",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            return True
+
+        return False
+
+
+class AsyncStream(Generic[_T], metaclass=_AsyncStreamMeta):
+    """Provides the core interface to iterate over an asynchronous stream response."""
+
+    response: httpx.Response
+
+    _decoder: SSEDecoder | SSEBytesDecoder
+
+    def __init__(
+        self,
+        *,
+        cast_to: type[_T],
+        response: httpx.Response,
+        client: AsyncAnthropic,
+    ) -> None:
+        self.response = response
+        self._cast_to = cast_to
+        self._client = client
+        self._decoder = client._make_sse_decoder()
+        self._iterator = self.__stream__()
+
+    async def __anext__(self) -> _T:
+        return await self._iterator.__anext__()
+
+    async def __aiter__(self) -> AsyncIterator[_T]:
+        async for item in self._iterator:
+            yield item
+
+    async def _iter_events(self) -> AsyncIterator[ServerSentEvent]:
+        async for sse in self._decoder.aiter_bytes(self.response.aiter_bytes()):
+            yield sse
+
+    async def __stream__(self) -> AsyncIterator[_T]:
+        cast_to = cast(Any, self._cast_to)
+        response = self.response
+        process_data = self._client._process_response_data
+        iterator = self._iter_events()
+
+        async for sse in iterator:
+            if sse.event == "completion":
+                yield process_data(data=sse.json(), cast_to=cast_to, response=response)
+
+            if (
+                sse.event == "message_start"
+                or sse.event == "message_delta"
+                or sse.event == "message_stop"
+                or sse.event == "content_block_start"
+                or sse.event == "content_block_delta"
+                or sse.event == "content_block_stop"
+            ):
+                data = sse.json()
+                if is_dict(data) and "type" not in data:
+                    data["type"] = sse.event
+
+                yield process_data(data=data, cast_to=cast_to, response=response)
+
+            if sse.event == "ping":
+                continue
+
+            if sse.event == "error":
+                body = sse.data
+
+                try:
+                    body = sse.json()
+                    err_msg = f"{body}"
+                except Exception:
+                    err_msg = sse.data or f"Error code: {response.status_code}"
+
+                raise self._client._make_status_error(
+                    err_msg,
+                    body=body,
+                    response=self.response,
+                )
+
+        # Ensure the entire stream is consumed
+        async for _sse in iterator:
+            ...
+
+    async def __aenter__(self) -> Self:
+        return self
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        await self.close()
+
+    async def close(self) -> None:
+        """
+        Close the response and release the connection.
+
+        Automatically called if the response body is read to completion.
+        """
+        await self.response.aclose()
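+
+
+# --- Illustrative sketch (editor's addition, not part of the upstream file) --
+# The async counterpart: consume an `AsyncStream` under `async with` so the
+# response is closed deterministically.
+async def _example_consume_async_stream(stream: AsyncStream[object]) -> list[object]:
+    events: list[object] = []
+    async with stream:
+        async for event in stream:
+            events.append(event)
+    return events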
+
+
+class ServerSentEvent:
+    def __init__(
+        self,
+        *,
+        event: str | None = None,
+        data: str | None = None,
+        id: str | None = None,
+        retry: int | None = None,
+    ) -> None:
+        if data is None:
+            data = ""
+
+        self._id = id
+        self._data = data
+        self._event = event or None
+        self._retry = retry
+
+    @property
+    def event(self) -> str | None:
+        return self._event
+
+    @property
+    def id(self) -> str | None:
+        return self._id
+
+    @property
+    def retry(self) -> int | None:
+        return self._retry
+
+    @property
+    def data(self) -> str:
+        return self._data
+
+    def json(self) -> Any:
+        return json.loads(self.data)
+
+    @override
+    def __repr__(self) -> str:
+        return f"ServerSentEvent(event={self.event}, data={self.data}, id={self.id}, retry={self.retry})"
+
+
+class SSEDecoder:
+    _data: list[str]
+    _event: str | None
+    _retry: int | None
+    _last_event_id: str | None
+
+    def __init__(self) -> None:
+        self._event = None
+        self._data = []
+        self._last_event_id = None
+        self._retry = None
+
+    def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[ServerSentEvent]:
+        """Given an iterator that yields raw binary data, iterate over it & yield every event encountered"""
+        for chunk in self._iter_chunks(iterator):
+            # Split before decoding so splitlines() only uses \r and \n
+            for raw_line in chunk.splitlines():
+                line = raw_line.decode("utf-8")
+                sse = self.decode(line)
+                if sse:
+                    yield sse
+
+    def _iter_chunks(self, iterator: Iterator[bytes]) -> Iterator[bytes]:
+        """Given an iterator that yields raw binary data, iterate over it and yield individual SSE chunks"""
+        data = b""
+        for chunk in iterator:
+            for line in chunk.splitlines(keepends=True):
+                data += line
+                if data.endswith((b"\r\r", b"\n\n", b"\r\n\r\n")):
+                    yield data
+                    data = b""
+        if data:
+            yield data
+
+    async def aiter_bytes(self, iterator: AsyncIterator[bytes]) -> AsyncIterator[ServerSentEvent]:
+        """Given an iterator that yields raw binary data, iterate over it & yield every event encountered"""
+        async for chunk in self._aiter_chunks(iterator):
+            # Split before decoding so splitlines() only uses \r and \n
+            for raw_line in chunk.splitlines():
+                line = raw_line.decode("utf-8")
+                sse = self.decode(line)
+                if sse:
+                    yield sse
+
+    async def _aiter_chunks(self, iterator: AsyncIterator[bytes]) -> AsyncIterator[bytes]:
+        """Given an iterator that yields raw binary data, iterate over it and yield individual SSE chunks"""
+        data = b""
+        async for chunk in iterator:
+            for line in chunk.splitlines(keepends=True):
+                data += line
+                if data.endswith((b"\r\r", b"\n\n", b"\r\n\r\n")):
+                    yield data
+                    data = b""
+        if data:
+            yield data
+
+    def decode(self, line: str) -> ServerSentEvent | None:
+        # See: https://html.spec.whatwg.org/multipage/server-sent-events.html#event-stream-interpretation  # noqa: E501
+
+        if not line:
+            if not self._event and not self._data and not self._last_event_id and self._retry is None:
+                return None
+
+            sse = ServerSentEvent(
+                event=self._event,
+                data="\n".join(self._data),
+                id=self._last_event_id,
+                retry=self._retry,
+            )
+
+            # NOTE: as per the SSE spec, do not reset last_event_id.
+            self._event = None
+            self._data = []
+            self._retry = None
+
+            return sse
+
+        if line.startswith(":"):
+            return None
+
+        fieldname, _, value = line.partition(":")
+
+        if value.startswith(" "):
+            value = value[1:]
+
+        if fieldname == "event":
+            self._event = value
+        elif fieldname == "data":
+            self._data.append(value)
+        elif fieldname == "id":
+            if "\0" in value:
+                pass
+            else:
+                self._last_event_id = value
+        elif fieldname == "retry":
+            try:
+                self._retry = int(value)
+            except (TypeError, ValueError):
+                pass
+        else:
+            pass  # Field is ignored.
+
+        return None
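+
+
+# --- Illustrative sketch (editor's addition, not part of the upstream file) --
+# Feeding raw bytes through `SSEDecoder.iter_bytes`; the wire payload is a
+# hand-written example, not real API output. Note the blank line ("\n\n")
+# that terminates each event per the SSE spec.
+def _example_sse_decoder() -> None:
+    decoder = SSEDecoder()
+    raw = iter([b'event: ping\ndata: {}\n\nevent: error\ndata: {"type": "overloaded_error"}\n\n'])
+    events = list(decoder.iter_bytes(raw))
+    assert [e.event for e in events] == ["ping", "error"]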
+
+
+@runtime_checkable
+class SSEBytesDecoder(Protocol):
+    def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[ServerSentEvent]:
+        """Given an iterator that yields raw binary data, iterate over it & yield every event encountered"""
+        ...
+
+    def aiter_bytes(self, iterator: AsyncIterator[bytes]) -> AsyncIterator[ServerSentEvent]:
+        """Given an async iterator that yields raw binary data, iterate over it & yield every event encountered"""
+        ...
+
+
+def is_stream_class_type(typ: type) -> TypeGuard[type[Stream[object]] | type[AsyncStream[object]]]:
+    """TypeGuard for determining whether or not the given type is a subclass of `Stream` / `AsyncStream`"""
+    origin = get_origin(typ) or typ
+    return inspect.isclass(origin) and issubclass(origin, (Stream, AsyncStream))
+
+
+def extract_stream_chunk_type(
+    stream_cls: type,
+    *,
+    failure_message: str | None = None,
+) -> type:
+    """Given a type like `Stream[T]`, returns the generic type variable `T`.
+
+    This also handles the case where a concrete subclass is given, e.g.
+    ```py
+    class MyStream(Stream[bytes]):
+        ...
+
+    extract_stream_chunk_type(MyStream) -> bytes
+    ```
+    """
+    from ._base_client import Stream, AsyncStream
+
+    return extract_type_var_from_base(
+        stream_cls,
+        index=0,
+        generic_bases=cast("tuple[type, ...]", (Stream, AsyncStream)),
+        failure_message=failure_message,
+    )
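+
+
+# --- Illustrative sketch (editor's addition, not part of the upstream file) --
+# `extract_stream_chunk_type` resolves `T` from `Stream[T]`, including for
+# concrete subclasses; `_TextStream` is an invented name.
+def _example_extract_stream_chunk_type() -> None:
+    class _TextStream(Stream[str]):
+        ...
+
+    assert is_stream_class_type(_TextStream)
+    assert extract_stream_chunk_type(_TextStream) is str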
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_types.py b/.venv/lib/python3.12/site-packages/anthropic/_types.py
new file mode 100644
index 00000000..d80c2081
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_types.py
@@ -0,0 +1,219 @@
+from __future__ import annotations
+
+from os import PathLike
+from typing import (
+    IO,
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    List,
+    Type,
+    Tuple,
+    Union,
+    Mapping,
+    TypeVar,
+    Callable,
+    Optional,
+    Sequence,
+)
+from typing_extensions import Set, Literal, Protocol, TypeAlias, TypedDict, override, runtime_checkable
+
+import httpx
+import pydantic
+from httpx import URL, Proxy, Timeout, Response, BaseTransport, AsyncBaseTransport
+
+if TYPE_CHECKING:
+    from ._models import BaseModel
+    from ._response import APIResponse, AsyncAPIResponse
+    from ._legacy_response import HttpxBinaryResponseContent
+
+Transport = BaseTransport
+AsyncTransport = AsyncBaseTransport
+Query = Mapping[str, object]
+Body = object
+AnyMapping = Mapping[str, object]
+ModelT = TypeVar("ModelT", bound=pydantic.BaseModel)
+_T = TypeVar("_T")
+
+
+# Approximates httpx internal ProxiesTypes and RequestFiles types
+# while adding support for `PathLike` instances
+ProxiesDict = Dict["str | URL", Union[None, str, URL, Proxy]]
+ProxiesTypes = Union[str, Proxy, ProxiesDict]
+if TYPE_CHECKING:
+    Base64FileInput = Union[IO[bytes], PathLike[str]]
+    FileContent = Union[IO[bytes], bytes, PathLike[str]]
+else:
+    Base64FileInput = Union[IO[bytes], PathLike]
+    FileContent = Union[IO[bytes], bytes, PathLike]  # PathLike is not subscriptable in Python 3.8.
+FileTypes = Union[
+    # file (or bytes)
+    FileContent,
+    # (filename, file (or bytes))
+    Tuple[Optional[str], FileContent],
+    # (filename, file (or bytes), content_type)
+    Tuple[Optional[str], FileContent, Optional[str]],
+    # (filename, file (or bytes), content_type, headers)
+    Tuple[Optional[str], FileContent, Optional[str], Mapping[str, str]],
+]
+RequestFiles = Union[Mapping[str, FileTypes], Sequence[Tuple[str, FileTypes]]]
+
+# duplicate of the above but without our custom file support
+HttpxFileContent = Union[IO[bytes], bytes]
+HttpxFileTypes = Union[
+    # file (or bytes)
+    HttpxFileContent,
+    # (filename, file (or bytes))
+    Tuple[Optional[str], HttpxFileContent],
+    # (filename, file (or bytes), content_type)
+    Tuple[Optional[str], HttpxFileContent, Optional[str]],
+    # (filename, file (or bytes), content_type, headers)
+    Tuple[Optional[str], HttpxFileContent, Optional[str], Mapping[str, str]],
+]
+HttpxRequestFiles = Union[Mapping[str, HttpxFileTypes], Sequence[Tuple[str, HttpxFileTypes]]]
+
+# Workaround to support (cast_to: Type[ResponseT]) -> ResponseT
+# where ResponseT includes `None`. In order to support directly
+# passing `None`, overloads would have to be defined for every
+# method that uses `ResponseT` which would lead to an unacceptable
+# amount of code duplication and make it unreadable. See _base_client.py
+# for example usage.
+#
+# This unfortunately means that you will either have
+# to import this type and pass it explicitly:
+#
+# from anthropic import NoneType
+# client.get('/foo', cast_to=NoneType)
+#
+# or build it yourself:
+#
+# client.get('/foo', cast_to=type(None))
+if TYPE_CHECKING:
+    NoneType: Type[None]
+else:
+    NoneType = type(None)
+
+
+class RequestOptions(TypedDict, total=False):
+    headers: Headers
+    max_retries: int
+    timeout: float | Timeout | None
+    params: Query
+    extra_json: AnyMapping
+    idempotency_key: str
+
+
+# Sentinel class used until PEP 0661 is accepted
+class NotGiven:
+    """
+    A sentinel singleton class used to distinguish omitted keyword arguments
+    from those passed in with the value None (which may have different behavior).
+
+    For example:
+
+    ```py
+    def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: ...
+
+
+    get(timeout=1)  # 1s timeout
+    get(timeout=None)  # No timeout
+    get()  # Default timeout behavior, which may not be statically known at the method definition.
+    ```
+    """
+
+    def __bool__(self) -> Literal[False]:
+        return False
+
+    @override
+    def __repr__(self) -> str:
+        return "NOT_GIVEN"
+
+
+NotGivenOr = Union[_T, NotGiven]
+NOT_GIVEN = NotGiven()
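+
+
+# --- Illustrative sketch (editor's addition, not part of the upstream file) --
+# How `NOT_GIVEN` is typically threaded through a signature; the fallback
+# value of 60.0 is invented for illustration.
+def _example_not_given(timeout: NotGivenOr[float] = NOT_GIVEN) -> float:
+    if isinstance(timeout, NotGiven):
+        return 60.0  # caller omitted the argument entirely
+    return timeout  # caller passed an explicit value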
+
+
+class Omit:
+    """In certain situations you need to be able to represent a case where a default value has
+    to be explicitly removed and `None` is not an appropriate substitute, for example:
+
+    ```py
+    # as the default `Content-Type` header is `application/json`, that header will be sent
+    client.post("/upload/files", files={"file": b"my raw file content"})
+
+    # you can't explicitly override the header as it has to be dynamically generated
+    # to look something like: 'multipart/form-data; boundary=0d8382fcf5f8c3be01ca2e11002d2983'
+    client.post(..., headers={"Content-Type": "multipart/form-data"})
+
+    # instead you can remove the default `application/json` header by passing Omit
+    client.post(..., headers={"Content-Type": Omit()})
+    ```
+    """
+
+    def __bool__(self) -> Literal[False]:
+        return False
+
+
+@runtime_checkable
+class ModelBuilderProtocol(Protocol):
+    @classmethod
+    def build(
+        cls: type[_T],
+        *,
+        response: Response,
+        data: object,
+    ) -> _T: ...
+
+
+Headers = Mapping[str, Union[str, Omit]]
+
+
+class HeadersLikeProtocol(Protocol):
+    def get(self, __key: str) -> str | None: ...
+
+
+HeadersLike = Union[Headers, HeadersLikeProtocol]
+
+ResponseT = TypeVar(
+    "ResponseT",
+    bound=Union[
+        object,
+        str,
+        None,
+        "BaseModel",
+        List[Any],
+        Dict[str, Any],
+        Response,
+        ModelBuilderProtocol,
+        "APIResponse[Any]",
+        "AsyncAPIResponse[Any]",
+        "HttpxBinaryResponseContent",
+    ],
+)
+
+StrBytesIntFloat = Union[str, bytes, int, float]
+
+# Note: copied from Pydantic
+# https://github.com/pydantic/pydantic/blob/6f31f8f68ef011f84357330186f603ff295312fd/pydantic/main.py#L79
+IncEx: TypeAlias = Union[Set[int], Set[str], Mapping[int, Union["IncEx", bool]], Mapping[str, Union["IncEx", bool]]]
+
+PostParser = Callable[[Any], Any]
+
+
+@runtime_checkable
+class InheritsGeneric(Protocol):
+    """Represents a type that has inherited from `Generic`
+
+    The `__orig_bases__` property can be used to determine the resolved
+    type variable for a given base class.
+    """
+
+    __orig_bases__: tuple[_GenericAlias]
+
+
+class _GenericAlias(Protocol):
+    __origin__: type[object]
+
+
+class HttpxSendArgs(TypedDict, total=False):
+    auth: httpx.Auth
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_utils/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/_utils/__init__.py
new file mode 100644
index 00000000..d4fda26f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_utils/__init__.py
@@ -0,0 +1,57 @@
+from ._sync import asyncify as asyncify
+from ._proxy import LazyProxy as LazyProxy
+from ._utils import (
+    flatten as flatten,
+    is_dict as is_dict,
+    is_list as is_list,
+    is_given as is_given,
+    is_tuple as is_tuple,
+    json_safe as json_safe,
+    lru_cache as lru_cache,
+    is_mapping as is_mapping,
+    is_tuple_t as is_tuple_t,
+    parse_date as parse_date,
+    is_iterable as is_iterable,
+    is_sequence as is_sequence,
+    coerce_float as coerce_float,
+    is_mapping_t as is_mapping_t,
+    removeprefix as removeprefix,
+    removesuffix as removesuffix,
+    extract_files as extract_files,
+    is_sequence_t as is_sequence_t,
+    required_args as required_args,
+    coerce_boolean as coerce_boolean,
+    coerce_integer as coerce_integer,
+    file_from_path as file_from_path,
+    parse_datetime as parse_datetime,
+    strip_not_given as strip_not_given,
+    deepcopy_minimal as deepcopy_minimal,
+    get_async_library as get_async_library,
+    maybe_coerce_float as maybe_coerce_float,
+    get_required_header as get_required_header,
+    maybe_coerce_boolean as maybe_coerce_boolean,
+    maybe_coerce_integer as maybe_coerce_integer,
+)
+from ._typing import (
+    is_list_type as is_list_type,
+    is_union_type as is_union_type,
+    extract_type_arg as extract_type_arg,
+    is_iterable_type as is_iterable_type,
+    is_required_type as is_required_type,
+    is_annotated_type as is_annotated_type,
+    is_type_alias_type as is_type_alias_type,
+    strip_annotated_type as strip_annotated_type,
+    extract_type_var_from_base as extract_type_var_from_base,
+)
+from ._streams import consume_sync_iterator as consume_sync_iterator, consume_async_iterator as consume_async_iterator
+from ._transform import (
+    PropertyInfo as PropertyInfo,
+    transform as transform,
+    async_transform as async_transform,
+    maybe_transform as maybe_transform,
+    async_maybe_transform as async_maybe_transform,
+)
+from ._reflection import (
+    function_has_argument as function_has_argument,
+    assert_signatures_in_sync as assert_signatures_in_sync,
+)
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_utils/_logs.py b/.venv/lib/python3.12/site-packages/anthropic/_utils/_logs.py
new file mode 100644
index 00000000..a409705b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_utils/_logs.py
@@ -0,0 +1,25 @@
+import os
+import logging
+
+logger: logging.Logger = logging.getLogger("anthropic")
+httpx_logger: logging.Logger = logging.getLogger("httpx")
+
+
+def _basic_config() -> None:
+    # e.g. [2023-10-05 14:12:26 - anthropic._base_client:818 - DEBUG] HTTP Request: POST http://127.0.0.1:4010/foo/bar "200 OK"
+    logging.basicConfig(
+        format="[%(asctime)s - %(name)s:%(lineno)d - %(levelname)s] %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+
+
+def setup_logging() -> None:
+    env = os.environ.get("ANTHROPIC_LOG")
+    if env == "debug":
+        _basic_config()
+        logger.setLevel(logging.DEBUG)
+        httpx_logger.setLevel(logging.DEBUG)
+    elif env == "info":
+        _basic_config()
+        logger.setLevel(logging.INFO)
+        httpx_logger.setLevel(logging.INFO)
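+
+
+# --- Illustrative sketch (editor's addition, not part of the upstream file) --
+# `setup_logging` is driven entirely by the ANTHROPIC_LOG environment
+# variable, e.g. `ANTHROPIC_LOG=debug python my_script.py`. The programmatic
+# equivalent below is an invented example for tests or REPL sessions.
+def _example_enable_debug_logging() -> None:
+    os.environ["ANTHROPIC_LOG"] = "debug"
+    setup_logging()  # both the `anthropic` and `httpx` loggers now emit DEBUG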
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_utils/_proxy.py b/.venv/lib/python3.12/site-packages/anthropic/_utils/_proxy.py
new file mode 100644
index 00000000..ffd883e9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_utils/_proxy.py
@@ -0,0 +1,62 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Generic, TypeVar, Iterable, cast
+from typing_extensions import override
+
+T = TypeVar("T")
+
+
+class LazyProxy(Generic[T], ABC):
+    """Implements data methods to pretend that an instance is another instance.
+
+    This includes forwarding attribute access and other methods.
+    """
+
+    # Note: we have to special case proxies that themselves return proxies
+    # to support using a proxy as a catch-all for any random access, e.g. `proxy.foo.bar.baz`
+
+    def __getattr__(self, attr: str) -> object:
+        proxied = self.__get_proxied__()
+        if isinstance(proxied, LazyProxy):
+            return proxied  # pyright: ignore
+        return getattr(proxied, attr)
+
+    @override
+    def __repr__(self) -> str:
+        proxied = self.__get_proxied__()
+        if isinstance(proxied, LazyProxy):
+            return proxied.__class__.__name__
+        return repr(self.__get_proxied__())
+
+    @override
+    def __str__(self) -> str:
+        proxied = self.__get_proxied__()
+        if isinstance(proxied, LazyProxy):
+            return proxied.__class__.__name__
+        return str(proxied)
+
+    @override
+    def __dir__(self) -> Iterable[str]:
+        proxied = self.__get_proxied__()
+        if isinstance(proxied, LazyProxy):
+            return []
+        return proxied.__dir__()
+
+    @property  # type: ignore
+    @override
+    def __class__(self) -> type:  # pyright: ignore
+        proxied = self.__get_proxied__()
+        if issubclass(type(proxied), LazyProxy):
+            return type(proxied)
+        return proxied.__class__
+
+    def __get_proxied__(self) -> T:
+        return self.__load__()
+
+    def __as_proxied__(self) -> T:
+        """Helper method that returns the current proxy, typed as the loaded object"""
+        return cast(T, self)
+
+    @abstractmethod
+    def __load__(self) -> T: ...
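+
+
+# --- Illustrative sketch (editor's addition, not part of the upstream file) --
+# A minimal concrete `LazyProxy`: the proxied object is only materialised when
+# an attribute is first accessed. The wrapped list is an invented stand-in.
+class _ExampleListProxy(LazyProxy[list]):
+    @override
+    def __load__(self) -> list:
+        return [1, 2, 3]
+
+
+# _ExampleListProxy().count(2) == 1  # forwards to the underlying list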
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_utils/_reflection.py b/.venv/lib/python3.12/site-packages/anthropic/_utils/_reflection.py
new file mode 100644
index 00000000..89aa712a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_utils/_reflection.py
@@ -0,0 +1,42 @@
+from __future__ import annotations
+
+import inspect
+from typing import Any, Callable
+
+
+def function_has_argument(func: Callable[..., Any], arg_name: str) -> bool:
+    """Returns whether or not the given function has a specific parameter"""
+    sig = inspect.signature(func)
+    return arg_name in sig.parameters
+
+
+def assert_signatures_in_sync(
+    source_func: Callable[..., Any],
+    check_func: Callable[..., Any],
+    *,
+    exclude_params: set[str] = set(),
+) -> None:
+    """Ensure that the signature of the second function matches the first."""
+
+    check_sig = inspect.signature(check_func)
+    source_sig = inspect.signature(source_func)
+
+    errors: list[str] = []
+
+    for name, source_param in source_sig.parameters.items():
+        if name in exclude_params:
+            continue
+
+        custom_param = check_sig.parameters.get(name)
+        if not custom_param:
+            errors.append(f"the `{name}` param is missing")
+            continue
+
+        if custom_param.annotation != source_param.annotation:
+            errors.append(
+                f"types for the `{name}` param are do not match; source={repr(source_param.annotation)} checking={repr(custom_param.annotation)}"
+            )
+            continue
+
+    if errors:
+        raise AssertionError(f"{len(errors)} errors encountered when comparing signatures:\n\n" + "\n\n".join(errors))
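+
+
+# --- Illustrative sketch (editor's addition, not part of the upstream file) --
+# Typical use: keep a convenience wrapper's signature in lock-step with the
+# method it wraps. Both functions here are invented.
+def _example_signature_check() -> None:
+    def source(a: int, b: str = "x") -> None: ...
+
+    def mirror(a: int, b: str = "x") -> None: ...
+
+    assert function_has_argument(source, "b")
+    assert_signatures_in_sync(source, mirror)  # passes silently when in sync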
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_utils/_streams.py b/.venv/lib/python3.12/site-packages/anthropic/_utils/_streams.py
new file mode 100644
index 00000000..f4a0208f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_utils/_streams.py
@@ -0,0 +1,12 @@
+from typing import Any
+from typing_extensions import Iterator, AsyncIterator
+
+
+def consume_sync_iterator(iterator: Iterator[Any]) -> None:
+    for _ in iterator:
+        ...
+
+
+async def consume_async_iterator(iterator: AsyncIterator[Any]) -> None:
+    async for _ in iterator:
+        ...
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_utils/_sync.py b/.venv/lib/python3.12/site-packages/anthropic/_utils/_sync.py
new file mode 100644
index 00000000..ad7ec71b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_utils/_sync.py
@@ -0,0 +1,86 @@
+from __future__ import annotations
+
+import sys
+import asyncio
+import functools
+import contextvars
+from typing import Any, TypeVar, Callable, Awaitable
+from typing_extensions import ParamSpec
+
+import anyio
+import sniffio
+import anyio.to_thread
+
+T_Retval = TypeVar("T_Retval")
+T_ParamSpec = ParamSpec("T_ParamSpec")
+
+
+if sys.version_info >= (3, 9):
+    _asyncio_to_thread = asyncio.to_thread
+else:
+    # backport of https://docs.python.org/3/library/asyncio-task.html#asyncio.to_thread
+    # for Python 3.8 support
+    async def _asyncio_to_thread(
+        func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs
+    ) -> Any:
+        """Asynchronously run function *func* in a separate thread.
+
+        Any *args and **kwargs supplied for this function are directly passed
+        to *func*. Also, the current :class:`contextvars.Context` is propagated,
+        allowing context variables from the main thread to be accessed in the
+        separate thread.
+
+        Returns a coroutine that can be awaited to get the eventual result of *func*.
+        """
+        loop = asyncio.events.get_running_loop()
+        ctx = contextvars.copy_context()
+        func_call = functools.partial(ctx.run, func, *args, **kwargs)
+        return await loop.run_in_executor(None, func_call)
+
+
+async def to_thread(
+    func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs
+) -> T_Retval:
+    if sniffio.current_async_library() == "asyncio":
+        return await _asyncio_to_thread(func, *args, **kwargs)
+
+    return await anyio.to_thread.run_sync(
+        functools.partial(func, *args, **kwargs),
+    )
+
+
+# inspired by `asyncer`, https://github.com/tiangolo/asyncer
+def asyncify(function: Callable[T_ParamSpec, T_Retval]) -> Callable[T_ParamSpec, Awaitable[T_Retval]]:
+    """
+    Take a blocking function and create an async one that receives the same
+    positional and keyword arguments. On Python 3.9 and above, it uses
+    asyncio.to_thread to run the function in a separate thread. On Python 3.8,
+    it uses a locally defined copy of the asyncio.to_thread function that was
+    introduced in Python 3.9.
+
+    Usage:
+
+    ```python
+    def blocking_func(arg1, arg2, kwarg1=None):
+        # blocking code
+        return result
+
+
+    result = await asyncify(blocking_func)(arg1, arg2, kwarg1=value1)
+    ```
+
+    ## Arguments
+
+    `function`: a blocking regular callable (e.g. a function)
+
+    ## Return
+
+    An async function that takes the same positional and keyword arguments as the
+    original one, that when called runs the same original function in a thread worker
+    and returns the result.
+    """
+
+    async def wrapper(*args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs) -> T_Retval:
+        return await to_thread(function, *args, **kwargs)
+
+    return wrapper
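+
+
+# --- Illustrative sketch (editor's addition, not part of the upstream file) --
+# Running a blocking callable from async code without stalling the event loop;
+# `_slow_add` is an invented stand-in for real blocking work.
+def _slow_add(a: int, b: int) -> int:
+    return a + b  # imagine a blocking I/O call here
+
+
+async def _example_asyncify() -> int:
+    # executed in a worker thread; returns 3
+    return await asyncify(_slow_add)(1, 2)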
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_utils/_transform.py b/.venv/lib/python3.12/site-packages/anthropic/_utils/_transform.py
new file mode 100644
index 00000000..18afd9d8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_utils/_transform.py
@@ -0,0 +1,402 @@
+from __future__ import annotations
+
+import io
+import base64
+import pathlib
+from typing import Any, Mapping, TypeVar, cast
+from datetime import date, datetime
+from typing_extensions import Literal, get_args, override, get_type_hints
+
+import anyio
+import pydantic
+
+from ._utils import (
+    is_list,
+    is_mapping,
+    is_iterable,
+)
+from .._files import is_base64_file_input
+from ._typing import (
+    is_list_type,
+    is_union_type,
+    extract_type_arg,
+    is_iterable_type,
+    is_required_type,
+    is_annotated_type,
+    strip_annotated_type,
+)
+from .._compat import get_origin, model_dump, is_typeddict
+
+_T = TypeVar("_T")
+
+
+# TODO: support for drilling globals() and locals()
+# TODO: ensure works correctly with forward references in all cases
+
+
+PropertyFormat = Literal["iso8601", "base64", "custom"]
+
+
+class PropertyInfo:
+    """Metadata class to be used in Annotated types to provide information about a given type.
+
+    For example:
+
+    class MyParams(TypedDict):
+        account_holder_name: Annotated[str, PropertyInfo(alias='accountHolderName')]
+
+    This means that {'account_holder_name': 'Robert'} will be transformed to {'accountHolderName': 'Robert'} before being sent to the API.
+    """
+
+    alias: str | None
+    format: PropertyFormat | None
+    format_template: str | None
+    discriminator: str | None
+
+    def __init__(
+        self,
+        *,
+        alias: str | None = None,
+        format: PropertyFormat | None = None,
+        format_template: str | None = None,
+        discriminator: str | None = None,
+    ) -> None:
+        self.alias = alias
+        self.format = format
+        self.format_template = format_template
+        self.discriminator = discriminator
+
+    @override
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}(alias='{self.alias}', format={self.format}, format_template='{self.format_template}', discriminator='{self.discriminator}')"
+
+
+def maybe_transform(
+    data: object,
+    expected_type: object,
+) -> Any | None:
+    """Wrapper over `transform()` that allows `None` to be passed.
+
+    See `transform()` for more details.
+    """
+    if data is None:
+        return None
+    return transform(data, expected_type)
+
+
+# Wrapper over _transform_recursive providing fake types
+def transform(
+    data: _T,
+    expected_type: object,
+) -> _T:
+    """Transform dictionaries based off of type information from the given type, for example:
+
+    ```py
+    class Params(TypedDict, total=False):
+        card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]]
+
+
+    transformed = transform({"card_id": "<my card ID>"}, Params)
+    # {'cardID': '<my card ID>'}
+    ```
+
+    Any keys / data that does not have type information given will be included as is.
+
+    It should be noted that the transformations that this function does are not represented in the type system.
+    """
+    transformed = _transform_recursive(data, annotation=cast(type, expected_type))
+    return cast(_T, transformed)
+
+
+def _get_annotated_type(type_: type) -> type | None:
+    """If the given type is an `Annotated` type then it is returned, if not `None` is returned.
+
+    This also unwraps the type when applicable, e.g. `Required[Annotated[T, ...]]`
+    """
+    if is_required_type(type_):
+        # Unwrap `Required[Annotated[T, ...]]` to `Annotated[T, ...]`
+        type_ = get_args(type_)[0]
+
+    if is_annotated_type(type_):
+        return type_
+
+    return None
+
+
+def _maybe_transform_key(key: str, type_: type) -> str:
+    """Transform the given `data` based on the annotations provided in `type_`.
+
+    Note: this function only looks at `Annotated` types that contain `PropertInfo` metadata.
+    """
+    annotated_type = _get_annotated_type(type_)
+    if annotated_type is None:
+        # no `Annotated` definition for this type, no transformation needed
+        return key
+
+    # ignore the first argument as it is the actual type
+    annotations = get_args(annotated_type)[1:]
+    for annotation in annotations:
+        if isinstance(annotation, PropertyInfo) and annotation.alias is not None:
+            return annotation.alias
+
+    return key
+
+
+def _transform_recursive(
+    data: object,
+    *,
+    annotation: type,
+    inner_type: type | None = None,
+) -> object:
+    """Transform the given data against the expected type.
+
+    Args:
+        annotation: The direct type annotation given to the particular piece of data.
+            This may or may not be wrapped in metadata types, e.g. `Required[T]`, `Annotated[T, ...]` etc
+
+        inner_type: If applicable, this is the "inside" type. This is useful in certain cases where the outside type
+            is a container type such as `List[T]`. In that case `inner_type` should be set to `T` so that each entry in
+            the list can be transformed using the metadata from the container type.
+
+            Defaults to the same value as the `annotation` argument.
+    """
+    if inner_type is None:
+        inner_type = annotation
+
+    stripped_type = strip_annotated_type(inner_type)
+    origin = get_origin(stripped_type) or stripped_type
+    if is_typeddict(stripped_type) and is_mapping(data):
+        return _transform_typeddict(data, stripped_type)
+
+    if origin == dict and is_mapping(data):
+        items_type = get_args(stripped_type)[1]
+        return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()}
+
+    if (
+        # List[T]
+        (is_list_type(stripped_type) and is_list(data))
+        # Iterable[T]
+        or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str))
+    ):
+        # dicts are technically iterable, but it is an iterable on the keys of the dict and is not usually
+        # intended as an iterable, so we don't transform it.
+        if isinstance(data, dict):
+            return cast(object, data)
+
+        inner_type = extract_type_arg(stripped_type, 0)
+        return [_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data]
+
+    if is_union_type(stripped_type):
+        # For union types we run the transformation against all subtypes to ensure that everything is transformed.
+        #
+        # TODO: there may be edge cases where the same normalized field name will transform to two different names
+        # in different subtypes.
+        for subtype in get_args(stripped_type):
+            data = _transform_recursive(data, annotation=annotation, inner_type=subtype)
+        return data
+
+    if isinstance(data, pydantic.BaseModel):
+        return model_dump(data, exclude_unset=True, mode="json")
+
+    annotated_type = _get_annotated_type(annotation)
+    if annotated_type is None:
+        return data
+
+    # ignore the first argument as it is the actual type
+    annotations = get_args(annotated_type)[1:]
+    for annotation in annotations:
+        if isinstance(annotation, PropertyInfo) and annotation.format is not None:
+            return _format_data(data, annotation.format, annotation.format_template)
+
+    return data
+
+
+def _format_data(data: object, format_: PropertyFormat, format_template: str | None) -> object:
+    if isinstance(data, (date, datetime)):
+        if format_ == "iso8601":
+            return data.isoformat()
+
+        if format_ == "custom" and format_template is not None:
+            return data.strftime(format_template)
+
+    if format_ == "base64" and is_base64_file_input(data):
+        binary: str | bytes | None = None
+
+        if isinstance(data, pathlib.Path):
+            binary = data.read_bytes()
+        elif isinstance(data, io.IOBase):
+            binary = data.read()
+
+            if isinstance(binary, str):  # type: ignore[unreachable]
+                binary = binary.encode()
+
+        if not isinstance(binary, bytes):
+            raise RuntimeError(f"Could not read bytes from {data}; Received {type(binary)}")
+
+        return base64.b64encode(binary).decode("ascii")
+
+    return data
+
+
+def _transform_typeddict(
+    data: Mapping[str, object],
+    expected_type: type,
+) -> Mapping[str, object]:
+    result: dict[str, object] = {}
+    annotations = get_type_hints(expected_type, include_extras=True)
+    for key, value in data.items():
+        type_ = annotations.get(key)
+        if type_ is None:
+            # we do not have a type annotation for this field, leave it as is
+            result[key] = value
+        else:
+            result[_maybe_transform_key(key, type_)] = _transform_recursive(value, annotation=type_)
+    return result
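+
+
+# --- Illustrative sketch (editor's addition, not part of the upstream file) --
+# `PropertyInfo` metadata drives both key aliasing and value formatting; the
+# TypedDict below is invented. The functional syntax is used so the annotation
+# stays a real object rather than a PEP 563 string.
+def _example_transform_formats() -> None:
+    from typing_extensions import Annotated, TypedDict
+
+    _Params = TypedDict(
+        "_Params",
+        {"starts_at": Annotated[datetime, PropertyInfo(alias="startsAt", format="iso8601")]},
+        total=False,
+    )
+
+    out = transform({"starts_at": datetime(2024, 1, 1)}, _Params)
+    assert out == {"startsAt": "2024-01-01T00:00:00"}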
+
+
+async def async_maybe_transform(
+    data: object,
+    expected_type: object,
+) -> Any | None:
+    """Wrapper over `async_transform()` that allows `None` to be passed.
+
+    See `async_transform()` for more details.
+    """
+    if data is None:
+        return None
+    return await async_transform(data, expected_type)
+
+
+async def async_transform(
+    data: _T,
+    expected_type: object,
+) -> _T:
+    """Transform dictionaries based off of type information from the given type, for example:
+
+    ```py
+    class Params(TypedDict, total=False):
+        card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]]
+
+
+    transformed = await async_transform({"card_id": "<my card ID>"}, Params)
+    # {'cardID': '<my card ID>'}
+    ```
+
+    Any keys / data that does not have type information given will be included as is.
+
+    It should be noted that the transformations that this function does are not represented in the type system.
+    """
+    transformed = await _async_transform_recursive(data, annotation=cast(type, expected_type))
+    return cast(_T, transformed)
+
+
+async def _async_transform_recursive(
+    data: object,
+    *,
+    annotation: type,
+    inner_type: type | None = None,
+) -> object:
+    """Transform the given data against the expected type.
+
+    Args:
+        annotation: The direct type annotation given to the particular piece of data.
+            This may or may not be wrapped in metadata types, e.g. `Required[T]`, `Annotated[T, ...]` etc
+
+        inner_type: If applicable, this is the "inside" type. This is useful in certain cases where the outside type
+            is a container type such as `List[T]`. In that case `inner_type` should be set to `T` so that each entry in
+            the list can be transformed using the metadata from the container type.
+
+            Defaults to the same value as the `annotation` argument.
+    """
+    if inner_type is None:
+        inner_type = annotation
+
+    stripped_type = strip_annotated_type(inner_type)
+    origin = get_origin(stripped_type) or stripped_type
+    if is_typeddict(stripped_type) and is_mapping(data):
+        return await _async_transform_typeddict(data, stripped_type)
+
+    if origin == dict and is_mapping(data):
+        items_type = get_args(stripped_type)[1]
+        return {key: await _async_transform_recursive(value, annotation=items_type) for key, value in data.items()}
+
+    if (
+        # List[T]
+        (is_list_type(stripped_type) and is_list(data))
+        # Iterable[T]
+        or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str))
+    ):
+        # dicts are technically iterable, but it is an iterable on the keys of the dict and is not usually
+        # intended as an iterable, so we don't transform it.
+        if isinstance(data, dict):
+            return cast(object, data)
+
+        inner_type = extract_type_arg(stripped_type, 0)
+        return [await _async_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data]
+
+    if is_union_type(stripped_type):
+        # For union types we run the transformation against all subtypes to ensure that everything is transformed.
+        #
+        # TODO: there may be edge cases where the same normalized field name will transform to two different names
+        # in different subtypes.
+        for subtype in get_args(stripped_type):
+            data = await _async_transform_recursive(data, annotation=annotation, inner_type=subtype)
+        return data
+
+    if isinstance(data, pydantic.BaseModel):
+        return model_dump(data, exclude_unset=True, mode="json")
+
+    annotated_type = _get_annotated_type(annotation)
+    if annotated_type is None:
+        return data
+
+    # ignore the first argument as it is the actual type
+    annotations = get_args(annotated_type)[1:]
+    for annotation in annotations:
+        if isinstance(annotation, PropertyInfo) and annotation.format is not None:
+            return await _async_format_data(data, annotation.format, annotation.format_template)
+
+    return data
+
+
+async def _async_format_data(data: object, format_: PropertyFormat, format_template: str | None) -> object:
+    if isinstance(data, (date, datetime)):
+        if format_ == "iso8601":
+            return data.isoformat()
+
+        if format_ == "custom" and format_template is not None:
+            return data.strftime(format_template)
+
+    if format_ == "base64" and is_base64_file_input(data):
+        binary: str | bytes | None = None
+
+        if isinstance(data, pathlib.Path):
+            binary = await anyio.Path(data).read_bytes()
+        elif isinstance(data, io.IOBase):
+            binary = data.read()
+
+            if isinstance(binary, str):  # type: ignore[unreachable]
+                binary = binary.encode()
+
+        if not isinstance(binary, bytes):
+            raise RuntimeError(f"Could not read bytes from {data}; Received {type(binary)}")
+
+        return base64.b64encode(binary).decode("ascii")
+
+    return data
+
+
+async def _async_transform_typeddict(
+    data: Mapping[str, object],
+    expected_type: type,
+) -> Mapping[str, object]:
+    result: dict[str, object] = {}
+    annotations = get_type_hints(expected_type, include_extras=True)
+    for key, value in data.items():
+        type_ = annotations.get(key)
+        if type_ is None:
+            # we do not have a type annotation for this field, leave it as is
+            result[key] = value
+        else:
+            result[_maybe_transform_key(key, type_)] = await _async_transform_recursive(value, annotation=type_)
+    return result
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_utils/_typing.py b/.venv/lib/python3.12/site-packages/anthropic/_utils/_typing.py
new file mode 100644
index 00000000..278749b1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_utils/_typing.py
@@ -0,0 +1,149 @@
+from __future__ import annotations
+
+import sys
+import typing
+import typing_extensions
+from typing import Any, TypeVar, Iterable, cast
+from collections import abc as _c_abc
+from typing_extensions import (
+    TypeIs,
+    Required,
+    Annotated,
+    get_args,
+    get_origin,
+)
+
+from .._types import InheritsGeneric
+from .._compat import is_union as _is_union
+
+
+def is_annotated_type(typ: type) -> bool:
+    return get_origin(typ) == Annotated
+
+
+def is_list_type(typ: type) -> bool:
+    return (get_origin(typ) or typ) == list
+
+
+def is_iterable_type(typ: type) -> bool:
+    """If the given type is `typing.Iterable[T]`"""
+    origin = get_origin(typ) or typ
+    return origin == Iterable or origin == _c_abc.Iterable
+
+
+def is_union_type(typ: type) -> bool:
+    return _is_union(get_origin(typ))
+
+
+def is_required_type(typ: type) -> bool:
+    return get_origin(typ) == Required
+
+
+def is_typevar(typ: type) -> bool:
+    # type ignore is required because type checkers
+    # think this expression will always return False
+    return type(typ) == TypeVar  # type: ignore
+
+
+_TYPE_ALIAS_TYPES: tuple[type[typing_extensions.TypeAliasType], ...] = (typing_extensions.TypeAliasType,)
+if sys.version_info >= (3, 12):
+    _TYPE_ALIAS_TYPES = (*_TYPE_ALIAS_TYPES, typing.TypeAliasType)
+
+
+def is_type_alias_type(tp: Any, /) -> TypeIs[typing_extensions.TypeAliasType]:
+    """Return whether the provided argument is an instance of `TypeAliasType`.
+
+    ```python
+    type Int = int
+    is_type_alias_type(Int)
+    # > True
+    Str = TypeAliasType("Str", str)
+    is_type_alias_type(Str)
+    # > True
+    ```
+    """
+    return isinstance(tp, _TYPE_ALIAS_TYPES)
+
+
+# Extracts T from Annotated[T, ...] or from Required[Annotated[T, ...]]
+def strip_annotated_type(typ: type) -> type:
+    if is_required_type(typ) or is_annotated_type(typ):
+        return strip_annotated_type(cast(type, get_args(typ)[0]))
+
+    return typ
+
+
+def extract_type_arg(typ: type, index: int) -> type:
+    args = get_args(typ)
+    try:
+        return cast(type, args[index])
+    except IndexError as err:
+        raise RuntimeError(f"Expected type {typ} to have a type argument at index {index} but it did not") from err
+
+
+def extract_type_var_from_base(
+    typ: type,
+    *,
+    generic_bases: tuple[type, ...],
+    index: int,
+    failure_message: str | None = None,
+) -> type:
+    """Given a type like `Foo[T]`, returns the generic type variable `T`.
+
+    This also handles the case where a concrete subclass is given, e.g.
+    ```py
+    class MyResponse(Foo[bytes]):
+        ...
+
+    extract_type_var(MyResponse, bases=(Foo,), index=0) -> bytes
+    ```
+
+    And where a generic subclass is given:
+    ```py
+    _T = TypeVar('_T')
+    class MyResponse(Foo[_T]):
+        ...
+
+    extract_type_var(MyResponse[bytes], bases=(Foo,), index=0) -> bytes
+    ```
+    """
+    cls = cast(object, get_origin(typ) or typ)
+    if cls in generic_bases:
+        # we're given the class directly
+        return extract_type_arg(typ, index)
+
+    # if a subclass is given
+    # ---
+    # this is needed as __orig_bases__ is not present in the typeshed stubs
+    # because it is intended to be for internal use only, however there does
+    # not seem to be a way to resolve generic TypeVars for inherited subclasses
+    # without using it.
+    if isinstance(cls, InheritsGeneric):
+        target_base_class: Any | None = None
+        for base in cls.__orig_bases__:
+            if base.__origin__ in generic_bases:
+                target_base_class = base
+                break
+
+        if target_base_class is None:
+            raise RuntimeError(
+                "Could not find the generic base class;\n"
+                "This should never happen;\n"
+                f"Does {cls} inherit from one of {generic_bases} ?"
+            )
+
+        extracted = extract_type_arg(target_base_class, index)
+        if is_typevar(extracted):
+            # If the extracted type argument is itself a type variable
+            # then that means the subclass itself is generic, so we have
+            # to resolve the type argument from the class itself, not
+            # the base class.
+            #
+            # Note: if there is more than 1 type argument, the subclass could
+            # change the ordering of the type arguments, this is not currently
+            # supported.
+            return extract_type_arg(typ, index)
+
+        return extracted
+
+    raise RuntimeError(failure_message or f"Could not resolve inner type variable at index {index} for {typ}")
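+
+
+# --- Illustrative sketch (editor's addition, not part of the upstream file) --
+# `extract_type_var_from_base` resolves `T` by walking `__orig_bases__`;
+# `_Box` is an invented generic base used purely for illustration.
+def _example_extract_type_var() -> None:
+    from typing import Generic
+
+    _T_inner = TypeVar("_T_inner")
+
+    class _Box(Generic[_T_inner]):
+        ...
+
+    class _IntBox(_Box[int]):
+        ...
+
+    assert extract_type_var_from_base(_IntBox, generic_bases=(_Box,), index=0) is int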
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_utils/_utils.py b/.venv/lib/python3.12/site-packages/anthropic/_utils/_utils.py
new file mode 100644
index 00000000..e5811bba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_utils/_utils.py
@@ -0,0 +1,414 @@
+from __future__ import annotations
+
+import os
+import re
+import inspect
+import functools
+from typing import (
+    Any,
+    Tuple,
+    Mapping,
+    TypeVar,
+    Callable,
+    Iterable,
+    Sequence,
+    cast,
+    overload,
+)
+from pathlib import Path
+from datetime import date, datetime
+from typing_extensions import TypeGuard
+
+import sniffio
+
+from .._types import NotGiven, FileTypes, NotGivenOr, HeadersLike
+from .._compat import parse_date as parse_date, parse_datetime as parse_datetime
+
+_T = TypeVar("_T")
+_TupleT = TypeVar("_TupleT", bound=Tuple[object, ...])
+_MappingT = TypeVar("_MappingT", bound=Mapping[str, object])
+_SequenceT = TypeVar("_SequenceT", bound=Sequence[object])
+CallableT = TypeVar("CallableT", bound=Callable[..., Any])
+
+
+def flatten(t: Iterable[Iterable[_T]]) -> list[_T]:
+    return [item for sublist in t for item in sublist]
+
+
+def extract_files(
+    # TODO: this needs to take Dict but variance issues.....
+    # create protocol type ?
+    query: Mapping[str, object],
+    *,
+    paths: Sequence[Sequence[str]],
+) -> list[tuple[str, FileTypes]]:
+    """Recursively extract files from the given dictionary based on specified paths.
+
+    A path may look like this ['foo', 'files', '<array>', 'data'].
+
+    Note: this mutates the given dictionary.
+    """
+    files: list[tuple[str, FileTypes]] = []
+    for path in paths:
+        files.extend(_extract_items(query, path, index=0, flattened_key=None))
+    return files
+
+
+def _extract_items(
+    obj: object,
+    path: Sequence[str],
+    *,
+    index: int,
+    flattened_key: str | None,
+) -> list[tuple[str, FileTypes]]:
+    try:
+        key = path[index]
+    except IndexError:
+        if isinstance(obj, NotGiven):
+            # no value was provided - we can safely ignore
+            return []
+
+        # cyclical import
+        from .._files import assert_is_file_content
+
+        # We have exhausted the path, return the entry we found.
+        assert_is_file_content(obj, key=flattened_key)
+        assert flattened_key is not None
+        return [(flattened_key, cast(FileTypes, obj))]
+
+    index += 1
+    if is_dict(obj):
+        try:
+            # We are at the last entry in the path so we must remove the field
+            if len(path) == index:
+                item = obj.pop(key)
+            else:
+                item = obj[key]
+        except KeyError:
+            # Key was not present in the dictionary, this is not indicative of an error
+            # as the given path may not point to a required field. We also do not want
+            # to enforce required fields as the API may differ from the spec in some cases.
+            return []
+        if flattened_key is None:
+            flattened_key = key
+        else:
+            flattened_key += f"[{key}]"
+        return _extract_items(
+            item,
+            path,
+            index=index,
+            flattened_key=flattened_key,
+        )
+    elif is_list(obj):
+        if key != "<array>":
+            return []
+
+        return flatten(
+            [
+                _extract_items(
+                    item,
+                    path,
+                    index=index,
+                    flattened_key=flattened_key + "[]" if flattened_key is not None else "[]",
+                )
+                for item in obj
+            ]
+        )
+
+    # Something unexpected was passed, just ignore it.
+    return []
+
+
+def is_given(obj: NotGivenOr[_T]) -> TypeGuard[_T]:
+    return not isinstance(obj, NotGiven)
+
+
+# Type safe methods for narrowing types with TypeVars.
+# The default narrowing for isinstance(obj, dict) is dict[unknown, unknown],
+# however this causes Pyright to rightfully report errors. As we know we don't
+# care about the contained types we can safely use `object` in its place.
+#
+# There are two separate functions defined, `is_*` and `is_*_t` for different use cases.
+# `is_*` is for when you're dealing with an unknown input
+# `is_*_t` is for when you're narrowing a known union type to a specific subset
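+#
+# For example (an illustrative sketch, not part of the SDK):
+#
+#     def handle(value: Mapping[str, int] | str) -> None:
+#         if is_mapping_t(value):
+#             reveal_type(value)  # narrowed to Mapping[str, int]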
+
+
+def is_tuple(obj: object) -> TypeGuard[tuple[object, ...]]:
+    return isinstance(obj, tuple)
+
+
+def is_tuple_t(obj: _TupleT | object) -> TypeGuard[_TupleT]:
+    return isinstance(obj, tuple)
+
+
+def is_sequence(obj: object) -> TypeGuard[Sequence[object]]:
+    return isinstance(obj, Sequence)
+
+
+def is_sequence_t(obj: _SequenceT | object) -> TypeGuard[_SequenceT]:
+    return isinstance(obj, Sequence)
+
+
+def is_mapping(obj: object) -> TypeGuard[Mapping[str, object]]:
+    return isinstance(obj, Mapping)
+
+
+def is_mapping_t(obj: _MappingT | object) -> TypeGuard[_MappingT]:
+    return isinstance(obj, Mapping)
+
+
+def is_dict(obj: object) -> TypeGuard[dict[object, object]]:
+    return isinstance(obj, dict)
+
+
+def is_list(obj: object) -> TypeGuard[list[object]]:
+    return isinstance(obj, list)
+
+
+def is_iterable(obj: object) -> TypeGuard[Iterable[object]]:
+    return isinstance(obj, Iterable)
+
+
+def deepcopy_minimal(item: _T) -> _T:
+    """Minimal reimplementation of copy.deepcopy() that will only copy certain object types:
+
+    - mappings, e.g. `dict`
+    - list
+
+    This is done for performance reasons.
+    """
+    if is_mapping(item):
+        return cast(_T, {k: deepcopy_minimal(v) for k, v in item.items()})
+    if is_list(item):
+        return cast(_T, [deepcopy_minimal(entry) for entry in item])
+    return item
+
+
+# copied from https://github.com/Rapptz/RoboDanny
+def human_join(seq: Sequence[str], *, delim: str = ", ", final: str = "or") -> str:
+    size = len(seq)
+    if size == 0:
+        return ""
+
+    if size == 1:
+        return seq[0]
+
+    if size == 2:
+        return f"{seq[0]} {final} {seq[1]}"
+
+    return delim.join(seq[:-1]) + f" {final} {seq[-1]}"
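+
+# Illustrative outputs (not part of the SDK):
+#
+#     human_join(["a"]) == "a"
+#     human_join(["a", "b"]) == "a or b"
+#     human_join(["a", "b", "c"]) == "a, b or c"
+#     human_join(["a", "b", "c"], final="and") == "a, b and c"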
+
+
+def quote(string: str) -> str:
+    """Add single quotation marks around the given string. Does *not* do any escaping."""
+    return f"'{string}'"
+
+
+def required_args(*variants: Sequence[str]) -> Callable[[CallableT], CallableT]:
+    """Decorator to enforce a given set of arguments or variants of arguments are passed to the decorated function.
+
+    Useful for enforcing runtime validation of overloaded functions.
+
+    Example usage:
+    ```py
+    @overload
+    def foo(*, a: str) -> str: ...
+
+
+    @overload
+    def foo(*, b: bool) -> str: ...
+
+
+    # This enforces the same constraints that a static type checker would
+    # i.e. that either a or b must be passed to the function
+    @required_args(["a"], ["b"])
+    def foo(*, a: str | None = None, b: bool | None = None) -> str: ...
+    ```
+    """
+
+    def inner(func: CallableT) -> CallableT:
+        params = inspect.signature(func).parameters
+        positional = [
+            name
+            for name, param in params.items()
+            if param.kind
+            in {
+                param.POSITIONAL_ONLY,
+                param.POSITIONAL_OR_KEYWORD,
+            }
+        ]
+
+        @functools.wraps(func)
+        def wrapper(*args: object, **kwargs: object) -> object:
+            given_params: set[str] = set()
+            for i, _ in enumerate(args):
+                try:
+                    given_params.add(positional[i])
+                except IndexError:
+                    raise TypeError(
+                        f"{func.__name__}() takes {len(positional)} argument(s) but {len(args)} were given"
+                    ) from None
+
+            for key in kwargs.keys():
+                given_params.add(key)
+
+            for variant in variants:
+                matches = all((param in given_params for param in variant))
+                if matches:
+                    break
+            else:  # no break
+                if len(variants) > 1:
+                    variations = human_join(
+                        ["(" + human_join([quote(arg) for arg in variant], final="and") + ")" for variant in variants]
+                    )
+                    msg = f"Missing required arguments; Expected either {variations} arguments to be given"
+                else:
+                    assert len(variants) > 0
+
+                    # TODO: this error message is not deterministic
+                    missing = list(set(variants[0]) - given_params)
+                    if len(missing) > 1:
+                        msg = f"Missing required arguments: {human_join([quote(arg) for arg in missing])}"
+                    else:
+                        msg = f"Missing required argument: {quote(missing[0])}"
+                raise TypeError(msg)
+            return func(*args, **kwargs)
+
+        return wrapper  # type: ignore
+
+    return inner
+
+
+_K = TypeVar("_K")
+_V = TypeVar("_V")
+
+
+@overload
+def strip_not_given(obj: None) -> None: ...
+
+
+@overload
+def strip_not_given(obj: Mapping[_K, _V | NotGiven]) -> dict[_K, _V]: ...
+
+
+@overload
+def strip_not_given(obj: object) -> object: ...
+
+
+def strip_not_given(obj: object | None) -> object:
+    """Remove all top-level keys where their values are instances of `NotGiven`"""
+    if obj is None:
+        return None
+
+    if not is_mapping(obj):
+        return obj
+
+    return {key: value for key, value in obj.items() if not isinstance(value, NotGiven)}
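+
+# Illustrative behaviour (not part of the SDK):
+#
+#     strip_not_given(None) is None
+#     strip_not_given("not a mapping") == "not a mapping"
+#     strip_not_given({"a": 1, "b": NotGiven()}) == {"a": 1}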
+
+
+def coerce_integer(val: str) -> int:
+    return int(val, base=10)
+
+
+def coerce_float(val: str) -> float:
+    return float(val)
+
+
+def coerce_boolean(val: str) -> bool:
+    return val == "true" or val == "1" or val == "on"
+
+
+def maybe_coerce_integer(val: str | None) -> int | None:
+    if val is None:
+        return None
+    return coerce_integer(val)
+
+
+def maybe_coerce_float(val: str | None) -> float | None:
+    if val is None:
+        return None
+    return coerce_float(val)
+
+
+def maybe_coerce_boolean(val: str | None) -> bool | None:
+    if val is None:
+        return None
+    return coerce_boolean(val)
+
+
+def removeprefix(string: str, prefix: str) -> str:
+    """Remove a prefix from a string.
+
+    Backport of `str.removeprefix` for Python < 3.9
+    """
+    if string.startswith(prefix):
+        return string[len(prefix) :]
+    return string
+
+
+def removesuffix(string: str, suffix: str) -> str:
+    """Remove a suffix from a string.
+
+    Backport of `str.removesuffix` for Python < 3.9
+    """
+    if string.endswith(suffix):
+        return string[: -len(suffix)]
+    return string
+
+
+def file_from_path(path: str) -> FileTypes:
+    contents = Path(path).read_bytes()
+    file_name = os.path.basename(path)
+    return (file_name, contents)
+
+
+def get_required_header(headers: HeadersLike, header: str) -> str:
+    lower_header = header.lower()
+    if is_mapping_t(headers):
+        # mypy doesn't understand the type narrowing here
+        for k, v in headers.items():  # type: ignore
+            if k.lower() == lower_header and isinstance(v, str):
+                return v
+
+    # to deal with the case where the header looks like Stainless-Event-Id
+    intercaps_header = re.sub(r"([^\w])(\w)", lambda pat: pat.group(1) + pat.group(2).upper(), header.capitalize())
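+    # e.g. "x-stainless-lang" -> "X-Stainless-Lang" (illustrative)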
+
+    for normalized_header in [header, lower_header, header.upper(), intercaps_header]:
+        value = headers.get(normalized_header)
+        if value:
+            return value
+
+    raise ValueError(f"Could not find {header} header")
+
+
+def get_async_library() -> str:
+    try:
+        return sniffio.current_async_library()
+    except Exception:
+        return "false"
+
+
+def lru_cache(*, maxsize: int | None = 128) -> Callable[[CallableT], CallableT]:
+    """A version of functools.lru_cache that retains the type signature
+    for the wrapped function arguments.
+    """
+    wrapper = functools.lru_cache(  # noqa: TID251
+        maxsize=maxsize,
+    )
+    return cast(Any, wrapper)  # type: ignore[no-any-return]
+
+
+def json_safe(data: object) -> object:
+    """Translates a mapping / sequence recursively in the same fashion
+    as `pydantic` v2's `model_dump(mode="json")`.
+    """
+    if is_mapping(data):
+        return {json_safe(key): json_safe(value) for key, value in data.items()}
+
+    if is_iterable(data) and not isinstance(data, (str, bytes, bytearray)):
+        return [json_safe(item) for item in data]
+
+    if isinstance(data, (datetime, date)):
+        return data.isoformat()
+
+    return data
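+
+# Illustrative behaviour (not part of the SDK):
+#
+#     json_safe({"when": datetime(2025, 1, 1)}) == {"when": "2025-01-01T00:00:00"}
+#     json_safe({1: (date(2025, 1, 1),)}) == {1: ["2025-01-01"]}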
diff --git a/.venv/lib/python3.12/site-packages/anthropic/_version.py b/.venv/lib/python3.12/site-packages/anthropic/_version.py
new file mode 100644
index 00000000..181038b4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/_version.py
@@ -0,0 +1,4 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+__title__ = "anthropic"
+__version__ = "0.49.0"  # x-release-please-version
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/.keep b/.venv/lib/python3.12/site-packages/anthropic/lib/.keep
new file mode 100644
index 00000000..5e2c99fd
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/.keep
@@ -0,0 +1,4 @@
+File generated from our OpenAPI spec by Stainless.
+
+This directory can be used to store custom files to expand the SDK.
+It is ignored by Stainless code generation and its content (other than this keep file) won't be touched.
\ No newline at end of file
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/lib/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/__init__.py
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/_extras/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/lib/_extras/__init__.py
new file mode 100644
index 00000000..4e3037ee
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/_extras/__init__.py
@@ -0,0 +1 @@
+from ._google_auth import google_auth as google_auth
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/_extras/_common.py b/.venv/lib/python3.12/site-packages/anthropic/lib/_extras/_common.py
new file mode 100644
index 00000000..5d2b7f6a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/_extras/_common.py
@@ -0,0 +1,13 @@
+from ..._exceptions import AnthropicError
+
+INSTRUCTIONS = """
+
+Anthropic error: missing required dependency `{library}`.
+
+    $ pip install anthropic[{extra}]
+"""
+
+
+class MissingDependencyError(AnthropicError):
+    def __init__(self, *, library: str, extra: str) -> None:
+        super().__init__(INSTRUCTIONS.format(library=library, extra=extra))
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/_extras/_google_auth.py b/.venv/lib/python3.12/site-packages/anthropic/lib/_extras/_google_auth.py
new file mode 100644
index 00000000..16cc7909
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/_extras/_google_auth.py
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+from typing_extensions import ClassVar, override
+
+from ._common import MissingDependencyError
+from ..._utils import LazyProxy
+
+if TYPE_CHECKING:
+    import google.auth  # type: ignore
+
+    google_auth = google.auth
+
+
+class GoogleAuthProxy(LazyProxy[Any]):
+    should_cache: ClassVar[bool] = True
+
+    @override
+    def __load__(self) -> Any:
+        try:
+            import google.auth  # type: ignore
+        except ImportError as err:
+            raise MissingDependencyError(extra="vertex", library="google-auth") from err
+
+        return google.auth
+
+
+if not TYPE_CHECKING:
+    google_auth = GoogleAuthProxy()
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/__init__.py
new file mode 100644
index 00000000..69440c76
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/__init__.py
@@ -0,0 +1 @@
+from ._client import AnthropicBedrock as AnthropicBedrock, AsyncAnthropicBedrock as AsyncAnthropicBedrock
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_auth.py b/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_auth.py
new file mode 100644
index 00000000..0a8b2109
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_auth.py
@@ -0,0 +1,72 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import httpx
+
+from ..._utils import lru_cache
+
+if TYPE_CHECKING:
+    import boto3
+
+
+@lru_cache(maxsize=512)
+def _get_session(
+    *,
+    aws_access_key: str | None,
+    aws_secret_key: str | None,
+    aws_session_token: str | None,
+    region: str | None,
+    profile: str | None,
+) -> boto3.Session:
+    import boto3
+
+    return boto3.Session(
+        profile_name=profile,
+        region_name=region,
+        aws_access_key_id=aws_access_key,
+        aws_secret_access_key=aws_secret_key,
+        aws_session_token=aws_session_token,
+    )
+
+
+def get_auth_headers(
+    *,
+    method: str,
+    url: str,
+    headers: httpx.Headers,
+    aws_access_key: str | None,
+    aws_secret_key: str | None,
+    aws_session_token: str | None,
+    region: str | None,
+    profile: str | None,
+    data: str | None,
+) -> dict[str, str]:
+    from botocore.auth import SigV4Auth
+    from botocore.awsrequest import AWSRequest
+
+    session = _get_session(
+        profile=profile,
+        region=region,
+        aws_access_key=aws_access_key,
+        aws_secret_key=aws_secret_key,
+        aws_session_token=aws_session_token,
+    )
+
+    # The connection header may be stripped by a proxy somewhere along the way, in
+    # which case the receiver of this message would not see it, so we remove it from
+    # the set of headers that are signed.
+    headers = headers.copy()
+    del headers["connection"]
+
+    request = AWSRequest(method=method.upper(), url=url, headers=headers, data=data)
+    credentials = session.get_credentials()
+    if not credentials:
+        raise RuntimeError("could not resolve credentials from session")
+
+    signer = SigV4Auth(credentials, "bedrock", session.region_name)
+    signer.add_auth(request)
+
+    prepped = request.prepare()
+
+    return {key: value for key, value in dict(prepped.headers).items() if value is not None}
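+
+# A minimal usage sketch (the values are illustrative and assume credentials can
+# be resolved from the environment or the default AWS credential chain):
+#
+#     signed = get_auth_headers(
+#         method="post",
+#         url="https://bedrock-runtime.us-east-1.amazonaws.com/model/my-model/invoke",
+#         headers=httpx.Headers({"connection": "keep-alive", "content-type": "application/json"}),
+#         aws_access_key=None,
+#         aws_secret_key=None,
+#         aws_session_token=None,
+#         region="us-east-1",
+#         profile=None,
+#         data='{"anthropic_version": "bedrock-2023-05-31"}',
+#     )
+#     # -> includes "Authorization" (AWS4-HMAC-SHA256 ...) and "X-Amz-Date" headers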
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_beta.py b/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_beta.py
new file mode 100644
index 00000000..f2a91b42
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_beta.py
@@ -0,0 +1,102 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ._beta_messages import (
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
+
+__all__ = ["Beta", "AsyncBeta"]
+
+
+class Beta(SyncAPIResource):
+    @cached_property
+    def messages(self) -> Messages:
+        return Messages(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> BetaWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return BetaWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> BetaWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return BetaWithStreamingResponse(self)
+
+
+class AsyncBeta(AsyncAPIResource):
+    @cached_property
+    def messages(self) -> AsyncMessages:
+        return AsyncMessages(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncBetaWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncBetaWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncBetaWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return AsyncBetaWithStreamingResponse(self)
+
+
+class BetaWithRawResponse:
+    def __init__(self, beta: Beta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def messages(self) -> MessagesWithRawResponse:
+        return MessagesWithRawResponse(self._beta.messages)
+
+
+class AsyncBetaWithRawResponse:
+    def __init__(self, beta: AsyncBeta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def messages(self) -> AsyncMessagesWithRawResponse:
+        return AsyncMessagesWithRawResponse(self._beta.messages)
+
+
+class BetaWithStreamingResponse:
+    def __init__(self, beta: Beta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def messages(self) -> MessagesWithStreamingResponse:
+        return MessagesWithStreamingResponse(self._beta.messages)
+
+
+class AsyncBetaWithStreamingResponse:
+    def __init__(self, beta: AsyncBeta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def messages(self) -> AsyncMessagesWithStreamingResponse:
+        return AsyncMessagesWithStreamingResponse(self._beta.messages)
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_beta_messages.py b/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_beta_messages.py
new file mode 100644
index 00000000..332f6fba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_beta_messages.py
@@ -0,0 +1,93 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ... import _legacy_response
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...resources.beta import Messages as FirstPartyMessagesAPI, AsyncMessages as FirstPartyAsyncMessagesAPI
+
+__all__ = ["Messages", "AsyncMessages"]
+
+
+class Messages(SyncAPIResource):
+    create = FirstPartyMessagesAPI.create
+
+    @cached_property
+    def with_raw_response(self) -> MessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return MessagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> MessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return MessagesWithStreamingResponse(self)
+
+
+class AsyncMessages(AsyncAPIResource):
+    create = FirstPartyAsyncMessagesAPI.create
+
+    @cached_property
+    def with_raw_response(self) -> AsyncMessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncMessagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return AsyncMessagesWithStreamingResponse(self)
+
+
+class MessagesWithRawResponse:
+    def __init__(self, messages: Messages) -> None:
+        self._messages = messages
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            messages.create,
+        )
+
+
+class AsyncMessagesWithRawResponse:
+    def __init__(self, messages: AsyncMessages) -> None:
+        self._messages = messages
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            messages.create,
+        )
+
+
+class MessagesWithStreamingResponse:
+    def __init__(self, messages: Messages) -> None:
+        self._messages = messages
+
+        self.create = to_streamed_response_wrapper(
+            messages.create,
+        )
+
+
+class AsyncMessagesWithStreamingResponse:
+    def __init__(self, messages: AsyncMessages) -> None:
+        self._messages = messages
+
+        self.create = async_to_streamed_response_wrapper(
+            messages.create,
+        )
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_client.py b/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_client.py
new file mode 100644
index 00000000..ca645489
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_client.py
@@ -0,0 +1,390 @@
+from __future__ import annotations
+
+import os
+import urllib.parse
+from typing import Any, Union, Mapping, TypeVar
+from typing_extensions import Self, override
+
+import httpx
+
+from ... import _exceptions
+from ._beta import Beta, AsyncBeta
+from ..._types import NOT_GIVEN, Timeout, NotGiven
+from ..._utils import is_dict, is_given
+from ..._compat import model_copy
+from ..._version import __version__
+from ..._streaming import Stream, AsyncStream
+from ..._exceptions import AnthropicError, APIStatusError
+from ..._base_client import (
+    DEFAULT_MAX_RETRIES,
+    BaseClient,
+    SyncAPIClient,
+    AsyncAPIClient,
+    FinalRequestOptions,
+)
+from ._stream_decoder import AWSEventStreamDecoder
+from ...resources.messages import Messages, AsyncMessages
+from ...resources.completions import Completions, AsyncCompletions
+
+DEFAULT_VERSION = "bedrock-2023-05-31"
+
+_HttpxClientT = TypeVar("_HttpxClientT", bound=Union[httpx.Client, httpx.AsyncClient])
+_DefaultStreamT = TypeVar("_DefaultStreamT", bound=Union[Stream[Any], AsyncStream[Any]])
+
+
+def _prepare_options(input_options: FinalRequestOptions) -> FinalRequestOptions:
+    options = model_copy(input_options, deep=True)
+
+    if is_dict(options.json_data):
+        options.json_data.setdefault("anthropic_version", DEFAULT_VERSION)
+
+        if is_given(options.headers):
+            betas = options.headers.get("anthropic-beta")
+            if betas:
+                options.json_data.setdefault("anthropic_beta", betas.split(","))
+
+    if options.url in {"/v1/complete", "/v1/messages", "/v1/messages?beta=true"} and options.method == "post":
+        if not is_dict(options.json_data):
+            raise RuntimeError("Expected dictionary json_data for post /completions endpoint")
+
+        model = options.json_data.pop("model", None)
+        model = urllib.parse.quote(str(model), safe=":")
+        stream = options.json_data.pop("stream", False)
+        if stream:
+            options.url = f"/model/{model}/invoke-with-response-stream"
+        else:
+            options.url = f"/model/{model}/invoke"
+
+    if options.url.startswith("/v1/messages/batches"):
+        raise AnthropicError("The Batch API is not supported in Bedrock yet")
+
+    if options.url == "/v1/messages/count_tokens":
+        raise AnthropicError("Token counting is not supported in Bedrock yet")
+
+    return options
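+
+# Illustrative rewrite (the model ID is hypothetical): a POST to "/v1/messages"
+# with json_data {"model": "anthropic.claude-v2:1", "stream": True} is re-routed
+# to "/model/anthropic.claude-v2:1/invoke-with-response-stream". Note that
+# urllib.parse.quote(..., safe=":") keeps the ":" in the model ID unescaped.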
+
+
+class BaseBedrockClient(BaseClient[_HttpxClientT, _DefaultStreamT]):
+    @override
+    def _make_status_error(
+        self,
+        err_msg: str,
+        *,
+        body: object,
+        response: httpx.Response,
+    ) -> APIStatusError:
+        if response.status_code == 400:
+            return _exceptions.BadRequestError(err_msg, response=response, body=body)
+
+        if response.status_code == 401:
+            return _exceptions.AuthenticationError(err_msg, response=response, body=body)
+
+        if response.status_code == 403:
+            return _exceptions.PermissionDeniedError(err_msg, response=response, body=body)
+
+        if response.status_code == 404:
+            return _exceptions.NotFoundError(err_msg, response=response, body=body)
+
+        if response.status_code == 409:
+            return _exceptions.ConflictError(err_msg, response=response, body=body)
+
+        if response.status_code == 422:
+            return _exceptions.UnprocessableEntityError(err_msg, response=response, body=body)
+
+        if response.status_code == 429:
+            return _exceptions.RateLimitError(err_msg, response=response, body=body)
+
+        if response.status_code == 503:
+            return _exceptions.ServiceUnavailableError(err_msg, response=response, body=body)
+
+        if response.status_code >= 500:
+            return _exceptions.InternalServerError(err_msg, response=response, body=body)
+        return APIStatusError(err_msg, response=response, body=body)
+
+
+class AnthropicBedrock(BaseBedrockClient[httpx.Client, Stream[Any]], SyncAPIClient):
+    messages: Messages
+    completions: Completions
+    beta: Beta
+
+    def __init__(
+        self,
+        aws_secret_key: str | None = None,
+        aws_access_key: str | None = None,
+        aws_region: str | None = None,
+        aws_profile: str | None = None,
+        aws_session_token: str | None = None,
+        base_url: str | httpx.URL | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        # Configure a custom httpx client. See the [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
+        http_client: httpx.Client | None = None,
+        # Enable or disable schema validation for data returned by the API.
+        # When enabled an error APIResponseValidationError is raised
+        # if the API responds with invalid data for the expected schema.
+        #
+        # This parameter may be removed or changed in the future.
+        # If you rely on this feature, please open a GitHub issue
+        # outlining your use-case to help us decide if it should be
+        # part of our public interface in the future.
+        _strict_response_validation: bool = False,
+    ) -> None:
+        self.aws_secret_key = aws_secret_key
+
+        self.aws_access_key = aws_access_key
+
+        if aws_region is None:
+            aws_region = os.environ.get("AWS_REGION") or "us-east-1"
+        self.aws_region = aws_region
+        self.aws_profile = aws_profile
+
+        self.aws_session_token = aws_session_token
+
+        if base_url is None:
+            base_url = os.environ.get("ANTHROPIC_BEDROCK_BASE_URL")
+        if base_url is None:
+            base_url = f"https://bedrock-runtime.{self.aws_region}.amazonaws.com"
+
+        super().__init__(
+            version=__version__,
+            base_url=base_url,
+            timeout=timeout,
+            max_retries=max_retries,
+            custom_headers=default_headers,
+            custom_query=default_query,
+            http_client=http_client,
+            _strict_response_validation=_strict_response_validation,
+        )
+
+        self.beta = Beta(self)
+        self.messages = Messages(self)
+        self.completions = Completions(self)
+
+    @override
+    def _make_sse_decoder(self) -> AWSEventStreamDecoder:
+        return AWSEventStreamDecoder()
+
+    @override
+    def _prepare_options(self, options: FinalRequestOptions) -> FinalRequestOptions:
+        return _prepare_options(options)
+
+    @override
+    def _prepare_request(self, request: httpx.Request) -> None:
+        from ._auth import get_auth_headers
+
+        data = request.read().decode()
+
+        headers = get_auth_headers(
+            method=request.method,
+            url=str(request.url),
+            headers=request.headers,
+            aws_access_key=self.aws_access_key,
+            aws_secret_key=self.aws_secret_key,
+            aws_session_token=self.aws_session_token,
+            region=self.aws_region or "us-east-1",
+            profile=self.aws_profile,
+            data=data,
+        )
+        request.headers.update(headers)
+
+    def copy(
+        self,
+        *,
+        aws_secret_key: str | None = None,
+        aws_access_key: str | None = None,
+        aws_region: str | None = None,
+        aws_session_token: str | None = None,
+        base_url: str | httpx.URL | None = None,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        http_client: httpx.Client | None = None,
+        max_retries: int | NotGiven = NOT_GIVEN,
+        default_headers: Mapping[str, str] | None = None,
+        set_default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        set_default_query: Mapping[str, object] | None = None,
+        _extra_kwargs: Mapping[str, Any] = {},
+    ) -> Self:
+        """
+        Create a new client instance re-using the same options given to the current client with optional overriding.
+        """
+        if default_headers is not None and set_default_headers is not None:
+            raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive")
+
+        if default_query is not None and set_default_query is not None:
+            raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive")
+
+        headers = self._custom_headers
+        if default_headers is not None:
+            headers = {**headers, **default_headers}
+        elif set_default_headers is not None:
+            headers = set_default_headers
+
+        params = self._custom_query
+        if default_query is not None:
+            params = {**params, **default_query}
+        elif set_default_query is not None:
+            params = set_default_query
+
+        return self.__class__(
+            aws_secret_key=aws_secret_key or self.aws_secret_key,
+            aws_access_key=aws_access_key or self.aws_access_key,
+            aws_region=aws_region or self.aws_region,
+            aws_session_token=aws_session_token or self.aws_session_token,
+            base_url=base_url or self.base_url,
+            timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
+            http_client=http_client,
+            max_retries=max_retries if is_given(max_retries) else self.max_retries,
+            default_headers=headers,
+            default_query=params,
+            **_extra_kwargs,
+        )
+
+    # Alias for `copy` for nicer inline usage, e.g.
+    # client.with_options(timeout=10).foo.create(...)
+    with_options = copy
+
+
+class AsyncAnthropicBedrock(BaseBedrockClient[httpx.AsyncClient, AsyncStream[Any]], AsyncAPIClient):
+    messages: AsyncMessages
+    completions: AsyncCompletions
+    beta: AsyncBeta
+
+    def __init__(
+        self,
+        aws_secret_key: str | None = None,
+        aws_access_key: str | None = None,
+        aws_region: str | None = None,
+        aws_profile: str | None = None,
+        aws_session_token: str | None = None,
+        base_url: str | httpx.URL | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        # Configure a custom httpx client. See the [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
+        http_client: httpx.AsyncClient | None = None,
+        # Enable or disable schema validation for data returned by the API.
+        # When enabled an error APIResponseValidationError is raised
+        # if the API responds with invalid data for the expected schema.
+        #
+        # This parameter may be removed or changed in the future.
+        # If you rely on this feature, please open a GitHub issue
+        # outlining your use-case to help us decide if it should be
+        # part of our public interface in the future.
+        _strict_response_validation: bool = False,
+    ) -> None:
+        self.aws_secret_key = aws_secret_key
+
+        self.aws_access_key = aws_access_key
+
+        if aws_region is None:
+            aws_region = os.environ.get("AWS_REGION") or "us-east-1"
+        self.aws_region = aws_region
+        self.aws_profile = aws_profile
+
+        self.aws_session_token = aws_session_token
+
+        if base_url is None:
+            base_url = os.environ.get("ANTHROPIC_BEDROCK_BASE_URL")
+        if base_url is None:
+            base_url = f"https://bedrock-runtime.{self.aws_region}.amazonaws.com"
+
+        super().__init__(
+            version=__version__,
+            base_url=base_url,
+            timeout=timeout,
+            max_retries=max_retries,
+            custom_headers=default_headers,
+            custom_query=default_query,
+            http_client=http_client,
+            _strict_response_validation=_strict_response_validation,
+        )
+
+        self.messages = AsyncMessages(self)
+        self.completions = AsyncCompletions(self)
+        self.beta = AsyncBeta(self)
+
+    @override
+    def _make_sse_decoder(self) -> AWSEventStreamDecoder:
+        return AWSEventStreamDecoder()
+
+    @override
+    async def _prepare_options(self, options: FinalRequestOptions) -> FinalRequestOptions:
+        return _prepare_options(options)
+
+    @override
+    async def _prepare_request(self, request: httpx.Request) -> None:
+        from ._auth import get_auth_headers
+
+        data = request.read().decode()
+
+        headers = get_auth_headers(
+            method=request.method,
+            url=str(request.url),
+            headers=request.headers,
+            aws_access_key=self.aws_access_key,
+            aws_secret_key=self.aws_secret_key,
+            aws_session_token=self.aws_session_token,
+            region=self.aws_region or "us-east-1",
+            profile=self.aws_profile,
+            data=data,
+        )
+        request.headers.update(headers)
+
+    def copy(
+        self,
+        *,
+        aws_secret_key: str | None = None,
+        aws_access_key: str | None = None,
+        aws_region: str | None = None,
+        aws_session_token: str | None = None,
+        base_url: str | httpx.URL | None = None,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        http_client: httpx.AsyncClient | None = None,
+        max_retries: int | NotGiven = NOT_GIVEN,
+        default_headers: Mapping[str, str] | None = None,
+        set_default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        set_default_query: Mapping[str, object] | None = None,
+        _extra_kwargs: Mapping[str, Any] = {},
+    ) -> Self:
+        """
+        Create a new client instance re-using the same options given to the current client with optional overriding.
+        """
+        if default_headers is not None and set_default_headers is not None:
+            raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive")
+
+        if default_query is not None and set_default_query is not None:
+            raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive")
+
+        headers = self._custom_headers
+        if default_headers is not None:
+            headers = {**headers, **default_headers}
+        elif set_default_headers is not None:
+            headers = set_default_headers
+
+        params = self._custom_query
+        if default_query is not None:
+            params = {**params, **default_query}
+        elif set_default_query is not None:
+            params = set_default_query
+
+        return self.__class__(
+            aws_secret_key=aws_secret_key or self.aws_secret_key,
+            aws_access_key=aws_access_key or self.aws_access_key,
+            aws_region=aws_region or self.aws_region,
+            aws_session_token=aws_session_token or self.aws_session_token,
+            base_url=base_url or self.base_url,
+            timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
+            http_client=http_client,
+            max_retries=max_retries if is_given(max_retries) else self.max_retries,
+            default_headers=headers,
+            default_query=params,
+            **_extra_kwargs,
+        )
+
+    # Alias for `copy` for nicer inline usage, e.g.
+    # client.with_options(timeout=10).foo.create(...)
+    with_options = copy
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_stream.py b/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_stream.py
new file mode 100644
index 00000000..6512c468
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_stream.py
@@ -0,0 +1,37 @@
+from __future__ import annotations
+
+from typing import TypeVar
+
+import httpx
+
+from ..._client import Anthropic, AsyncAnthropic
+from ..._streaming import Stream, AsyncStream
+from ._stream_decoder import AWSEventStreamDecoder
+
+_T = TypeVar("_T")
+
+
+class BedrockStream(Stream[_T]):
+    def __init__(
+        self,
+        *,
+        cast_to: type[_T],
+        response: httpx.Response,
+        client: Anthropic,
+    ) -> None:
+        super().__init__(cast_to=cast_to, response=response, client=client)
+
+        self._decoder = AWSEventStreamDecoder()
+
+
+class AsyncBedrockStream(AsyncStream[_T]):
+    def __init__(
+        self,
+        *,
+        cast_to: type[_T],
+        response: httpx.Response,
+        client: AsyncAnthropic,
+    ) -> None:
+        super().__init__(cast_to=cast_to, response=response, client=client)
+
+        self._decoder = AWSEventStreamDecoder()
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_stream_decoder.py b/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_stream_decoder.py
new file mode 100644
index 00000000..02e81a3c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/bedrock/_stream_decoder.py
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Iterator, AsyncIterator
+
+from ..._utils import lru_cache
+from ..._streaming import ServerSentEvent
+
+if TYPE_CHECKING:
+    from botocore.model import Shape
+    from botocore.eventstream import EventStreamMessage
+
+
+@lru_cache(maxsize=None)
+def get_response_stream_shape() -> Shape:
+    from botocore.model import ServiceModel
+    from botocore.loaders import Loader
+
+    loader = Loader()
+    bedrock_service_dict = loader.load_service_model("bedrock-runtime", "service-2")
+    bedrock_service_model = ServiceModel(bedrock_service_dict)
+    return bedrock_service_model.shape_for("ResponseStream")
+
+
+class AWSEventStreamDecoder:
+    def __init__(self) -> None:
+        from botocore.parsers import EventStreamJSONParser
+
+        self.parser = EventStreamJSONParser()
+
+    def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[ServerSentEvent]:
+        """Given an iterator that yields lines, iterate over it & yield every event encountered"""
+        from botocore.eventstream import EventStreamBuffer
+
+        event_stream_buffer = EventStreamBuffer()
+        for chunk in iterator:
+            event_stream_buffer.add_data(chunk)
+            for event in event_stream_buffer:
+                message = self._parse_message_from_event(event)
+                if message:
+                    yield ServerSentEvent(data=message, event="completion")
+
+    async def aiter_bytes(self, iterator: AsyncIterator[bytes]) -> AsyncIterator[ServerSentEvent]:
+        """Given an async iterator that yields lines, iterate over it & yield every event encountered"""
+        from botocore.eventstream import EventStreamBuffer
+
+        event_stream_buffer = EventStreamBuffer()
+        async for chunk in iterator:
+            event_stream_buffer.add_data(chunk)
+            for event in event_stream_buffer:
+                message = self._parse_message_from_event(event)
+                if message:
+                    yield ServerSentEvent(data=message, event="completion")
+
+    def _parse_message_from_event(self, event: EventStreamMessage) -> str | None:
+        response_dict = event.to_response_dict()
+        parsed_response = self.parser.parse(response_dict, get_response_stream_shape())
+        if response_dict["status_code"] != 200:
+            raise ValueError(f"Bad response code, expected 200: {response_dict}")
+
+        chunk = parsed_response.get("chunk")
+        if not chunk:
+            return None
+
+        return chunk.get("bytes").decode()  # type: ignore[no-any-return]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/streaming/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/lib/streaming/__init__.py
new file mode 100644
index 00000000..103fff58
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/streaming/__init__.py
@@ -0,0 +1,26 @@
+from ._types import (
+    TextEvent as TextEvent,
+    InputJsonEvent as InputJsonEvent,
+    MessageStopEvent as MessageStopEvent,
+    MessageStreamEvent as MessageStreamEvent,
+    ContentBlockStopEvent as ContentBlockStopEvent,
+)
+from ._messages import (
+    MessageStream as MessageStream,
+    AsyncMessageStream as AsyncMessageStream,
+    MessageStreamManager as MessageStreamManager,
+    AsyncMessageStreamManager as AsyncMessageStreamManager,
+)
+from ._beta_types import (
+    BetaTextEvent as BetaTextEvent,
+    BetaInputJsonEvent as BetaInputJsonEvent,
+    BetaMessageStopEvent as BetaMessageStopEvent,
+    BetaMessageStreamEvent as BetaMessageStreamEvent,
+    BetaContentBlockStopEvent as BetaContentBlockStopEvent,
+)
+from ._beta_messages import (
+    BetaMessageStream as BetaMessageStream,
+    BetaAsyncMessageStream as BetaAsyncMessageStream,
+    BetaMessageStreamManager as BetaMessageStreamManager,
+    BetaAsyncMessageStreamManager as BetaAsyncMessageStreamManager,
+)
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/streaming/_beta_messages.py b/.venv/lib/python3.12/site-packages/anthropic/lib/streaming/_beta_messages.py
new file mode 100644
index 00000000..d979f83c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/streaming/_beta_messages.py
@@ -0,0 +1,462 @@
+from __future__ import annotations
+
+from types import TracebackType
+from typing import TYPE_CHECKING, Any, Type, Callable, cast
+from typing_extensions import Self, Iterator, Awaitable, AsyncIterator, assert_never
+
+import httpx
+from pydantic import BaseModel
+
+from ..._utils import consume_sync_iterator, consume_async_iterator
+from ..._models import build, construct_type, construct_type_unchecked
+from ._beta_types import (
+    BetaTextEvent,
+    BetaCitationEvent,
+    BetaThinkingEvent,
+    BetaInputJsonEvent,
+    BetaSignatureEvent,
+    BetaMessageStopEvent,
+    BetaMessageStreamEvent,
+    BetaContentBlockStopEvent,
+)
+from ..._streaming import Stream, AsyncStream
+from ...types.beta import BetaMessage, BetaContentBlock, BetaRawMessageStreamEvent
+
+
+class BetaMessageStream:
+    text_stream: Iterator[str]
+    """Iterator over just the text deltas in the stream.
+
+    ```py
+    for text in stream.text_stream:
+        print(text, end="", flush=True)
+    print()
+    ```
+    """
+
+    def __init__(self, raw_stream: Stream[BetaRawMessageStreamEvent]) -> None:
+        self._raw_stream = raw_stream
+        self.text_stream = self.__stream_text__()
+        self._iterator = self.__stream__()
+        self.__final_message_snapshot: BetaMessage | None = None
+
+    @property
+    def response(self) -> httpx.Response:
+        return self._raw_stream.response
+
+    @property
+    def request_id(self) -> str | None:
+        return self.response.headers.get("request-id")  # type: ignore[no-any-return]
+
+    def __next__(self) -> BetaMessageStreamEvent:
+        return self._iterator.__next__()
+
+    def __iter__(self) -> Iterator[BetaMessageStreamEvent]:
+        for item in self._iterator:
+            yield item
+
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        self.close()
+
+    def close(self) -> None:
+        """
+        Close the response and release the connection.
+
+        Automatically called if the response body is read to completion.
+        """
+        self._raw_stream.close()
+
+    def get_final_message(self) -> BetaMessage:
+        """Waits until the stream has been read to completion and returns
+        the accumulated `BetaMessage` object.
+        """
+        self.until_done()
+        assert self.__final_message_snapshot is not None
+        return self.__final_message_snapshot
+
+    def get_final_text(self) -> str:
+        """Returns all `text` content blocks concatenated together.
+
+        > [!NOTE]
+        > Currently the API will only respond with a single content block.
+
+        Will raise an error if no `text` content blocks were returned.
+        """
+        message = self.get_final_message()
+        text_blocks: list[str] = []
+        for block in message.content:
+            if block.type == "text":
+                text_blocks.append(block.text)
+
+        if not text_blocks:
+            raise RuntimeError("Expected to have received at least 1 text block")
+
+        return "".join(text_blocks)
+
+    def until_done(self) -> None:
+        """Blocks until the stream has been consumed"""
+        consume_sync_iterator(self)
+
+    # properties
+    @property
+    def current_message_snapshot(self) -> BetaMessage:
+        assert self.__final_message_snapshot is not None
+        return self.__final_message_snapshot
+
+    def __stream__(self) -> Iterator[BetaMessageStreamEvent]:
+        for sse_event in self._raw_stream:
+            self.__final_message_snapshot = accumulate_event(
+                event=sse_event,
+                current_snapshot=self.__final_message_snapshot,
+            )
+
+            events_to_fire = build_events(event=sse_event, message_snapshot=self.current_message_snapshot)
+            for event in events_to_fire:
+                yield event
+
+    def __stream_text__(self) -> Iterator[str]:
+        for chunk in self:
+            if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta":
+                yield chunk.delta.text
+
+
+class BetaMessageStreamManager:
+    """Wrapper over MessageStream that is returned by `.stream()`.
+
+    ```py
+    with client.beta.messages.stream(...) as stream:
+        for chunk in stream:
+            ...
+    ```
+    """
+
+    def __init__(
+        self,
+        api_request: Callable[[], Stream[BetaRawMessageStreamEvent]],
+    ) -> None:
+        self.__stream: BetaMessageStream | None = None
+        self.__api_request = api_request
+
+    def __enter__(self) -> BetaMessageStream:
+        raw_stream = self.__api_request()
+        self.__stream = BetaMessageStream(raw_stream)
+        return self.__stream
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if self.__stream is not None:
+            self.__stream.close()
+
+
+class BetaAsyncMessageStream:
+    text_stream: AsyncIterator[str]
+    """Async iterator over just the text deltas in the stream.
+
+    ```py
+    async for text in stream.text_stream:
+        print(text, end="", flush=True)
+    print()
+    ```
+    """
+
+    def __init__(self, raw_stream: AsyncStream[BetaRawMessageStreamEvent]) -> None:
+        self._raw_stream = raw_stream
+        self.text_stream = self.__stream_text__()
+        self._iterator = self.__stream__()
+        self.__final_message_snapshot: BetaMessage | None = None
+
+    @property
+    def response(self) -> httpx.Response:
+        return self._raw_stream.response
+
+    @property
+    def request_id(self) -> str | None:
+        return self.response.headers.get("request-id")  # type: ignore[no-any-return]
+
+    async def __anext__(self) -> BetaMessageStreamEvent:
+        return await self._iterator.__anext__()
+
+    async def __aiter__(self) -> AsyncIterator[BetaMessageStreamEvent]:
+        async for item in self._iterator:
+            yield item
+
+    async def __aenter__(self) -> Self:
+        return self
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        await self.close()
+
+    async def close(self) -> None:
+        """
+        Close the response and release the connection.
+
+        Automatically called if the response body is read to completion.
+        """
+        await self._raw_stream.close()
+
+    async def get_final_message(self) -> BetaMessage:
+        """Waits until the stream has been read to completion and returns
+        the accumulated `BetaMessage` object.
+        """
+        await self.until_done()
+        assert self.__final_message_snapshot is not None
+        return self.__final_message_snapshot
+
+    async def get_final_text(self) -> str:
+        """Returns all `text` content blocks concatenated together.
+
+        > [!NOTE]
+        > Currently the API will only respond with a single content block.
+
+        Will raise an error if no `text` content blocks were returned.
+        """
+        message = await self.get_final_message()
+        text_blocks: list[str] = []
+        for block in message.content:
+            if block.type == "text":
+                text_blocks.append(block.text)
+
+        if not text_blocks:
+            raise RuntimeError("Expected to have received at least 1 text block")
+
+        return "".join(text_blocks)
+
+    async def until_done(self) -> None:
+        """Waits until the stream has been consumed"""
+        await consume_async_iterator(self)
+
+    # properties
+    @property
+    def current_message_snapshot(self) -> BetaMessage:
+        assert self.__final_message_snapshot is not None
+        return self.__final_message_snapshot
+
+    async def __stream__(self) -> AsyncIterator[BetaMessageStreamEvent]:
+        async for sse_event in self._raw_stream:
+            self.__final_message_snapshot = accumulate_event(
+                event=sse_event,
+                current_snapshot=self.__final_message_snapshot,
+            )
+
+            events_to_fire = build_events(event=sse_event, message_snapshot=self.current_message_snapshot)
+            for event in events_to_fire:
+                yield event
+
+    async def __stream_text__(self) -> AsyncIterator[str]:
+        async for chunk in self:
+            if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta":
+                yield chunk.delta.text
+
+
+class BetaAsyncMessageStreamManager:
+    """Wrapper over BetaAsyncMessageStream that is returned by `.stream()`
+    so that an async context manager can be used without `await`ing the
+    original client call.
+
+    ```py
+    async with client.beta.messages.stream(...) as stream:
+        async for chunk in stream:
+            ...
+    ```
+    """
+
+    def __init__(
+        self,
+        api_request: Awaitable[AsyncStream[BetaRawMessageStreamEvent]],
+    ) -> None:
+        self.__stream: BetaAsyncMessageStream | None = None
+        self.__api_request = api_request
+
+    async def __aenter__(self) -> BetaAsyncMessageStream:
+        raw_stream = await self.__api_request
+        self.__stream = BetaAsyncMessageStream(raw_stream)
+        return self.__stream
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if self.__stream is not None:
+            await self.__stream.close()
+
+
+def build_events(
+    *,
+    event: BetaRawMessageStreamEvent,
+    message_snapshot: BetaMessage,
+) -> list[BetaMessageStreamEvent]:
+    events_to_fire: list[BetaMessageStreamEvent] = []
+
+    if event.type == "message_start":
+        events_to_fire.append(event)
+    elif event.type == "message_delta":
+        events_to_fire.append(event)
+    elif event.type == "message_stop":
+        events_to_fire.append(build(BetaMessageStopEvent, type="message_stop", message=message_snapshot))
+    elif event.type == "content_block_start":
+        events_to_fire.append(event)
+    elif event.type == "content_block_delta":
+        events_to_fire.append(event)
+
+        content_block = message_snapshot.content[event.index]
+        if event.delta.type == "text_delta":
+            if content_block.type == "text":
+                events_to_fire.append(
+                    build(
+                        BetaTextEvent,
+                        type="text",
+                        text=event.delta.text,
+                        snapshot=content_block.text,
+                    )
+                )
+        elif event.delta.type == "input_json_delta":
+            if content_block.type == "tool_use":
+                events_to_fire.append(
+                    build(
+                        BetaInputJsonEvent,
+                        type="input_json",
+                        partial_json=event.delta.partial_json,
+                        snapshot=content_block.input,
+                    )
+                )
+        elif event.delta.type == "citations_delta":
+            if content_block.type == "text":
+                events_to_fire.append(
+                    build(
+                        BetaCitationEvent,
+                        type="citation",
+                        citation=event.delta.citation,
+                        snapshot=content_block.citations or [],
+                    )
+                )
+        elif event.delta.type == "thinking_delta":
+            if content_block.type == "thinking":
+                events_to_fire.append(
+                    build(
+                        BetaThinkingEvent,
+                        type="thinking",
+                        thinking=event.delta.thinking,
+                        snapshot=content_block.thinking,
+                    )
+                )
+        elif event.delta.type == "signature_delta":
+            if content_block.type == "thinking":
+                events_to_fire.append(
+                    build(
+                        BetaSignatureEvent,
+                        type="signature",
+                        signature=content_block.signature,
+                    )
+                )
+        else:
+            # we only want exhaustive checking for linters, not at runtime
+            if TYPE_CHECKING:  # type: ignore[unreachable]
+                assert_never(event.delta)
+    elif event.type == "content_block_stop":
+        content_block = message_snapshot.content[event.index]
+
+        events_to_fire.append(
+            build(BetaContentBlockStopEvent, type="content_block_stop", index=event.index, content_block=content_block),
+        )
+    else:
+        # we only want exhaustive checking for linters, not at runtime
+        if TYPE_CHECKING:  # type: ignore[unreachable]
+            assert_never(event)
+
+    return events_to_fire
+
+
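+# Name of the untyped attribute used to stash the raw partial-JSON buffer on a
+# `tool_use` content block so each delta can be re-parsed cumulatively.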
+JSON_BUF_PROPERTY = "__json_buf"
+
+
+def accumulate_event(
+    *,
+    event: BetaRawMessageStreamEvent,
+    current_snapshot: BetaMessage | None,
+) -> BetaMessage:
+    if not isinstance(cast(Any, event), BaseModel):
+        event = cast(  # pyright: ignore[reportUnnecessaryCast]
+            BetaRawMessageStreamEvent,
+            construct_type_unchecked(
+                type_=cast(Type[BetaRawMessageStreamEvent], BetaRawMessageStreamEvent),
+                value=event,
+            ),
+        )
+        if not isinstance(cast(Any, event), BaseModel):
+            raise TypeError(f"Unexpected event runtime type, after deserialising twice - {event} - {type(event)}")
+
+    if current_snapshot is None:
+        if event.type == "message_start":
+            return BetaMessage.construct(**cast(Any, event.message.to_dict()))
+
+        raise RuntimeError(f'Unexpected event order, got {event.type} before "message_start"')
+
+    if event.type == "content_block_start":
+        # TODO: check index
+        current_snapshot.content.append(
+            cast(
+                BetaContentBlock,
+                construct_type(type_=BetaContentBlock, value=event.content_block.model_dump()),
+            ),
+        )
+    elif event.type == "content_block_delta":
+        content = current_snapshot.content[event.index]
+        if event.delta.type == "text_delta":
+            if content.type == "text":
+                content.text += event.delta.text
+        elif event.delta.type == "input_json_delta":
+            if content.type == "tool_use":
+                from jiter import from_json
+
+                # we need to keep track of the raw JSON string as well so that we can
+                # re-parse it for each delta, for now we just store it as an untyped
+                # property on the snapshot
+                json_buf = cast(bytes, getattr(content, JSON_BUF_PROPERTY, b""))
+                json_buf += bytes(event.delta.partial_json, "utf-8")
+
+                if json_buf:
+                    content.input = from_json(json_buf, partial_mode=True)
+
+                setattr(content, JSON_BUF_PROPERTY, json_buf)
+        elif event.delta.type == "citations_delta":
+            if content.type == "text":
+                if not content.citations:
+                    content.citations = [event.delta.citation]
+                else:
+                    content.citations.append(event.delta.citation)
+        elif event.delta.type == "thinking_delta":
+            if content.type == "thinking":
+                content.thinking += event.delta.thinking
+        elif event.delta.type == "signature_delta":
+            if content.type == "thinking":
+                content.signature = event.delta.signature
+        else:
+            # we only want exhaustive checking for linters, not at runtime
+            if TYPE_CHECKING:  # type: ignore[unreachable]
+                assert_never(event.delta)
+    elif event.type == "message_delta":
+        current_snapshot.stop_reason = event.delta.stop_reason
+        current_snapshot.stop_sequence = event.delta.stop_sequence
+        current_snapshot.usage.output_tokens = event.usage.output_tokens
+
+    return current_snapshot
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/streaming/_beta_types.py b/.venv/lib/python3.12/site-packages/anthropic/lib/streaming/_beta_types.py
new file mode 100644
index 00000000..24bb710c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/streaming/_beta_types.py
@@ -0,0 +1,100 @@
+from typing import Union
+from typing_extensions import List, Literal, Annotated
+
+from ..._models import BaseModel
+from ...types.beta import (
+    BetaMessage,
+    BetaContentBlock,
+    BetaRawMessageStopEvent,
+    BetaRawMessageDeltaEvent,
+    BetaRawMessageStartEvent,
+    BetaRawContentBlockStopEvent,
+    BetaRawContentBlockDeltaEvent,
+    BetaRawContentBlockStartEvent,
+)
+from ..._utils._transform import PropertyInfo
+from ...types.beta.beta_citations_delta import Citation
+
+
+class BetaTextEvent(BaseModel):
+    type: Literal["text"]
+
+    text: str
+    """The text delta"""
+
+    snapshot: str
+    """The entire accumulated text"""
+
+
+class BetaCitationEvent(BaseModel):
+    type: Literal["citation"]
+
+    citation: Citation
+    """The new citation"""
+
+    snapshot: List[Citation]
+    """All of the accumulated citations"""
+
+
+class BetaThinkingEvent(BaseModel):
+    type: Literal["thinking"]
+
+    thinking: str
+    """The thinking delta"""
+
+    snapshot: str
+    """The accumulated thinking so far"""
+
+
+class BetaSignatureEvent(BaseModel):
+    type: Literal["signature"]
+
+    signature: str
+    """The signature of the thinking block"""
+
+
+class BetaInputJsonEvent(BaseModel):
+    type: Literal["input_json"]
+
+    partial_json: str
+    """A partial JSON string delta
+
+    e.g. `'"San Francisco,'`
+    """
+
+    snapshot: object
+    """The currently accumulated parsed object.
+
+    e.g. `{'location': 'San Francisco, CA'}`
+    """
+
+
+class BetaMessageStopEvent(BetaRawMessageStopEvent):
+    type: Literal["message_stop"]
+
+    message: BetaMessage
+
+
+class BetaContentBlockStopEvent(BetaRawContentBlockStopEvent):
+    type: Literal["content_block_stop"]
+
+    content_block: BetaContentBlock
+
+
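+# Discriminated union, keyed on the `type` field, of every event the
+# higher-level stream can yield: the raw API events plus the convenience
+# events defined above.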
+BetaMessageStreamEvent = Annotated[
+    Union[
+        BetaTextEvent,
+        BetaCitationEvent,
+        BetaThinkingEvent,
+        BetaSignatureEvent,
+        BetaInputJsonEvent,
+        BetaRawMessageStartEvent,
+        BetaRawMessageDeltaEvent,
+        BetaMessageStopEvent,
+        BetaRawContentBlockStartEvent,
+        BetaRawContentBlockDeltaEvent,
+        BetaContentBlockStopEvent,
+    ],
+    PropertyInfo(discriminator="type"),
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/streaming/_messages.py b/.venv/lib/python3.12/site-packages/anthropic/lib/streaming/_messages.py
new file mode 100644
index 00000000..09ed24f9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/streaming/_messages.py
@@ -0,0 +1,462 @@
+from __future__ import annotations
+
+from types import TracebackType
+from typing import TYPE_CHECKING, Any, Type, Callable, cast
+from typing_extensions import Self, Iterator, Awaitable, AsyncIterator, assert_never
+
+import httpx
+from pydantic import BaseModel
+
+from ._types import (
+    TextEvent,
+    CitationEvent,
+    ThinkingEvent,
+    InputJsonEvent,
+    SignatureEvent,
+    MessageStopEvent,
+    MessageStreamEvent,
+    ContentBlockStopEvent,
+)
+from ...types import Message, ContentBlock, RawMessageStreamEvent
+from ..._utils import consume_sync_iterator, consume_async_iterator
+from ..._models import build, construct_type, construct_type_unchecked
+from ..._streaming import Stream, AsyncStream
+
+
+class MessageStream:
+    text_stream: Iterator[str]
+    """Iterator over just the text deltas in the stream.
+
+    ```py
+    for text in stream.text_stream:
+        print(text, end="", flush=True)
+    print()
+    ```
+    """
+
+    def __init__(self, raw_stream: Stream[RawMessageStreamEvent]) -> None:
+        self._raw_stream = raw_stream
+        self.text_stream = self.__stream_text__()
+        self._iterator = self.__stream__()
+        self.__final_message_snapshot: Message | None = None
+
+    @property
+    def response(self) -> httpx.Response:
+        return self._raw_stream.response
+
+    @property
+    def request_id(self) -> str | None:
+        return self.response.headers.get("request-id")  # type: ignore[no-any-return]
+
+    def __next__(self) -> MessageStreamEvent:
+        return self._iterator.__next__()
+
+    def __iter__(self) -> Iterator[MessageStreamEvent]:
+        for item in self._iterator:
+            yield item
+
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        self.close()
+
+    def close(self) -> None:
+        """
+        Close the response and release the connection.
+
+        Automatically called if the response body is read to completion.
+        """
+        self._raw_stream.close()
+
+    def get_final_message(self) -> Message:
+        """Waits until the stream has been read to completion and returns
+        the accumulated `Message` object.
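+
+        ```py
+        with client.messages.stream(...) as stream:
+            message = stream.get_final_message()
+        ```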
+        """
+        self.until_done()
+        assert self.__final_message_snapshot is not None
+        return self.__final_message_snapshot
+
+    def get_final_text(self) -> str:
+        """Returns all `text` content blocks concatenated together.
+
+        > [!NOTE]
+        > Currently the API will only respond with a single content block.
+
+        Will raise an error if no `text` content blocks were returned.
+        """
+        message = self.get_final_message()
+        text_blocks: list[str] = []
+        for block in message.content:
+            if block.type == "text":
+                text_blocks.append(block.text)
+
+        if not text_blocks:
+            raise RuntimeError("Expected to have received at least 1 text block")
+
+        return "".join(text_blocks)
+
+    def until_done(self) -> None:
+        """Blocks until the stream has been consumed"""
+        consume_sync_iterator(self)
+
+    # properties
+    @property
+    def current_message_snapshot(self) -> Message:
+        assert self.__final_message_snapshot is not None
+        return self.__final_message_snapshot
+
+    def __stream__(self) -> Iterator[MessageStreamEvent]:
+        for sse_event in self._raw_stream:
+            self.__final_message_snapshot = accumulate_event(
+                event=sse_event,
+                current_snapshot=self.__final_message_snapshot,
+            )
+
+            events_to_fire = build_events(event=sse_event, message_snapshot=self.current_message_snapshot)
+            for event in events_to_fire:
+                yield event
+
+    def __stream_text__(self) -> Iterator[str]:
+        for chunk in self:
+            if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta":
+                yield chunk.delta.text
+
+
+class MessageStreamManager:
+    """Wrapper over MessageStream that is returned by `.stream()`.
+
+    ```py
+    with client.messages.stream(...) as stream:
+        for chunk in stream:
+            ...
+    ```
+    """
+
+    def __init__(
+        self,
+        api_request: Callable[[], Stream[RawMessageStreamEvent]],
+    ) -> None:
+        self.__stream: MessageStream | None = None
+        self.__api_request = api_request
+
+    def __enter__(self) -> MessageStream:
+        raw_stream = self.__api_request()
+        self.__stream = MessageStream(raw_stream)
+        return self.__stream
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if self.__stream is not None:
+            self.__stream.close()
+
+
+class AsyncMessageStream:
+    text_stream: AsyncIterator[str]
+    """Async iterator over just the text deltas in the stream.
+
+    ```py
+    async for text in stream.text_stream:
+        print(text, end="", flush=True)
+    print()
+    ```
+    """
+
+    def __init__(self, raw_stream: AsyncStream[RawMessageStreamEvent]) -> None:
+        self._raw_stream = raw_stream
+        self.text_stream = self.__stream_text__()
+        self._iterator = self.__stream__()
+        self.__final_message_snapshot: Message | None = None
+
+    @property
+    def response(self) -> httpx.Response:
+        return self._raw_stream.response
+
+    @property
+    def request_id(self) -> str | None:
+        return self.response.headers.get("request-id")  # type: ignore[no-any-return]
+
+    async def __anext__(self) -> MessageStreamEvent:
+        return await self._iterator.__anext__()
+
+    async def __aiter__(self) -> AsyncIterator[MessageStreamEvent]:
+        async for item in self._iterator:
+            yield item
+
+    async def __aenter__(self) -> Self:
+        return self
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        await self.close()
+
+    async def close(self) -> None:
+        """
+        Close the response and release the connection.
+
+        Automatically called if the response body is read to completion.
+        """
+        await self._raw_stream.close()
+
+    async def get_final_message(self) -> Message:
+        """Waits until the stream has been read to completion and returns
+        the accumulated `Message` object.
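+
+        ```py
+        async with client.messages.stream(...) as stream:
+            message = await stream.get_final_message()
+        ```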
+        """
+        await self.until_done()
+        assert self.__final_message_snapshot is not None
+        return self.__final_message_snapshot
+
+    async def get_final_text(self) -> str:
+        """Returns all `text` content blocks concatenated together.
+
+        > [!NOTE]
+        > Currently the API will only respond with a single content block.
+
+        Will raise an error if no `text` content blocks were returned.
+        """
+        message = await self.get_final_message()
+        text_blocks: list[str] = []
+        for block in message.content:
+            if block.type == "text":
+                text_blocks.append(block.text)
+
+        if not text_blocks:
+            raise RuntimeError("Expected to have received at least 1 text block")
+
+        return "".join(text_blocks)
+
+    async def until_done(self) -> None:
+        """Waits until the stream has been consumed"""
+        await consume_async_iterator(self)
+
+    # properties
+    @property
+    def current_message_snapshot(self) -> Message:
+        assert self.__final_message_snapshot is not None
+        return self.__final_message_snapshot
+
+    async def __stream__(self) -> AsyncIterator[MessageStreamEvent]:
+        async for sse_event in self._raw_stream:
+            self.__final_message_snapshot = accumulate_event(
+                event=sse_event,
+                current_snapshot=self.__final_message_snapshot,
+            )
+
+            events_to_fire = build_events(event=sse_event, message_snapshot=self.current_message_snapshot)
+            for event in events_to_fire:
+                yield event
+
+    async def __stream_text__(self) -> AsyncIterator[str]:
+        async for chunk in self:
+            if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta":
+                yield chunk.delta.text
+
+
+class AsyncMessageStreamManager:
+    """Wrapper over AsyncMessageStream that is returned by `.stream()`
+    so that an async context manager can be used without `await`ing the
+    original client call.
+
+    ```py
+    async with client.messages.stream(...) as stream:
+        async for chunk in stream:
+            ...
+    ```
+    """
+
+    def __init__(
+        self,
+        api_request: Awaitable[AsyncStream[RawMessageStreamEvent]],
+    ) -> None:
+        self.__stream: AsyncMessageStream | None = None
+        self.__api_request = api_request
+
+    async def __aenter__(self) -> AsyncMessageStream:
+        raw_stream = await self.__api_request
+        self.__stream = AsyncMessageStream(raw_stream)
+        return self.__stream
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if self.__stream is not None:
+            await self.__stream.close()
+
+
+def build_events(
+    *,
+    event: RawMessageStreamEvent,
+    message_snapshot: Message,
+) -> list[MessageStreamEvent]:
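+    # Raw events are passed through (stop events are re-built so they carry the
+    # accumulated snapshot), and content-block deltas additionally fire a typed
+    # convenience event (text/citation/thinking/signature/input_json).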
+    events_to_fire: list[MessageStreamEvent] = []
+
+    if event.type == "message_start":
+        events_to_fire.append(event)
+    elif event.type == "message_delta":
+        events_to_fire.append(event)
+    elif event.type == "message_stop":
+        events_to_fire.append(build(MessageStopEvent, type="message_stop", message=message_snapshot))
+    elif event.type == "content_block_start":
+        events_to_fire.append(event)
+    elif event.type == "content_block_delta":
+        events_to_fire.append(event)
+
+        content_block = message_snapshot.content[event.index]
+        if event.delta.type == "text_delta":
+            if content_block.type == "text":
+                events_to_fire.append(
+                    build(
+                        TextEvent,
+                        type="text",
+                        text=event.delta.text,
+                        snapshot=content_block.text,
+                    )
+                )
+        elif event.delta.type == "input_json_delta":
+            if content_block.type == "tool_use":
+                events_to_fire.append(
+                    build(
+                        InputJsonEvent,
+                        type="input_json",
+                        partial_json=event.delta.partial_json,
+                        snapshot=content_block.input,
+                    )
+                )
+        elif event.delta.type == "citations_delta":
+            if content_block.type == "text":
+                events_to_fire.append(
+                    build(
+                        CitationEvent,
+                        type="citation",
+                        citation=event.delta.citation,
+                        snapshot=content_block.citations or [],
+                    )
+                )
+        elif event.delta.type == "thinking_delta":
+            if content_block.type == "thinking":
+                events_to_fire.append(
+                    build(
+                        ThinkingEvent,
+                        type="thinking",
+                        thinking=event.delta.thinking,
+                        snapshot=content_block.thinking,
+                    )
+                )
+        elif event.delta.type == "signature_delta":
+            if content_block.type == "thinking":
+                events_to_fire.append(
+                    build(
+                        SignatureEvent,
+                        type="signature",
+                        signature=content_block.signature,
+                    )
+                )
+        else:
+            # we only want exhaustive checking for linters, not at runtime
+            if TYPE_CHECKING:  # type: ignore[unreachable]
+                assert_never(event.delta)
+    elif event.type == "content_block_stop":
+        content_block = message_snapshot.content[event.index]
+
+        events_to_fire.append(
+            build(ContentBlockStopEvent, type="content_block_stop", index=event.index, content_block=content_block),
+        )
+    else:
+        # we only want exhaustive checking for linters, not at runtime
+        if TYPE_CHECKING:  # type: ignore[unreachable]
+            assert_never(event)
+
+    return events_to_fire
+
+
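+# Name of the untyped attribute used to stash the raw partial-JSON buffer on a
+# `tool_use` content block so each delta can be re-parsed cumulatively.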
+JSON_BUF_PROPERTY = "__json_buf"
+
+
+def accumulate_event(
+    *,
+    event: RawMessageStreamEvent,
+    current_snapshot: Message | None,
+) -> Message:
+    if not isinstance(cast(Any, event), BaseModel):
+        event = cast(  # pyright: ignore[reportUnnecessaryCast]
+            RawMessageStreamEvent,
+            construct_type_unchecked(
+                type_=cast(Type[RawMessageStreamEvent], RawMessageStreamEvent),
+                value=event,
+            ),
+        )
+        if not isinstance(cast(Any, event), BaseModel):
+            raise TypeError(f"Unexpected event runtime type, after deserialising twice - {event} - {type(event)}")
+
+    if current_snapshot is None:
+        if event.type == "message_start":
+            return Message.construct(**cast(Any, event.message.to_dict()))
+
+        raise RuntimeError(f'Unexpected event order, got {event.type} before "message_start"')
+
+    if event.type == "content_block_start":
+        # TODO: check index
+        current_snapshot.content.append(
+            cast(
+                ContentBlock,
+                construct_type(type_=ContentBlock, value=event.content_block.model_dump()),
+            ),
+        )
+    elif event.type == "content_block_delta":
+        content = current_snapshot.content[event.index]
+        if event.delta.type == "text_delta":
+            if content.type == "text":
+                content.text += event.delta.text
+        elif event.delta.type == "input_json_delta":
+            if content.type == "tool_use":
+                from jiter import from_json
+
+                # we need to keep track of the raw JSON string as well so that we can
+                # re-parse it for each delta, for now we just store it as an untyped
+                # property on the snapshot
+                json_buf = cast(bytes, getattr(content, JSON_BUF_PROPERTY, b""))
+                json_buf += bytes(event.delta.partial_json, "utf-8")
+
+                if json_buf:
+                    content.input = from_json(json_buf, partial_mode=True)
+
+                setattr(content, JSON_BUF_PROPERTY, json_buf)
+        elif event.delta.type == "citations_delta":
+            if content.type == "text":
+                if not content.citations:
+                    content.citations = [event.delta.citation]
+                else:
+                    content.citations.append(event.delta.citation)
+        elif event.delta.type == "thinking_delta":
+            if content.type == "thinking":
+                content.thinking += event.delta.thinking
+        elif event.delta.type == "signature_delta":
+            if content.type == "thinking":
+                content.signature = event.delta.signature
+        else:
+            # we only want exhaustive checking for linters, not at runtime
+            if TYPE_CHECKING:  # type: ignore[unreachable]
+                assert_never(event.delta)
+    elif event.type == "message_delta":
+        current_snapshot.stop_reason = event.delta.stop_reason
+        current_snapshot.stop_sequence = event.delta.stop_sequence
+        current_snapshot.usage.output_tokens = event.usage.output_tokens
+
+    return current_snapshot
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/streaming/_types.py b/.venv/lib/python3.12/site-packages/anthropic/lib/streaming/_types.py
new file mode 100644
index 00000000..0918427a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/streaming/_types.py
@@ -0,0 +1,100 @@
+from typing import Union
+from typing_extensions import List, Literal, Annotated
+
+from ...types import (
+    Message,
+    ContentBlock,
+    MessageDeltaEvent as RawMessageDeltaEvent,
+    MessageStartEvent as RawMessageStartEvent,
+    RawMessageStopEvent,
+    ContentBlockDeltaEvent as RawContentBlockDeltaEvent,
+    ContentBlockStartEvent as RawContentBlockStartEvent,
+    RawContentBlockStopEvent,
+)
+from ..._models import BaseModel
+from ..._utils._transform import PropertyInfo
+from ...types.citations_delta import Citation
+
+
+class TextEvent(BaseModel):
+    type: Literal["text"]
+
+    text: str
+    """The text delta"""
+
+    snapshot: str
+    """The entire accumulated text"""
+
+
+class CitationEvent(BaseModel):
+    type: Literal["citation"]
+
+    citation: Citation
+    """The new citation"""
+
+    snapshot: List[Citation]
+    """All of the accumulated citations"""
+
+
+class ThinkingEvent(BaseModel):
+    type: Literal["thinking"]
+
+    thinking: str
+    """The thinking delta"""
+
+    snapshot: str
+    """The accumulated thinking so far"""
+
+
+class SignatureEvent(BaseModel):
+    type: Literal["signature"]
+
+    signature: str
+    """The signature of the thinking block"""
+
+
+class InputJsonEvent(BaseModel):
+    type: Literal["input_json"]
+
+    partial_json: str
+    """A partial JSON string delta
+
+    e.g. `'"San Francisco,'`
+    """
+
+    snapshot: object
+    """The currently accumulated parsed object.
+
+    e.g. `{'location': 'San Francisco, CA'}`
+    """
+
+
+class MessageStopEvent(RawMessageStopEvent):
+    type: Literal["message_stop"]
+
+    message: Message
+
+
+class ContentBlockStopEvent(RawContentBlockStopEvent):
+    type: Literal["content_block_stop"]
+
+    content_block: ContentBlock
+
+
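+# Discriminated union, keyed on the `type` field, of every event the
+# higher-level stream can yield: the raw API events plus the convenience
+# events defined above.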
+MessageStreamEvent = Annotated[
+    Union[
+        TextEvent,
+        CitationEvent,
+        ThinkingEvent,
+        SignatureEvent,
+        InputJsonEvent,
+        RawMessageStartEvent,
+        RawMessageDeltaEvent,
+        MessageStopEvent,
+        RawContentBlockStartEvent,
+        RawContentBlockDeltaEvent,
+        ContentBlockStopEvent,
+    ],
+    PropertyInfo(discriminator="type"),
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/vertex/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/lib/vertex/__init__.py
new file mode 100644
index 00000000..45b6301e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/vertex/__init__.py
@@ -0,0 +1 @@
+from ._client import AnthropicVertex as AnthropicVertex, AsyncAnthropicVertex as AsyncAnthropicVertex
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/vertex/_auth.py b/.venv/lib/python3.12/site-packages/anthropic/lib/vertex/_auth.py
new file mode 100644
index 00000000..3063016a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/vertex/_auth.py
@@ -0,0 +1,42 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, cast
+
+from .._extras import google_auth
+
+if TYPE_CHECKING:
+    from google.auth.credentials import Credentials  # type: ignore[import-untyped]
+
+# pyright: reportMissingTypeStubs=false, reportUnknownVariableType=false, reportUnknownMemberType=false, reportUnknownArgumentType=false
+# google libraries don't provide types :/
+
+# Note: these functions are blocking as they make HTTP requests, the async
+# client runs these functions in a separate thread to ensure they do not
+# cause synchronous blocking issues.
+
+
+def load_auth(*, project_id: str | None) -> tuple[Credentials, str]:
+    from google.auth.transport.requests import Request  # type: ignore[import-untyped]
+
+    credentials, loaded_project_id = google_auth.default(
+        scopes=["https://www.googleapis.com/auth/cloud-platform"],
+    )
+    credentials = cast(Any, credentials)
+    credentials.refresh(Request())
+
+    if not project_id:
+        project_id = loaded_project_id
+
+    if not project_id:
+        raise ValueError("Could not resolve project_id")
+
+    if not isinstance(project_id, str):
+        raise TypeError(f"Expected project_id to be a str but got {type(project_id)}")
+
+    return credentials, project_id
+
+
+def refresh_auth(credentials: Credentials) -> None:
+    from google.auth.transport.requests import Request  # type: ignore[import-untyped]
+
+    credentials.refresh(Request())
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/vertex/_beta.py b/.venv/lib/python3.12/site-packages/anthropic/lib/vertex/_beta.py
new file mode 100644
index 00000000..f2a91b42
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/vertex/_beta.py
@@ -0,0 +1,102 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ._beta_messages import (
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
+
+__all__ = ["Beta", "AsyncBeta"]
+
+
+class Beta(SyncAPIResource):
+    @cached_property
+    def messages(self) -> Messages:
+        return Messages(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> BetaWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return BetaWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> BetaWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return BetaWithStreamingResponse(self)
+
+
+class AsyncBeta(AsyncAPIResource):
+    @cached_property
+    def messages(self) -> AsyncMessages:
+        return AsyncMessages(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncBetaWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncBetaWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncBetaWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return AsyncBetaWithStreamingResponse(self)
+
+
+class BetaWithRawResponse:
+    def __init__(self, beta: Beta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def messages(self) -> MessagesWithRawResponse:
+        return MessagesWithRawResponse(self._beta.messages)
+
+
+class AsyncBetaWithRawResponse:
+    def __init__(self, beta: AsyncBeta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def messages(self) -> AsyncMessagesWithRawResponse:
+        return AsyncMessagesWithRawResponse(self._beta.messages)
+
+
+class BetaWithStreamingResponse:
+    def __init__(self, beta: Beta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def messages(self) -> MessagesWithStreamingResponse:
+        return MessagesWithStreamingResponse(self._beta.messages)
+
+
+class AsyncBetaWithStreamingResponse:
+    def __init__(self, beta: AsyncBeta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def messages(self) -> AsyncMessagesWithStreamingResponse:
+        return AsyncMessagesWithStreamingResponse(self._beta.messages)
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/vertex/_beta_messages.py b/.venv/lib/python3.12/site-packages/anthropic/lib/vertex/_beta_messages.py
new file mode 100644
index 00000000..332f6fba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/vertex/_beta_messages.py
@@ -0,0 +1,93 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ... import _legacy_response
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...resources.beta import Messages as FirstPartyMessagesAPI, AsyncMessages as FirstPartyAsyncMessagesAPI
+
+__all__ = ["Messages", "AsyncMessages"]
+
+
+class Messages(SyncAPIResource):
+    create = FirstPartyMessagesAPI.create
+
+    @cached_property
+    def with_raw_response(self) -> MessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return MessagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> MessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return MessagesWithStreamingResponse(self)
+
+
+class AsyncMessages(AsyncAPIResource):
+    create = FirstPartyAsyncMessagesAPI.create
+
+    @cached_property
+    def with_raw_response(self) -> AsyncMessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncMessagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return AsyncMessagesWithStreamingResponse(self)
+
+
+class MessagesWithRawResponse:
+    def __init__(self, messages: Messages) -> None:
+        self._messages = messages
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            messages.create,
+        )
+
+
+class AsyncMessagesWithRawResponse:
+    def __init__(self, messages: AsyncMessages) -> None:
+        self._messages = messages
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            messages.create,
+        )
+
+
+class MessagesWithStreamingResponse:
+    def __init__(self, messages: Messages) -> None:
+        self._messages = messages
+
+        self.create = to_streamed_response_wrapper(
+            messages.create,
+        )
+
+
+class AsyncMessagesWithStreamingResponse:
+    def __init__(self, messages: AsyncMessages) -> None:
+        self._messages = messages
+
+        self.create = async_to_streamed_response_wrapper(
+            messages.create,
+        )
diff --git a/.venv/lib/python3.12/site-packages/anthropic/lib/vertex/_client.py b/.venv/lib/python3.12/site-packages/anthropic/lib/vertex/_client.py
new file mode 100644
index 00000000..c5ee9909
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/lib/vertex/_client.py
@@ -0,0 +1,406 @@
+from __future__ import annotations
+
+import os
+from typing import TYPE_CHECKING, Any, Union, Mapping, TypeVar
+from typing_extensions import Self, override
+
+import httpx
+
+from ... import _exceptions
+from ._auth import load_auth, refresh_auth
+from ._beta import Beta, AsyncBeta
+from ..._types import NOT_GIVEN, NotGiven
+from ..._utils import is_dict, asyncify, is_given
+from ..._compat import model_copy, typed_cached_property
+from ..._models import FinalRequestOptions
+from ..._version import __version__
+from ..._streaming import Stream, AsyncStream
+from ..._exceptions import AnthropicError, APIStatusError
+from ..._base_client import (
+    DEFAULT_MAX_RETRIES,
+    BaseClient,
+    SyncAPIClient,
+    AsyncAPIClient,
+)
+from ...resources.messages import Messages, AsyncMessages
+
+if TYPE_CHECKING:
+    from google.auth.credentials import Credentials as GoogleCredentials  # type: ignore
+
+
+DEFAULT_VERSION = "vertex-2023-10-16"
+
+_HttpxClientT = TypeVar("_HttpxClientT", bound=Union[httpx.Client, httpx.AsyncClient])
+_DefaultStreamT = TypeVar("_DefaultStreamT", bound=Union[Stream[Any], AsyncStream[Any]])
+
+
+class BaseVertexClient(BaseClient[_HttpxClientT, _DefaultStreamT]):
+    @typed_cached_property
+    def region(self) -> str:
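+        # Set by assignment in `__init__`; reading it before then raises.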
+        raise RuntimeError("region not set")
+
+    @typed_cached_property
+    def project_id(self) -> str | None:
+        project_id = os.environ.get("ANTHROPIC_VERTEX_PROJECT_ID")
+        if project_id:
+            return project_id
+
+        return None
+
+    @override
+    def _make_status_error(
+        self,
+        err_msg: str,
+        *,
+        body: object,
+        response: httpx.Response,
+    ) -> APIStatusError:
+        if response.status_code == 400:
+            return _exceptions.BadRequestError(err_msg, response=response, body=body)
+
+        if response.status_code == 401:
+            return _exceptions.AuthenticationError(err_msg, response=response, body=body)
+
+        if response.status_code == 403:
+            return _exceptions.PermissionDeniedError(err_msg, response=response, body=body)
+
+        if response.status_code == 404:
+            return _exceptions.NotFoundError(err_msg, response=response, body=body)
+
+        if response.status_code == 409:
+            return _exceptions.ConflictError(err_msg, response=response, body=body)
+
+        if response.status_code == 422:
+            return _exceptions.UnprocessableEntityError(err_msg, response=response, body=body)
+
+        if response.status_code == 429:
+            return _exceptions.RateLimitError(err_msg, response=response, body=body)
+
+        if response.status_code == 503:
+            return _exceptions.ServiceUnavailableError(err_msg, response=response, body=body)
+
+        if response.status_code == 504:
+            return _exceptions.DeadlineExceededError(err_msg, response=response, body=body)
+
+        if response.status_code >= 500:
+            return _exceptions.InternalServerError(err_msg, response=response, body=body)
+        return APIStatusError(err_msg, response=response, body=body)
+
+
+class AnthropicVertex(BaseVertexClient[httpx.Client, Stream[Any]], SyncAPIClient):
+    messages: Messages
+    beta: Beta
+
+    def __init__(
+        self,
+        *,
+        region: str | NotGiven = NOT_GIVEN,
+        project_id: str | NotGiven = NOT_GIVEN,
+        access_token: str | None = None,
+        credentials: GoogleCredentials | None = None,
+        base_url: str | httpx.URL | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        # Configure a custom httpx client. See the [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
+        http_client: httpx.Client | None = None,
+        _strict_response_validation: bool = False,
+    ) -> None:
+        if not is_given(region):
+            region = os.environ.get("CLOUD_ML_REGION", NOT_GIVEN)
+        if not is_given(region):
+            raise ValueError(
+                "No region was given. The client should be instantiated with the `region` argument or the `CLOUD_ML_REGION` environment variable should be set."
+            )
+
+        if base_url is None:
+            base_url = os.environ.get("ANTHROPIC_VERTEX_BASE_URL")
+            if base_url is None:
+                base_url = f"https://{region}-aiplatform.googleapis.com/v1"
+
+        super().__init__(
+            version=__version__,
+            base_url=base_url,
+            timeout=timeout,
+            max_retries=max_retries,
+            custom_headers=default_headers,
+            custom_query=default_query,
+            http_client=http_client,
+            _strict_response_validation=_strict_response_validation,
+        )
+
+        if is_given(project_id):
+            self.project_id = project_id
+
+        self.region = region
+        self.access_token = access_token
+        self.credentials = credentials
+
+        self.messages = Messages(self)
+        self.beta = Beta(self)
+
+    @override
+    def _prepare_options(self, options: FinalRequestOptions) -> FinalRequestOptions:
+        return _prepare_options(options, project_id=self.project_id, region=self.region)
+
+    @override
+    def _prepare_request(self, request: httpx.Request) -> None:
+        if request.headers.get("Authorization"):
+            # already authenticated, nothing for us to do
+            return
+
+        request.headers["Authorization"] = f"Bearer {self._ensure_access_token()}"
+
+    def _ensure_access_token(self) -> str:
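+        # Resolution order: an explicit `access_token` if given, otherwise
+        # Google credentials (loaded on first use, refreshed when expired).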
+        if self.access_token is not None:
+            return self.access_token
+
+        if not self.credentials:
+            self.credentials, project_id = load_auth(project_id=self.project_id)
+            if not self.project_id:
+                self.project_id = project_id
+
+        if self.credentials.expired or not self.credentials.token:
+            refresh_auth(self.credentials)
+
+        if not self.credentials.token:
+            raise RuntimeError("Could not resolve API token from the environment")
+
+        assert isinstance(self.credentials.token, str)
+        return self.credentials.token
+
+    def copy(
+        self,
+        *,
+        region: str | NotGiven = NOT_GIVEN,
+        project_id: str | NotGiven = NOT_GIVEN,
+        access_token: str | None = None,
+        credentials: GoogleCredentials | None = None,
+        base_url: str | httpx.URL | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        http_client: httpx.Client | None = None,
+        max_retries: int | NotGiven = NOT_GIVEN,
+        default_headers: Mapping[str, str] | None = None,
+        set_default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        set_default_query: Mapping[str, object] | None = None,
+        _extra_kwargs: Mapping[str, Any] = {},
+    ) -> Self:
+        """
+        Create a new client instance re-using the same options given to the current client with optional overriding.
+        """
+        if default_headers is not None and set_default_headers is not None:
+            raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive")
+
+        if default_query is not None and set_default_query is not None:
+            raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive")
+
+        headers = self._custom_headers
+        if default_headers is not None:
+            headers = {**headers, **default_headers}
+        elif set_default_headers is not None:
+            headers = set_default_headers
+
+        params = self._custom_query
+        if default_query is not None:
+            params = {**params, **default_query}
+        elif set_default_query is not None:
+            params = set_default_query
+
+        http_client = http_client or self._client
+
+        return self.__class__(
+            region=region if is_given(region) else self.region,
+            project_id=project_id if is_given(project_id) else self.project_id or NOT_GIVEN,
+            access_token=access_token or self.access_token,
+            credentials=credentials or self.credentials,
+            base_url=base_url or self.base_url,
+            timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
+            http_client=http_client,
+            max_retries=max_retries if is_given(max_retries) else self.max_retries,
+            default_headers=headers,
+            default_query=params,
+            **_extra_kwargs,
+        )
+
+    # Alias for `copy` for nicer inline usage, e.g.
+    # client.with_options(timeout=10).foo.create(...)
+    with_options = copy
+
+
+class AsyncAnthropicVertex(BaseVertexClient[httpx.AsyncClient, AsyncStream[Any]], AsyncAPIClient):
+    messages: AsyncMessages
+    beta: AsyncBeta
+
+    def __init__(
+        self,
+        *,
+        region: str | NotGiven = NOT_GIVEN,
+        project_id: str | NotGiven = NOT_GIVEN,
+        access_token: str | None = None,
+        credentials: GoogleCredentials | None = None,
+        base_url: str | httpx.URL | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        # Configure a custom httpx client. See the [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
+        http_client: httpx.AsyncClient | None = None,
+        _strict_response_validation: bool = False,
+    ) -> None:
+        if not is_given(region):
+            region = os.environ.get("CLOUD_ML_REGION", NOT_GIVEN)
+        if not is_given(region):
+            raise ValueError(
+                "No region was given. The client should be instantiated with the `region` argument or the `CLOUD_ML_REGION` environment variable should be set."
+            )
+
+        if base_url is None:
+            base_url = os.environ.get("ANTHROPIC_VERTEX_BASE_URL")
+            if base_url is None:
+                base_url = f"https://{region}-aiplatform.googleapis.com/v1"
+
+        super().__init__(
+            version=__version__,
+            base_url=base_url,
+            timeout=timeout,
+            max_retries=max_retries,
+            custom_headers=default_headers,
+            custom_query=default_query,
+            http_client=http_client,
+            _strict_response_validation=_strict_response_validation,
+        )
+
+        if is_given(project_id):
+            self.project_id = project_id
+
+        self.region = region
+        self.access_token = access_token
+        self.credentials = credentials
+
+        self.messages = AsyncMessages(self)
+        self.beta = AsyncBeta(self)
+
+    @override
+    async def _prepare_options(self, options: FinalRequestOptions) -> FinalRequestOptions:
+        return _prepare_options(options, project_id=self.project_id, region=self.region)
+
+    @override
+    async def _prepare_request(self, request: httpx.Request) -> None:
+        if request.headers.get("Authorization"):
+            # already authenticated, nothing for us to do
+            return
+
+        request.headers["Authorization"] = f"Bearer {await self._ensure_access_token()}"
+
+    async def _ensure_access_token(self) -> str:
+        if self.access_token is not None:
+            return self.access_token
+
+        if not self.credentials:
+            self.credentials, project_id = await asyncify(load_auth)(project_id=self.project_id)
+            if not self.project_id:
+                self.project_id = project_id
+
+        if self.credentials.expired or not self.credentials.token:
+            await asyncify(refresh_auth)(self.credentials)
+
+        if not self.credentials.token:
+            raise RuntimeError("Could not resolve API token from the environment")
+
+        assert isinstance(self.credentials.token, str)
+        return self.credentials.token
+
+    def copy(
+        self,
+        *,
+        region: str | NotGiven = NOT_GIVEN,
+        project_id: str | NotGiven = NOT_GIVEN,
+        access_token: str | None = None,
+        credentials: GoogleCredentials | None = None,
+        base_url: str | httpx.URL | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        http_client: httpx.AsyncClient | None = None,
+        max_retries: int | NotGiven = NOT_GIVEN,
+        default_headers: Mapping[str, str] | None = None,
+        set_default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        set_default_query: Mapping[str, object] | None = None,
+        _extra_kwargs: Mapping[str, Any] = {},
+    ) -> Self:
+        """
+        Create a new client instance re-using the same options given to the current client with optional overriding.
+        """
+        if default_headers is not None and set_default_headers is not None:
+            raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive")
+
+        if default_query is not None and set_default_query is not None:
+            raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive")
+
+        headers = self._custom_headers
+        if default_headers is not None:
+            headers = {**headers, **default_headers}
+        elif set_default_headers is not None:
+            headers = set_default_headers
+
+        params = self._custom_query
+        if default_query is not None:
+            params = {**params, **default_query}
+        elif set_default_query is not None:
+            params = set_default_query
+
+        http_client = http_client or self._client
+
+        return self.__class__(
+            region=region if is_given(region) else self.region,
+            project_id=project_id if is_given(project_id) else self.project_id or NOT_GIVEN,
+            access_token=access_token or self.access_token,
+            credentials=credentials or self.credentials,
+            base_url=base_url or self.base_url,
+            timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
+            http_client=http_client,
+            max_retries=max_retries if is_given(max_retries) else self.max_retries,
+            default_headers=headers,
+            default_query=params,
+            **_extra_kwargs,
+        )
+
+    # Alias for `copy` for nicer inline usage, e.g.
+    # client.with_options(timeout=10).foo.create(...)
+    with_options = copy
+
+
+def _prepare_options(input_options: FinalRequestOptions, *, project_id: str | None, region: str) -> FinalRequestOptions:
+    options = model_copy(input_options, deep=True)
+
+    if is_dict(options.json_data):
+        options.json_data.setdefault("anthropic_version", DEFAULT_VERSION)
+
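+    # Vertex addresses the model via the URL rather than the request body and
+    # exposes separate non-streaming/streaming methods, so the standard
+    # Messages API routes are rewritten to `rawPredict`/`streamRawPredict` below.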
+    if options.url in {"/v1/messages", "/v1/messages?beta=true"} and options.method == "post":
+        if project_id is None:
+            raise RuntimeError(
+                "No project_id was given and it could not be resolved from credentials. The client should be instantiated with the `project_id` argument or the `ANTHROPIC_VERTEX_PROJECT_ID` environment variable should be set."
+            )
+
+        if not is_dict(options.json_data):
+            raise RuntimeError("Expected json data to be a dictionary for post /v1/messages")
+
+        model = options.json_data.pop("model")
+        stream = options.json_data.get("stream", False)
+        specifier = "streamRawPredict" if stream else "rawPredict"
+
+        options.url = f"/projects/{project_id}/locations/{region}/publishers/anthropic/models/{model}:{specifier}"
+
+    if options.url in {"/v1/messages/count_tokens", "/v1/messages/count_tokens?beta=true"} and options.method == "post":
+        if project_id is None:
+            raise RuntimeError(
+                "No project_id was given and it could not be resolved from credentials. The client should be instantiated with the `project_id` argument or the `ANTHROPIC_VERTEX_PROJECT_ID` environment variable should be set."
+            )
+
+        options.url = f"/projects/{project_id}/locations/{region}/publishers/anthropic/models/count-tokens:rawPredict"
+
+    if options.url.startswith("/v1/messages/batches"):
+        raise AnthropicError("The Batch API is not supported in the Vertex client yet")
+
+    return options
diff --git a/.venv/lib/python3.12/site-packages/anthropic/pagination.py b/.venv/lib/python3.12/site-packages/anthropic/pagination.py
new file mode 100644
index 00000000..c4553fba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/pagination.py
@@ -0,0 +1,84 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Generic, TypeVar, Optional
+from typing_extensions import override
+
+from ._base_client import BasePage, PageInfo, BaseSyncPage, BaseAsyncPage
+
+__all__ = ["SyncPage", "AsyncPage"]
+
+_T = TypeVar("_T")
+
+
+class SyncPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]):
+    data: List[_T]
+    has_more: Optional[bool] = None
+    first_id: Optional[str] = None
+    last_id: Optional[str] = None
+
+    @override
+    def _get_page_items(self) -> List[_T]:
+        data = self.data
+        if not data:
+            return []
+        return data
+
+    @override
+    def has_next_page(self) -> bool:
+        has_more = self.has_more
+        if has_more is not None and has_more is False:
+            return False
+
+        return super().has_next_page()
+
+    @override
+    def next_page_info(self) -> Optional[PageInfo]:
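+        # If the request was paginating backwards (a `before_id` param was
+        # sent), keep going backwards from the first item; otherwise page
+        # forwards from the last.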
+        if self._options.params.get("before_id"):
+            first_id = self.first_id
+            if not first_id:
+                return None
+
+            return PageInfo(params={"before_id": first_id})
+
+        last_id = self.last_id
+        if not last_id:
+            return None
+
+        return PageInfo(params={"after_id": last_id})
+
+
+class AsyncPage(BaseAsyncPage[_T], BasePage[_T], Generic[_T]):
+    data: List[_T]
+    has_more: Optional[bool] = None
+    first_id: Optional[str] = None
+    last_id: Optional[str] = None
+
+    @override
+    def _get_page_items(self) -> List[_T]:
+        data = self.data
+        if not data:
+            return []
+        return data
+
+    @override
+    def has_next_page(self) -> bool:
+        has_more = self.has_more
+        if has_more is not None and has_more is False:
+            return False
+
+        return super().has_next_page()
+
+    @override
+    def next_page_info(self) -> Optional[PageInfo]:
+        if self._options.params.get("before_id"):
+            first_id = self.first_id
+            if not first_id:
+                return None
+
+            return PageInfo(params={"before_id": first_id})
+
+        last_id = self.last_id
+        if not last_id:
+            return None
+
+        return PageInfo(params={"after_id": last_id})
diff --git a/.venv/lib/python3.12/site-packages/anthropic/py.typed b/.venv/lib/python3.12/site-packages/anthropic/py.typed
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/py.typed
diff --git a/.venv/lib/python3.12/site-packages/anthropic/resources/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/resources/__init__.py
new file mode 100644
index 00000000..ffff8855
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/resources/__init__.py
@@ -0,0 +1,61 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .beta import (
+    Beta,
+    AsyncBeta,
+    BetaWithRawResponse,
+    AsyncBetaWithRawResponse,
+    BetaWithStreamingResponse,
+    AsyncBetaWithStreamingResponse,
+)
+from .models import (
+    Models,
+    AsyncModels,
+    ModelsWithRawResponse,
+    AsyncModelsWithRawResponse,
+    ModelsWithStreamingResponse,
+    AsyncModelsWithStreamingResponse,
+)
+from .messages import (
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
+from .completions import (
+    Completions,
+    AsyncCompletions,
+    CompletionsWithRawResponse,
+    AsyncCompletionsWithRawResponse,
+    CompletionsWithStreamingResponse,
+    AsyncCompletionsWithStreamingResponse,
+)
+
+__all__ = [
+    "Completions",
+    "AsyncCompletions",
+    "CompletionsWithRawResponse",
+    "AsyncCompletionsWithRawResponse",
+    "CompletionsWithStreamingResponse",
+    "AsyncCompletionsWithStreamingResponse",
+    "Messages",
+    "AsyncMessages",
+    "MessagesWithRawResponse",
+    "AsyncMessagesWithRawResponse",
+    "MessagesWithStreamingResponse",
+    "AsyncMessagesWithStreamingResponse",
+    "Models",
+    "AsyncModels",
+    "ModelsWithRawResponse",
+    "AsyncModelsWithRawResponse",
+    "ModelsWithStreamingResponse",
+    "AsyncModelsWithStreamingResponse",
+    "Beta",
+    "AsyncBeta",
+    "BetaWithRawResponse",
+    "AsyncBetaWithRawResponse",
+    "BetaWithStreamingResponse",
+    "AsyncBetaWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/resources/beta/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/resources/beta/__init__.py
new file mode 100644
index 00000000..82b343fa
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/resources/beta/__init__.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .beta import (
+    Beta,
+    AsyncBeta,
+    BetaWithRawResponse,
+    AsyncBetaWithRawResponse,
+    BetaWithStreamingResponse,
+    AsyncBetaWithStreamingResponse,
+)
+from .models import (
+    Models,
+    AsyncModels,
+    ModelsWithRawResponse,
+    AsyncModelsWithRawResponse,
+    ModelsWithStreamingResponse,
+    AsyncModelsWithStreamingResponse,
+)
+from .messages import (
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
+
+__all__ = [
+    "Models",
+    "AsyncModels",
+    "ModelsWithRawResponse",
+    "AsyncModelsWithRawResponse",
+    "ModelsWithStreamingResponse",
+    "AsyncModelsWithStreamingResponse",
+    "Messages",
+    "AsyncMessages",
+    "MessagesWithRawResponse",
+    "AsyncMessagesWithRawResponse",
+    "MessagesWithStreamingResponse",
+    "AsyncMessagesWithStreamingResponse",
+    "Beta",
+    "AsyncBeta",
+    "BetaWithRawResponse",
+    "AsyncBetaWithRawResponse",
+    "BetaWithStreamingResponse",
+    "AsyncBetaWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/resources/beta/beta.py b/.venv/lib/python3.12/site-packages/anthropic/resources/beta/beta.py
new file mode 100644
index 00000000..ae5c7d98
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/resources/beta/beta.py
@@ -0,0 +1,134 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .models import (
+    Models,
+    AsyncModels,
+    ModelsWithRawResponse,
+    AsyncModelsWithRawResponse,
+    ModelsWithStreamingResponse,
+    AsyncModelsWithStreamingResponse,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from .messages.messages import (
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
+
+__all__ = ["Beta", "AsyncBeta"]
+
+
+class Beta(SyncAPIResource):
+    @cached_property
+    def models(self) -> Models:
+        return Models(self._client)
+
+    @cached_property
+    def messages(self) -> Messages:
+        return Messages(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> BetaWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return BetaWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> BetaWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return BetaWithStreamingResponse(self)
+
+
+class AsyncBeta(AsyncAPIResource):
+    @cached_property
+    def models(self) -> AsyncModels:
+        return AsyncModels(self._client)
+
+    @cached_property
+    def messages(self) -> AsyncMessages:
+        return AsyncMessages(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncBetaWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncBetaWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncBetaWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return AsyncBetaWithStreamingResponse(self)
+
+
+class BetaWithRawResponse:
+    def __init__(self, beta: Beta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def models(self) -> ModelsWithRawResponse:
+        return ModelsWithRawResponse(self._beta.models)
+
+    @cached_property
+    def messages(self) -> MessagesWithRawResponse:
+        return MessagesWithRawResponse(self._beta.messages)
+
+
+class AsyncBetaWithRawResponse:
+    def __init__(self, beta: AsyncBeta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def models(self) -> AsyncModelsWithRawResponse:
+        return AsyncModelsWithRawResponse(self._beta.models)
+
+    @cached_property
+    def messages(self) -> AsyncMessagesWithRawResponse:
+        return AsyncMessagesWithRawResponse(self._beta.messages)
+
+
+class BetaWithStreamingResponse:
+    def __init__(self, beta: Beta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def models(self) -> ModelsWithStreamingResponse:
+        return ModelsWithStreamingResponse(self._beta.models)
+
+    @cached_property
+    def messages(self) -> MessagesWithStreamingResponse:
+        return MessagesWithStreamingResponse(self._beta.messages)
+
+
+class AsyncBetaWithStreamingResponse:
+    def __init__(self, beta: AsyncBeta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def models(self) -> AsyncModelsWithStreamingResponse:
+        return AsyncModelsWithStreamingResponse(self._beta.models)
+
+    @cached_property
+    def messages(self) -> AsyncMessagesWithStreamingResponse:
+        return AsyncMessagesWithStreamingResponse(self._beta.messages)
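
The `with_raw_response` and `with_streaming_response` accessors above apply uniformly to every method on the wrapped resources. A short sketch, assuming the beta `models.list` endpoint this vendored SDK defines elsewhere:

```python
from anthropic import Anthropic

client = Anthropic()

# Raw response: HTTP metadata plus .parse() for the usual return value.
response = client.beta.models.with_raw_response.list()
print(response.headers.get("request-id"))
models = response.parse()

# Streaming response: the body is not read until you consume it.
with client.beta.models.with_streaming_response.list() as response:
    print(response.headers)
```
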
diff --git a/.venv/lib/python3.12/site-packages/anthropic/resources/beta/messages/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/resources/beta/messages/__init__.py
new file mode 100644
index 00000000..34b0a923
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/resources/beta/messages/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .batches import (
+    Batches,
+    AsyncBatches,
+    BatchesWithRawResponse,
+    AsyncBatchesWithRawResponse,
+    BatchesWithStreamingResponse,
+    AsyncBatchesWithStreamingResponse,
+)
+from .messages import (
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
+
+__all__ = [
+    "Batches",
+    "AsyncBatches",
+    "BatchesWithRawResponse",
+    "AsyncBatchesWithRawResponse",
+    "BatchesWithStreamingResponse",
+    "AsyncBatchesWithStreamingResponse",
+    "Messages",
+    "AsyncMessages",
+    "MessagesWithRawResponse",
+    "AsyncMessagesWithRawResponse",
+    "MessagesWithStreamingResponse",
+    "AsyncMessagesWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/resources/beta/messages/batches.py b/.venv/lib/python3.12/site-packages/anthropic/resources/beta/messages/batches.py
new file mode 100644
index 00000000..f5483ca6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/resources/beta/messages/batches.py
@@ -0,0 +1,889 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Iterable
+from itertools import chain
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+    is_given,
+    maybe_transform,
+    strip_not_given,
+    async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncPage, AsyncPage
+from ...._exceptions import AnthropicError
+from ...._base_client import AsyncPaginator, make_request_options
+from ...._decoders.jsonl import JSONLDecoder, AsyncJSONLDecoder
+from ....types.beta.messages import batch_list_params, batch_create_params
+from ....types.anthropic_beta_param import AnthropicBetaParam
+from ....types.beta.messages.beta_message_batch import BetaMessageBatch
+from ....types.beta.messages.beta_deleted_message_batch import BetaDeletedMessageBatch
+from ....types.beta.messages.beta_message_batch_individual_response import BetaMessageBatchIndividualResponse
+
+__all__ = ["Batches", "AsyncBatches"]
+
+
+class Batches(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> BatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return BatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> BatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return BatchesWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        requests: Iterable[batch_create_params.Request],
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaMessageBatch:
+        """
+        Send a batch of Message creation requests.
+
+        The Message Batches API can be used to process multiple Messages API requests at
+        once. Once a Message Batch is created, it begins processing immediately. Batches
+        can take up to 24 hours to complete.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          requests: List of requests for prompt completion. Each is an individual request to create
+              a Message.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {
+            **strip_not_given(
+                {
+                    "anthropic-beta": ",".join(chain((str(e) for e in betas), ["message-batches-2024-09-24"]))
+                    if is_given(betas)
+                    else NOT_GIVEN
+                }
+            ),
+            **(extra_headers or {}),
+        }
+        extra_headers = {"anthropic-beta": "message-batches-2024-09-24", **(extra_headers or {})}
+        return self._post(
+            "/v1/messages/batches?beta=true",
+            body=maybe_transform({"requests": requests}, batch_create_params.BatchCreateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BetaMessageBatch,
+        )
+
+    def retrieve(
+        self,
+        message_batch_id: str,
+        *,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaMessageBatch:
+        """This endpoint is idempotent and can be used to poll for Message Batch
+        completion.
+
+        To access the results of a Message Batch, make a request to the
+        `results_url` field in the response.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        extra_headers = {
+            **strip_not_given(
+                {
+                    "anthropic-beta": ",".join(chain((str(e) for e in betas), ["message-batches-2024-09-24"]))
+                    if is_given(betas)
+                    else NOT_GIVEN
+                }
+            ),
+            **(extra_headers or {}),
+        }
+        extra_headers = {"anthropic-beta": "message-batches-2024-09-24", **(extra_headers or {})}
+        return self._get(
+            f"/v1/messages/batches/{message_batch_id}?beta=true",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BetaMessageBatch,
+        )
+
+    def list(
+        self,
+        *,
+        after_id: str | NotGiven = NOT_GIVEN,
+        before_id: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncPage[BetaMessageBatch]:
+        """List all Message Batches within a Workspace.
+
+        Most recently created batches are
+        returned first.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          after_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately after this object.
+
+          before_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately before this object.
+
+          limit: Number of items to return per page.
+
+              Defaults to `20`. Ranges from `1` to `1000`.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {
+            **strip_not_given(
+                {
+                    "anthropic-beta": ",".join(chain((str(e) for e in betas), ["message-batches-2024-09-24"]))
+                    if is_given(betas)
+                    else NOT_GIVEN
+                }
+            ),
+            **(extra_headers or {}),
+        }
+        extra_headers = {"anthropic-beta": "message-batches-2024-09-24", **(extra_headers or {})}
+        return self._get_api_list(
+            "/v1/messages/batches?beta=true",
+            page=SyncPage[BetaMessageBatch],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after_id": after_id,
+                        "before_id": before_id,
+                        "limit": limit,
+                    },
+                    batch_list_params.BatchListParams,
+                ),
+            ),
+            model=BetaMessageBatch,
+        )
+
+    def delete(
+        self,
+        message_batch_id: str,
+        *,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaDeletedMessageBatch:
+        """
+        Delete a Message Batch.
+
+        Message Batches can only be deleted once they've finished processing. If you'd
+        like to delete an in-progress batch, you must first cancel it.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        extra_headers = {
+            **strip_not_given(
+                {
+                    "anthropic-beta": ",".join(chain((str(e) for e in betas), ["message-batches-2024-09-24"]))
+                    if is_given(betas)
+                    else NOT_GIVEN
+                }
+            ),
+            **(extra_headers or {}),
+        }
+        extra_headers = {"anthropic-beta": "message-batches-2024-09-24", **(extra_headers or {})}
+        return self._delete(
+            f"/v1/messages/batches/{message_batch_id}?beta=true",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BetaDeletedMessageBatch,
+        )
+
+    def cancel(
+        self,
+        message_batch_id: str,
+        *,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaMessageBatch:
+        """Batches may be canceled any time before processing ends.
+
+        Once cancellation is
+        initiated, the batch enters a `canceling` state, at which time the system may
+        complete any in-progress, non-interruptible requests before finalizing
+        cancellation.
+
+        The number of canceled requests is specified in `request_counts`. To determine
+        which requests were canceled, check the individual results within the batch.
+        Note that cancellation may not result in any canceled requests if they were
+        non-interruptible.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        extra_headers = {
+            **strip_not_given(
+                {
+                    "anthropic-beta": ",".join(chain((str(e) for e in betas), ["message-batches-2024-09-24"]))
+                    if is_given(betas)
+                    else NOT_GIVEN
+                }
+            ),
+            **(extra_headers or {}),
+        }
+        extra_headers = {"anthropic-beta": "message-batches-2024-09-24", **(extra_headers or {})}
+        return self._post(
+            f"/v1/messages/batches/{message_batch_id}/cancel?beta=true",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BetaMessageBatch,
+        )
+
+    def results(
+        self,
+        message_batch_id: str,
+        *,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> JSONLDecoder[BetaMessageBatchIndividualResponse]:
+        """
+        Streams the results of a Message Batch as a `.jsonl` file.
+
+        Each line in the file is a JSON object containing the result of a single request
+        in the Message Batch. Results are not guaranteed to be in the same order as
+        requests. Use the `custom_id` field to match results to requests.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+
+        batch = self.retrieve(message_batch_id=message_batch_id)
+        if not batch.results_url:
+            raise AnthropicError(
+                f"No `results_url` for the given batch; Has it finished processing? {batch.processing_status}"
+            )
+
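+        # The results URL is fetched as a stream: `Accept: application/binary`
+        # requests the raw `.jsonl` body, and `cast_to=JSONLDecoder[...]` with
+        # `stream=True` yields one parsed result object per line.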
+        extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
+        extra_headers = {
+            **strip_not_given(
+                {
+                    "anthropic-beta": ",".join(chain((str(e) for e in betas), ["message-batches-2024-09-24"]))
+                    if is_given(betas)
+                    else NOT_GIVEN
+                }
+            ),
+            **(extra_headers or {}),
+        }
+        extra_headers = {"anthropic-beta": "message-batches-2024-09-24", **(extra_headers or {})}
+        return self._get(
+            batch.results_url,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=JSONLDecoder[BetaMessageBatchIndividualResponse],
+            stream=True,
+        )
+
+
+class AsyncBatches(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncBatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncBatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return AsyncBatchesWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        requests: Iterable[batch_create_params.Request],
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaMessageBatch:
+        """
+        Send a batch of Message creation requests.
+
+        The Message Batches API can be used to process multiple Messages API requests at
+        once. Once a Message Batch is created, it begins processing immediately. Batches
+        can take up to 24 hours to complete.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          requests: List of requests for prompt completion. Each is an individual request to create
+              a Message.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {
+            **strip_not_given(
+                {
+                    "anthropic-beta": ",".join(chain((str(e) for e in betas), ["message-batches-2024-09-24"]))
+                    if is_given(betas)
+                    else NOT_GIVEN
+                }
+            ),
+            **(extra_headers or {}),
+        }
+        extra_headers = {"anthropic-beta": "message-batches-2024-09-24", **(extra_headers or {})}
+        return await self._post(
+            "/v1/messages/batches?beta=true",
+            body=await async_maybe_transform({"requests": requests}, batch_create_params.BatchCreateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BetaMessageBatch,
+        )
+
+    async def retrieve(
+        self,
+        message_batch_id: str,
+        *,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaMessageBatch:
+        """This endpoint is idempotent and can be used to poll for Message Batch
+        completion.
+
+        To access the results of a Message Batch, make a request to the
+        `results_url` field in the response.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        extra_headers = {
+            **strip_not_given(
+                {
+                    "anthropic-beta": ",".join(chain((str(e) for e in betas), ["message-batches-2024-09-24"]))
+                    if is_given(betas)
+                    else NOT_GIVEN
+                }
+            ),
+            **(extra_headers or {}),
+        }
+        extra_headers = {"anthropic-beta": "message-batches-2024-09-24", **(extra_headers or {})}
+        return await self._get(
+            f"/v1/messages/batches/{message_batch_id}?beta=true",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BetaMessageBatch,
+        )
+
+    def list(
+        self,
+        *,
+        after_id: str | NotGiven = NOT_GIVEN,
+        before_id: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[BetaMessageBatch, AsyncPage[BetaMessageBatch]]:
+        """List all Message Batches within a Workspace.
+
+        Most recently created batches are
+        returned first.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          after_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately after this object.
+
+          before_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately before this object.
+
+          limit: Number of items to return per page.
+
+              Defaults to `20`. Ranges from `1` to `1000`.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {
+            **strip_not_given(
+                {
+                    "anthropic-beta": ",".join(chain((str(e) for e in betas), ["message-batches-2024-09-24"]))
+                    if is_given(betas)
+                    else NOT_GIVEN
+                }
+            ),
+            **(extra_headers or {}),
+        }
+        extra_headers = {"anthropic-beta": "message-batches-2024-09-24", **(extra_headers or {})}
+        return self._get_api_list(
+            "/v1/messages/batches?beta=true",
+            page=AsyncPage[BetaMessageBatch],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after_id": after_id,
+                        "before_id": before_id,
+                        "limit": limit,
+                    },
+                    batch_list_params.BatchListParams,
+                ),
+            ),
+            model=BetaMessageBatch,
+        )
+
+    async def delete(
+        self,
+        message_batch_id: str,
+        *,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaDeletedMessageBatch:
+        """
+        Delete a Message Batch.
+
+        Message Batches can only be deleted once they've finished processing. If you'd
+        like to delete an in-progress batch, you must first cancel it.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        extra_headers = {
+            **strip_not_given(
+                {
+                    "anthropic-beta": ",".join(chain((str(e) for e in betas), ["message-batches-2024-09-24"]))
+                    if is_given(betas)
+                    else NOT_GIVEN
+                }
+            ),
+            **(extra_headers or {}),
+        }
+        extra_headers = {"anthropic-beta": "message-batches-2024-09-24", **(extra_headers or {})}
+        return await self._delete(
+            f"/v1/messages/batches/{message_batch_id}?beta=true",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BetaDeletedMessageBatch,
+        )
+
+    async def cancel(
+        self,
+        message_batch_id: str,
+        *,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaMessageBatch:
+        """Batches may be canceled any time before processing ends.
+
+        Once cancellation is
+        initiated, the batch enters a `canceling` state, at which time the system may
+        complete any in-progress, non-interruptible requests before finalizing
+        cancellation.
+
+        The number of canceled requests is specified in `request_counts`. To determine
+        which requests were canceled, check the individual results within the batch.
+        Note that cancellation may not result in any canceled requests if they were
+        non-interruptible.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        extra_headers = {
+            **strip_not_given(
+                {
+                    "anthropic-beta": ",".join(chain((str(e) for e in betas), ["message-batches-2024-09-24"]))
+                    if is_given(betas)
+                    else NOT_GIVEN
+                }
+            ),
+            **(extra_headers or {}),
+        }
+        extra_headers = {"anthropic-beta": "message-batches-2024-09-24", **(extra_headers or {})}
+        return await self._post(
+            f"/v1/messages/batches/{message_batch_id}/cancel?beta=true",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BetaMessageBatch,
+        )
+
+    async def results(
+        self,
+        message_batch_id: str,
+        *,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncJSONLDecoder[BetaMessageBatchIndividualResponse]:
+        """
+        Streams the results of a Message Batch as a `.jsonl` file.
+
+        Each line in the file is a JSON object containing the result of a single request
+        in the Message Batch. Results are not guaranteed to be in the same order as
+        requests. Use the `custom_id` field to match results to requests.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+
+        batch = await self.retrieve(message_batch_id=message_batch_id)
+        if not batch.results_url:
+            raise AnthropicError(
+                f"No `results_url` for the given batch; Has it finished processing? {batch.processing_status}"
+            )
+
+        extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
+        extra_headers = {
+            **strip_not_given(
+                {
+                    "anthropic-beta": ",".join(chain((str(e) for e in betas), ["message-batches-2024-09-24"]))
+                    if is_given(betas)
+                    else NOT_GIVEN
+                }
+            ),
+            **(extra_headers or {}),
+        }
+        extra_headers = {"anthropic-beta": "message-batches-2024-09-24", **(extra_headers or {})}
+        return await self._get(
+            batch.results_url,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=AsyncJSONLDecoder[BetaMessageBatchIndividualResponse],
+            stream=True,
+        )
+
+
+class BatchesWithRawResponse:
+    def __init__(self, batches: Batches) -> None:
+        self._batches = batches
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            batches.list,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            batches.delete,
+        )
+        self.cancel = _legacy_response.to_raw_response_wrapper(
+            batches.cancel,
+        )
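+        # Note: `results` is not wrapped here; it already returns a streaming
+        # JSONLDecoder rather than a parsed model object.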
+
+
+class AsyncBatchesWithRawResponse:
+    def __init__(self, batches: AsyncBatches) -> None:
+        self._batches = batches
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            batches.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            batches.delete,
+        )
+        self.cancel = _legacy_response.async_to_raw_response_wrapper(
+            batches.cancel,
+        )
+
+
+class BatchesWithStreamingResponse:
+    def __init__(self, batches: Batches) -> None:
+        self._batches = batches
+
+        self.create = to_streamed_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            batches.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            batches.delete,
+        )
+        self.cancel = to_streamed_response_wrapper(
+            batches.cancel,
+        )
+
+
+class AsyncBatchesWithStreamingResponse:
+    def __init__(self, batches: AsyncBatches) -> None:
+        self._batches = batches
+
+        self.create = async_to_streamed_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            batches.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            batches.delete,
+        )
+        self.cancel = async_to_streamed_response_wrapper(
+            batches.cancel,
+        )
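
Taken together, the resource above supports a create, poll, then stream-results workflow. A minimal sketch (the `custom_id`, model name, and fixed-interval polling are illustrative choices, not prescribed by the SDK):

```python
import time

from anthropic import Anthropic

client = Anthropic()

batch = client.beta.messages.batches.create(
    requests=[
        {
            # custom_id matches results to requests; results may arrive out of order.
            "custom_id": "req-1",
            "params": {
                "model": "claude-3-5-sonnet-20241022",
                "max_tokens": 1024,
                "messages": [{"role": "user", "content": "Hello, Claude"}],
            },
        }
    ]
)

# retrieve() is idempotent, so polling until processing ends is safe.
while batch.processing_status == "in_progress":
    time.sleep(60)
    batch = client.beta.messages.batches.retrieve(batch.id)

# results() streams the `.jsonl` file entry by entry.
for entry in client.beta.messages.batches.results(batch.id):
    print(entry.custom_id, entry.result.type)
```

`AsyncBatches` mirrors this shape with `await` on each call and `async for` over the results decoder.
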
diff --git a/.venv/lib/python3.12/site-packages/anthropic/resources/beta/messages/messages.py b/.venv/lib/python3.12/site-packages/anthropic/resources/beta/messages/messages.py
new file mode 100644
index 00000000..c1c2ef06
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/resources/beta/messages/messages.py
@@ -0,0 +1,2587 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import warnings
+from typing import List, Union, Iterable
+from functools import partial
+from itertools import chain
+from typing_extensions import Literal, overload
+
+import httpx
+
+from .... import _legacy_response
+from .batches import (
+    Batches,
+    AsyncBatches,
+    BatchesWithRawResponse,
+    AsyncBatchesWithRawResponse,
+    BatchesWithStreamingResponse,
+    AsyncBatchesWithStreamingResponse,
+)
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+    is_given,
+    required_args,
+    maybe_transform,
+    strip_not_given,
+    async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._constants import DEFAULT_TIMEOUT
+from ...._streaming import Stream, AsyncStream
+from ....types.beta import (
+    message_create_params,
+    message_count_tokens_params,
+)
+from ...._base_client import make_request_options
+from ....lib.streaming import BetaMessageStreamManager, BetaAsyncMessageStreamManager
+from ...messages.messages import DEPRECATED_MODELS
+from ....types.model_param import ModelParam
+from ....types.beta.beta_message import BetaMessage
+from ....types.anthropic_beta_param import AnthropicBetaParam
+from ....types.beta.beta_message_param import BetaMessageParam
+from ....types.beta.beta_metadata_param import BetaMetadataParam
+from ....types.beta.beta_text_block_param import BetaTextBlockParam
+from ....types.beta.beta_tool_union_param import BetaToolUnionParam
+from ....types.beta.beta_tool_choice_param import BetaToolChoiceParam
+from ....types.beta.beta_message_tokens_count import BetaMessageTokensCount
+from ....types.beta.beta_thinking_config_param import BetaThinkingConfigParam
+from ....types.beta.beta_raw_message_stream_event import BetaRawMessageStreamEvent
+
+__all__ = ["Messages", "AsyncMessages"]
+
+
+class Messages(SyncAPIResource):
+    @cached_property
+    def batches(self) -> Batches:
+        return Batches(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> MessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return MessagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> MessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return MessagesWithStreamingResponse(self)
+
+    @overload
+    def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[BetaMessageParam],
+        model: ModelParam,
+        metadata: BetaMetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        stream: Literal[False] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[BetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: BetaThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: BetaToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[BetaToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaMessage:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our [user guide](/en/docs/initial-setup)
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low-probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[BetaMessageParam],
+        model: ModelParam,
+        stream: Literal[True],
+        metadata: BetaMetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[BetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: BetaThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: BetaToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[BetaToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[BetaRawMessageStreamEvent]:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our [user guide](/en/docs/initial-setup).
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low-probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[BetaMessageParam],
+        model: ModelParam,
+        stream: bool,
+        metadata: BetaMetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[BetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: BetaThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: BetaToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[BetaToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaMessage | Stream[BetaRawMessageStreamEvent]:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our [user guide](/en/docs/initial-setup).
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low-probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
+    def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[BetaMessageParam],
+        model: ModelParam,
+        metadata: BetaMetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[BetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: BetaThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: BetaToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[BetaToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaMessage | Stream[BetaRawMessageStreamEvent]:
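+        # For non-streaming calls that still use the client-wide default
+        # timeout, derive a per-request timeout from `max_tokens` so that long
+        # generations are not cut off by a fixed limit.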
+        if not stream and not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
+            timeout = self._client._calculate_nonstreaming_timeout(max_tokens)
+
+        if model in DEPRECATED_MODELS:
+            warnings.warn(
+                f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
+                DeprecationWarning,
+                stacklevel=3,
+            )
+
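+        # Fold any requested beta flags into a single comma-separated
+        # `anthropic-beta` header; caller-supplied `extra_headers` take
+        # precedence over the computed value.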
+        extra_headers = {
+            **strip_not_given({"anthropic-beta": ",".join(str(e) for e in betas) if is_given(betas) else NOT_GIVEN}),
+            **(extra_headers or {}),
+        }
+        return self._post(
+            "/v1/messages?beta=true",
+            body=maybe_transform(
+                {
+                    "max_tokens": max_tokens,
+                    "messages": messages,
+                    "model": model,
+                    "metadata": metadata,
+                    "stop_sequences": stop_sequences,
+                    "stream": stream,
+                    "system": system,
+                    "temperature": temperature,
+                    "thinking": thinking,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_k": top_k,
+                    "top_p": top_p,
+                },
+                message_create_params.MessageCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BetaMessage,
+            stream=stream or False,
+            stream_cls=Stream[BetaRawMessageStreamEvent],
+        )
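+
+    # A minimal usage sketch for `create` above, assuming `client` is an
+    # `Anthropic` instance (the model name is illustrative):
+    #
+    #     message = client.beta.messages.create(
+    #         max_tokens=1024,
+    #         messages=[{"role": "user", "content": "Hello, Claude"}],
+    #         model="claude-3-7-sonnet-latest",
+    #     )
+    #     print(message.content)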
+
+    def stream(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[BetaMessageParam],
+        model: ModelParam,
+        metadata: BetaMetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[BetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: BetaThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: BetaToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[BetaToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaMessageStreamManager:
+        """Create a Message stream"""
+        extra_headers = {
+            "X-Stainless-Stream-Helper": "beta.messages",
+            **strip_not_given({"anthropic-beta": ",".join(str(e) for e in betas) if is_given(betas) else NOT_GIVEN}),
+            **(extra_headers or {}),
+        }
+        make_request = partial(
+            self._post,
+            "/v1/messages?beta=true",
+            body=maybe_transform(
+                {
+                    "max_tokens": max_tokens,
+                    "messages": messages,
+                    "model": model,
+                    "metadata": metadata,
+                    "stop_sequences": stop_sequences,
+                    "system": system,
+                    "temperature": temperature,
+                    "thinking": thinking,
+                    "top_k": top_k,
+                    "top_p": top_p,
+                    "tools": tools,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                },
+                message_create_params.MessageCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BetaMessage,
+            stream=True,
+            stream_cls=Stream[BetaRawMessageStreamEvent],
+        )
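+        # Sending the request is deferred: the returned manager issues it when
+        # entered as a context manager and then exposes the event stream.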
+        return BetaMessageStreamManager(make_request)
+
+    def count_tokens(
+        self,
+        *,
+        messages: Iterable[BetaMessageParam],
+        model: ModelParam,
+        system: Union[str, Iterable[BetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+        thinking: BetaThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: BetaToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[message_count_tokens_params.Tool] | NotGiven = NOT_GIVEN,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaMessageTokensCount:
+        """
+        Count the number of tokens in a Message.
+
+        The Token Count API can be used to count the number of tokens in a Message,
+        including tools, images, and documents, without creating the Message.
+
+        Learn more about token counting in our
+        [user guide](/en/docs/build-with-claude/token-counting).
+
+        Args:
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
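+
+        A minimal usage sketch, assuming `client` is an `Anthropic` instance
+        (the model name is illustrative):
+
+        ```python
+        count = client.beta.messages.count_tokens(
+            messages=[{"role": "user", "content": "Hello, Claude"}],
+            model="claude-3-7-sonnet-latest",
+        )
+        print(count.input_tokens)
+        ```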
+        """
+        extra_headers = {
+            **strip_not_given(
+                {
+                    "anthropic-beta": ",".join(chain((str(e) for e in betas), ["token-counting-2024-11-01"]))
+                    if is_given(betas)
+                    else NOT_GIVEN
+                }
+            ),
+            **(extra_headers or {}),
+        }
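+        # Ensure the token-counting beta flag is always present; any value set
+        # above (or supplied by the caller) takes precedence over this default.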
+        extra_headers = {"anthropic-beta": "token-counting-2024-11-01", **(extra_headers or {})}
+        return self._post(
+            "/v1/messages/count_tokens?beta=true",
+            body=maybe_transform(
+                {
+                    "messages": messages,
+                    "model": model,
+                    "system": system,
+                    "thinking": thinking,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                },
+                message_count_tokens_params.MessageCountTokensParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BetaMessageTokensCount,
+        )
+
+
+class AsyncMessages(AsyncAPIResource):
+    @cached_property
+    def batches(self) -> AsyncBatches:
+        return AsyncBatches(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncMessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncMessagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return AsyncMessagesWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[BetaMessageParam],
+        model: ModelParam,
+        metadata: BetaMetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        stream: Literal[False] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[BetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: BetaThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: BetaToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[BetaToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaMessage:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our [user guide](/en/docs/initial-setup).
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[BetaMessageParam],
+        model: ModelParam,
+        stream: Literal[True],
+        metadata: BetaMetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[BetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: BetaThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: BetaToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[BetaToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[BetaRawMessageStreamEvent]:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our [user guide](/en/docs/initial-setup)
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[BetaMessageParam],
+        model: ModelParam,
+        stream: bool,
+        metadata: BetaMetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[BetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: BetaThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: BetaToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[BetaToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaMessage | AsyncStream[BetaRawMessageStreamEvent]:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our [user guide](/en/docs/initial-setup)
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
+    async def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[BetaMessageParam],
+        model: ModelParam,
+        metadata: BetaMetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[BetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: BetaThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: BetaToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[BetaToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaMessage | AsyncStream[BetaRawMessageStreamEvent]:
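+        # When no explicit timeout is given for a non-streaming call, derive one
+        # from `max_tokens` so long generations are not cut off prematurely.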
+        if not stream and not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
+            timeout = self._client._calculate_nonstreaming_timeout(max_tokens)
+
+        if model in DEPRECATED_MODELS:
+            warnings.warn(
+                f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
+                DeprecationWarning,
+                stacklevel=3,
+            )
+
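+        # Fold any requested beta flags into a single comma-separated
+        # `anthropic-beta` header; values passed via `extra_headers` win.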
+        extra_headers = {
+            **strip_not_given({"anthropic-beta": ",".join(str(e) for e in betas) if is_given(betas) else NOT_GIVEN}),
+            **(extra_headers or {}),
+        }
+        return await self._post(
+            "/v1/messages?beta=true",
+            body=await async_maybe_transform(
+                {
+                    "max_tokens": max_tokens,
+                    "messages": messages,
+                    "model": model,
+                    "metadata": metadata,
+                    "stop_sequences": stop_sequences,
+                    "stream": stream,
+                    "system": system,
+                    "temperature": temperature,
+                    "thinking": thinking,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_k": top_k,
+                    "top_p": top_p,
+                },
+                message_create_params.MessageCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BetaMessage,
+            stream=stream or False,
+            stream_cls=AsyncStream[BetaRawMessageStreamEvent],
+        )
+
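+    # Example usage (an illustrative sketch, not part of the generated client;
+    # the model name below is a placeholder):
+    #
+    #     client = AsyncAnthropic()
+    #     message = await client.beta.messages.create(
+    #         max_tokens=1024,
+    #         messages=[{"role": "user", "content": "Hello, Claude"}],
+    #         model="claude-3-5-sonnet-latest",
+    #     )
+    #     print(message.content)
+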
+    def stream(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[BetaMessageParam],
+        model: ModelParam,
+        metadata: BetaMetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[BetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: BetaThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: BetaToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[BetaToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaAsyncMessageStreamManager:
+        extra_headers = {
+            "X-Stainless-Stream-Helper": "beta.messages",
+            **strip_not_given({"anthropic-beta": ",".join(str(e) for e in betas) if is_given(betas) else NOT_GIVEN}),
+            **(extra_headers or {}),
+        }
+        request = self._post(
+            "/v1/messages",
+            body=maybe_transform(
+                {
+                    "max_tokens": max_tokens,
+                    "messages": messages,
+                    "model": model,
+                    "metadata": metadata,
+                    "stop_sequences": stop_sequences,
+                    "system": system,
+                    "temperature": temperature,
+                    "thinking": thinking,
+                    "top_k": top_k,
+                    "top_p": top_p,
+                    "tools": tools,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                },
+                message_create_params.MessageCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BetaMessage,
+            stream=True,
+            stream_cls=AsyncStream[BetaRawMessageStreamEvent],
+        )
+        return BetaAsyncMessageStreamManager(request)
+
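+    # Example usage (an illustrative sketch; the model name below is a placeholder):
+    #
+    #     async with client.beta.messages.stream(
+    #         max_tokens=1024,
+    #         messages=[{"role": "user", "content": "Hello, Claude"}],
+    #         model="claude-3-5-sonnet-latest",
+    #     ) as stream:
+    #         async for text in stream.text_stream:
+    #             print(text, end="", flush=True)
+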
+    async def count_tokens(
+        self,
+        *,
+        messages: Iterable[BetaMessageParam],
+        model: ModelParam,
+        system: Union[str, Iterable[BetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+        thinking: BetaThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: BetaToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[message_count_tokens_params.Tool] | NotGiven = NOT_GIVEN,
+        betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaMessageTokensCount:
+        """
+        Count the number of tokens in a Message.
+
+        The Token Count API can be used to count the number of tokens in a Message,
+        including tools, images, and documents, without creating it.
+
+        Learn more about token counting in our
+        [user guide](/en/docs/build-with-claude/token-counting)
+
+        Args:
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          betas: Optional header to specify the beta version(s) you want to use.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {
+            **strip_not_given(
+                {
+                    "anthropic-beta": ",".join(chain((str(e) for e in betas), ["token-counting-2024-11-01"]))
+                    if is_given(betas)
+                    else NOT_GIVEN
+                }
+            ),
+            **(extra_headers or {}),
+        }
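+        # Make sure the token-counting beta flag is always present; an
+        # `anthropic-beta` value built above still takes precedence.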
+        extra_headers = {"anthropic-beta": "token-counting-2024-11-01", **(extra_headers or {})}
+        return await self._post(
+            "/v1/messages/count_tokens?beta=true",
+            body=await async_maybe_transform(
+                {
+                    "messages": messages,
+                    "model": model,
+                    "system": system,
+                    "thinking": thinking,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                },
+                message_count_tokens_params.MessageCountTokensParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BetaMessageTokensCount,
+        )
+
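+    # Example usage (an illustrative sketch; the model name below is a placeholder):
+    #
+    #     count = await client.beta.messages.count_tokens(
+    #         messages=[{"role": "user", "content": "Hello, world"}],
+    #         model="claude-3-5-sonnet-latest",
+    #     )
+    #     print(count.input_tokens)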
+
+class MessagesWithRawResponse:
+    def __init__(self, messages: Messages) -> None:
+        self._messages = messages
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            messages.create,
+        )
+        self.count_tokens = _legacy_response.to_raw_response_wrapper(
+            messages.count_tokens,
+        )
+
+    @cached_property
+    def batches(self) -> BatchesWithRawResponse:
+        return BatchesWithRawResponse(self._messages.batches)
+
+
+class AsyncMessagesWithRawResponse:
+    def __init__(self, messages: AsyncMessages) -> None:
+        self._messages = messages
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            messages.create,
+        )
+        self.count_tokens = _legacy_response.async_to_raw_response_wrapper(
+            messages.count_tokens,
+        )
+
+    @cached_property
+    def batches(self) -> AsyncBatchesWithRawResponse:
+        return AsyncBatchesWithRawResponse(self._messages.batches)
+
+
+class MessagesWithStreamingResponse:
+    def __init__(self, messages: Messages) -> None:
+        self._messages = messages
+
+        self.create = to_streamed_response_wrapper(
+            messages.create,
+        )
+        self.count_tokens = to_streamed_response_wrapper(
+            messages.count_tokens,
+        )
+
+    @cached_property
+    def batches(self) -> BatchesWithStreamingResponse:
+        return BatchesWithStreamingResponse(self._messages.batches)
+
+
+class AsyncMessagesWithStreamingResponse:
+    def __init__(self, messages: AsyncMessages) -> None:
+        self._messages = messages
+
+        self.create = async_to_streamed_response_wrapper(
+            messages.create,
+        )
+        self.count_tokens = async_to_streamed_response_wrapper(
+            messages.count_tokens,
+        )
+
+    @cached_property
+    def batches(self) -> AsyncBatchesWithStreamingResponse:
+        return AsyncBatchesWithStreamingResponse(self._messages.batches)
diff --git a/.venv/lib/python3.12/site-packages/anthropic/resources/beta/models.py b/.venv/lib/python3.12/site-packages/anthropic/resources/beta/models.py
new file mode 100644
index 00000000..04d620c5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/resources/beta/models.py
@@ -0,0 +1,300 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...pagination import SyncPage, AsyncPage
+from ...types.beta import model_list_params
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.beta.beta_model_info import BetaModelInfo
+
+__all__ = ["Models", "AsyncModels"]
+
+
+class Models(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> ModelsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return ModelsWithRawResponse(self)
+
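+    # Example usage (an illustrative sketch; the model ID below is a placeholder):
+    #
+    #     response = client.beta.models.with_raw_response.retrieve("claude-3-5-sonnet-latest")
+    #     print(response.headers.get("request-id"))
+    #     model = response.parse()
+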
+    @cached_property
+    def with_streaming_response(self) -> ModelsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return ModelsWithStreamingResponse(self)
+
+    def retrieve(
+        self,
+        model_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaModelInfo:
+        """
+        Get a specific model.
+
+        The Models API response can be used to determine information about a specific
+        model or resolve a model alias to a model ID.
+
+        Args:
+          model_id: Model identifier or alias.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not model_id:
+            raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}")
+        return self._get(
+            f"/v1/models/{model_id}?beta=true",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BetaModelInfo,
+        )
+
+    def list(
+        self,
+        *,
+        after_id: str | NotGiven = NOT_GIVEN,
+        before_id: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncPage[BetaModelInfo]:
+        """
+        List available models.
+
+        The Models API response can be used to determine which models are available for
+        use in the API. More recently released models are listed first.
+
+        Args:
+          after_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately after this object.
+
+          before_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately before this object.
+
+          limit: Number of items to return per page.
+
+              Defaults to `20`. Ranges from `1` to `1000`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/v1/models?beta=true",
+            page=SyncPage[BetaModelInfo],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after_id": after_id,
+                        "before_id": before_id,
+                        "limit": limit,
+                    },
+                    model_list_params.ModelListParams,
+                ),
+            ),
+            model=BetaModelInfo,
+        )
+
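+# Example usage (an illustrative sketch; the model ID below is a placeholder):
+#
+#     client = Anthropic()
+#     model = client.beta.models.retrieve("claude-3-5-sonnet-latest")
+#     for m in client.beta.models.list(limit=20):
+#         print(m.id)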
+
+class AsyncModels(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncModelsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncModelsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncModelsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return AsyncModelsWithStreamingResponse(self)
+
+    async def retrieve(
+        self,
+        model_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BetaModelInfo:
+        """
+        Get a specific model.
+
+        The Models API response can be used to determine information about a specific
+        model or resolve a model alias to a model ID.
+
+        Args:
+          model_id: Model identifier or alias.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not model_id:
+            raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}")
+        return await self._get(
+            f"/v1/models/{model_id}?beta=true",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BetaModelInfo,
+        )
+
+    def list(
+        self,
+        *,
+        after_id: str | NotGiven = NOT_GIVEN,
+        before_id: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[BetaModelInfo, AsyncPage[BetaModelInfo]]:
+        """
+        List available models.
+
+        The Models API response can be used to determine which models are available for
+        use in the API. More recently released models are listed first.
+
+        Args:
+          after_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately after this object.
+
+          before_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately before this object.
+
+          limit: Number of items to return per page.
+
+              Defaults to `20`. Ranges from `1` to `1000`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/v1/models?beta=true",
+            page=AsyncPage[BetaModelInfo],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after_id": after_id,
+                        "before_id": before_id,
+                        "limit": limit,
+                    },
+                    model_list_params.ModelListParams,
+                ),
+            ),
+            model=BetaModelInfo,
+        )
+
+
+class ModelsWithRawResponse:
+    def __init__(self, models: Models) -> None:
+        self._models = models
+
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            models.retrieve,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            models.list,
+        )
+
+
+class AsyncModelsWithRawResponse:
+    def __init__(self, models: AsyncModels) -> None:
+        self._models = models
+
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            models.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            models.list,
+        )
+
+
+class ModelsWithStreamingResponse:
+    def __init__(self, models: Models) -> None:
+        self._models = models
+
+        self.retrieve = to_streamed_response_wrapper(
+            models.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            models.list,
+        )
+
+
+class AsyncModelsWithStreamingResponse:
+    def __init__(self, models: AsyncModels) -> None:
+        self._models = models
+
+        self.retrieve = async_to_streamed_response_wrapper(
+            models.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            models.list,
+        )
diff --git a/.venv/lib/python3.12/site-packages/anthropic/resources/completions.py b/.venv/lib/python3.12/site-packages/anthropic/resources/completions.py
new file mode 100644
index 00000000..67e3977e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/resources/completions.py
@@ -0,0 +1,823 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Literal, overload
+
+import httpx
+
+from .. import _legacy_response
+from ..types import completion_create_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import (
+    is_given,
+    required_args,
+    maybe_transform,
+    async_maybe_transform,
+)
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .._constants import DEFAULT_TIMEOUT
+from .._streaming import Stream, AsyncStream
+from .._base_client import make_request_options
+from ..types.completion import Completion
+from ..types.model_param import ModelParam
+from ..types.metadata_param import MetadataParam
+
+__all__ = ["Completions", "AsyncCompletions"]
+
+
+class Completions(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> CompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return CompletionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return CompletionsWithStreamingResponse(self)
+
+    @overload
+    def create(
+        self,
+        *,
+        max_tokens_to_sample: int,
+        model: ModelParam,
+        prompt: str,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        stream: Literal[False] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion:
+        """[Legacy] Create a Text Completion.
+
+        The Text Completions API is a legacy API.
+
+        We recommend using the
+        [Messages API](https://docs.anthropic.com/en/api/messages) going forward.
+
+        Future models and features will not be compatible with Text Completions. See our
+        [migration guide](https://docs.anthropic.com/en/api/migrating-from-text-completions-to-messages)
+        for guidance in migrating from Text Completions to Messages.
+
+        Args:
+          max_tokens_to_sample: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+          model: The model that will complete your prompt.\n\nSee
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          prompt: The prompt that you want Claude to complete.
+
+              For proper response generation you will need to format your prompt using
+              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:
+
+              ```
+              "\n\nHuman: {userQuestion}\n\nAssistant:"
+              ```
+
+              See [prompt validation](https://docs.anthropic.com/en/api/prompt-validation) and
+              our guide to
+              [prompt design](https://docs.anthropic.com/en/docs/intro-to-prompting) for more
+              details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Sequences that will cause the model to stop generating.
+
+              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
+              sequences in the future. By providing the stop_sequences parameter, you may
+              include additional strings that will cause the model to stop generating.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/streaming) for details.
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        max_tokens_to_sample: int,
+        model: ModelParam,
+        prompt: str,
+        stream: Literal[True],
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[Completion]:
+        """[Legacy] Create a Text Completion.
+
+        The Text Completions API is a legacy API.
+
+        We recommend using the
+        [Messages API](https://docs.anthropic.com/en/api/messages) going forward.
+
+        Future models and features will not be compatible with Text Completions. See our
+        [migration guide](https://docs.anthropic.com/en/api/migrating-from-text-completions-to-messages)
+        for guidance in migrating from Text Completions to Messages.
+
+        Args:
+          max_tokens_to_sample: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+          model: The model that will complete your prompt.\n\nSee
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          prompt: The prompt that you want Claude to complete.
+
+              For proper response generation you will need to format your prompt using
+              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:
+
+              ```
+              "\n\nHuman: {userQuestion}\n\nAssistant:"
+              ```
+
+              See [prompt validation](https://docs.anthropic.com/en/api/prompt-validation) and
+              our guide to
+              [prompt design](https://docs.anthropic.com/en/docs/intro-to-prompting) for more
+              details.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/streaming) for details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Sequences that will cause the model to stop generating.
+
+              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
+              sequences in the future. By providing the stop_sequences parameter, you may
+              include additional strings that will cause the model to stop generating.
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        max_tokens_to_sample: int,
+        model: ModelParam,
+        prompt: str,
+        stream: bool,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion | Stream[Completion]:
+        """[Legacy] Create a Text Completion.
+
+        The Text Completions API is a legacy API.
+
+        We recommend using the
+        [Messages API](https://docs.anthropic.com/en/api/messages) going forward.
+
+        Future models and features will not be compatible with Text Completions. See our
+        [migration guide](https://docs.anthropic.com/en/api/migrating-from-text-completions-to-messages)
+        for guidance in migrating from Text Completions to Messages.
+
+        Args:
+          max_tokens_to_sample: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+          model: The model that will complete your prompt.\n\nSee
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          prompt: The prompt that you want Claude to complete.
+
+              For proper response generation you will need to format your prompt using
+              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:
+
+              ```
+              "\n\nHuman: {userQuestion}\n\nAssistant:"
+              ```
+
+              See [prompt validation](https://docs.anthropic.com/en/api/prompt-validation) and
+              our guide to
+              [prompt design](https://docs.anthropic.com/en/docs/intro-to-prompting) for more
+              details.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/streaming) for details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Sequences that will cause the model to stop generating.
+
+              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
+              sequences in the future. By providing the stop_sequences parameter, you may
+              include additional strings that will cause the model to stop generating.
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["max_tokens_to_sample", "model", "prompt"], ["max_tokens_to_sample", "model", "prompt", "stream"])
+    def create(
+        self,
+        *,
+        max_tokens_to_sample: int,
+        model: ModelParam,
+        prompt: str,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion | Stream[Completion]:
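+        # When no per-request timeout is given and the client is still on the
+        # library default, pin this long-running legacy request to 600 seconds.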
+        if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
+            timeout = 600
+        return self._post(
+            "/v1/complete",
+            body=maybe_transform(
+                {
+                    "max_tokens_to_sample": max_tokens_to_sample,
+                    "model": model,
+                    "prompt": prompt,
+                    "metadata": metadata,
+                    "stop_sequences": stop_sequences,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "top_k": top_k,
+                    "top_p": top_p,
+                },
+                completion_create_params.CompletionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Completion,
+            stream=stream or False,
+            stream_cls=Stream[Completion],
+        )
+
+
+class AsyncCompletions(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncCompletionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return AsyncCompletionsWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        *,
+        max_tokens_to_sample: int,
+        model: ModelParam,
+        prompt: str,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        stream: Literal[False] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion:
+        """[Legacy] Create a Text Completion.
+
+        The Text Completions API is a legacy API.
+
+        We recommend using the
+        [Messages API](https://docs.anthropic.com/en/api/messages) going forward.
+
+        Future models and features will not be compatible with Text Completions. See our
+        [migration guide](https://docs.anthropic.com/en/api/migrating-from-text-completions-to-messages)
+        for guidance in migrating from Text Completions to Messages.
+
+        Args:
+          max_tokens_to_sample: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+          model: The model that will complete your prompt.\n\nSee
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          prompt: The prompt that you want Claude to complete.
+
+              For proper response generation you will need to format your prompt using
+              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:
+
+              ```
+              "\n\nHuman: {userQuestion}\n\nAssistant:"
+              ```
+
+              See [prompt validation](https://docs.anthropic.com/en/api/prompt-validation) and
+              our guide to
+              [prompt design](https://docs.anthropic.com/en/docs/intro-to-prompting) for more
+              details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Sequences that will cause the model to stop generating.
+
+              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
+              sequences in the future. By providing the stop_sequences parameter, you may
+              include additional strings that will cause the model to stop generating.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/streaming) for details.
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        max_tokens_to_sample: int,
+        model: ModelParam,
+        prompt: str,
+        stream: Literal[True],
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[Completion]:
+        """[Legacy] Create a Text Completion.
+
+        The Text Completions API is a legacy API.
+
+        We recommend using the
+        [Messages API](https://docs.anthropic.com/en/api/messages) going forward.
+
+        Future models and features will not be compatible with Text Completions. See our
+        [migration guide](https://docs.anthropic.com/en/api/migrating-from-text-completions-to-messages)
+        for guidance in migrating from Text Completions to Messages.
+
+        Args:
+          max_tokens_to_sample: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+          model: The model that will complete your prompt.\n\nSee
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          prompt: The prompt that you want Claude to complete.
+
+              For proper response generation you will need to format your prompt using
+              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:
+
+              ```
+              "\n\nHuman: {userQuestion}\n\nAssistant:"
+              ```
+
+              See [prompt validation](https://docs.anthropic.com/en/api/prompt-validation) and
+              our guide to
+              [prompt design](https://docs.anthropic.com/en/docs/intro-to-prompting) for more
+              details.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/streaming) for details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Sequences that will cause the model to stop generating.
+
+              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
+              sequences in the future. By providing the stop_sequences parameter, you may
+              include additional strings that will cause the model to stop generating.
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        max_tokens_to_sample: int,
+        model: ModelParam,
+        prompt: str,
+        stream: bool,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion | AsyncStream[Completion]:
+        """[Legacy] Create a Text Completion.
+
+        The Text Completions API is a legacy API.
+
+        We recommend using the
+        [Messages API](https://docs.anthropic.com/en/api/messages) going forward.
+
+        Future models and features will not be compatible with Text Completions. See our
+        [migration guide](https://docs.anthropic.com/en/api/migrating-from-text-completions-to-messages)
+        for guidance in migrating from Text Completions to Messages.
+
+        Args:
+          max_tokens_to_sample: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+          model: The model that will complete your prompt.\n\nSee
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          prompt: The prompt that you want Claude to complete.
+
+              For proper response generation you will need to format your prompt using
+              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:
+
+              ```
+              "\n\nHuman: {userQuestion}\n\nAssistant:"
+              ```
+
+              See [prompt validation](https://docs.anthropic.com/en/api/prompt-validation) and
+              our guide to
+              [prompt design](https://docs.anthropic.com/en/docs/intro-to-prompting) for more
+              details.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/streaming) for details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Sequences that will cause the model to stop generating.
+
+              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
+              sequences in the future. By providing the stop_sequences parameter, you may
+              include additional strings that will cause the model to stop generating.
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["max_tokens_to_sample", "model", "prompt"], ["max_tokens_to_sample", "model", "prompt", "stream"])
+    async def create(
+        self,
+        *,
+        max_tokens_to_sample: int,
+        model: ModelParam,
+        prompt: str,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion | AsyncStream[Completion]:
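+        # (Mirrors the sync implementation above, including the timeout bump.)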
+        # When no per-request timeout is given and the client is still on the
+        # library default, pin this long-running legacy request to 600 seconds.
+        if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
+            timeout = 600
+        return await self._post(
+            "/v1/complete",
+            body=await async_maybe_transform(
+                {
+                    "max_tokens_to_sample": max_tokens_to_sample,
+                    "model": model,
+                    "prompt": prompt,
+                    "metadata": metadata,
+                    "stop_sequences": stop_sequences,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "top_k": top_k,
+                    "top_p": top_p,
+                },
+                completion_create_params.CompletionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Completion,
+            stream=stream or False,
+            stream_cls=AsyncStream[Completion],
+        )
+
+
+class CompletionsWithRawResponse:
+    def __init__(self, completions: Completions) -> None:
+        self._completions = completions
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            completions.create,
+        )
+
+
+class AsyncCompletionsWithRawResponse:
+    def __init__(self, completions: AsyncCompletions) -> None:
+        self._completions = completions
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            completions.create,
+        )
+
+
+class CompletionsWithStreamingResponse:
+    def __init__(self, completions: Completions) -> None:
+        self._completions = completions
+
+        self.create = to_streamed_response_wrapper(
+            completions.create,
+        )
+
+
+class AsyncCompletionsWithStreamingResponse:
+    def __init__(self, completions: AsyncCompletions) -> None:
+        self._completions = completions
+
+        self.create = async_to_streamed_response_wrapper(
+            completions.create,
+        )
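
A hedged usage sketch for the legacy endpoint defined above; the model name is illustrative, and `HUMAN_PROMPT` / `AI_PROMPT` are the SDK's exported `\n\nHuman:` and `\n\nAssistant:` constants.

```python
from anthropic import AI_PROMPT, HUMAN_PROMPT, Anthropic

client = Anthropic()

# Non-streaming call: returns a single Completion.
completion = client.completions.create(
    model="claude-2.1",  # illustrative legacy model name
    max_tokens_to_sample=256,
    prompt=f"{HUMAN_PROMPT} Why is the sky blue?{AI_PROMPT}",
)
print(completion.completion)

# With stream=True the same method instead returns a Stream[Completion]
# that yields incremental completion deltas.
for chunk in client.completions.create(
    model="claude-2.1",
    max_tokens_to_sample=256,
    prompt=f"{HUMAN_PROMPT} Why is the sky blue?{AI_PROMPT}",
    stream=True,
):
    print(chunk.completion, end="", flush=True)
```
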
diff --git a/.venv/lib/python3.12/site-packages/anthropic/resources/messages/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/resources/messages/__init__.py
new file mode 100644
index 00000000..6e7cf9d9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/resources/messages/__init__.py
@@ -0,0 +1,35 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .batches import (
+    Batches,
+    AsyncBatches,
+    BatchesWithRawResponse,
+    AsyncBatchesWithRawResponse,
+    BatchesWithStreamingResponse,
+    AsyncBatchesWithStreamingResponse,
+)
+from .messages import (
+    DEPRECATED_MODELS,
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
+
+__all__ = [
+    "Batches",
+    "AsyncBatches",
+    "BatchesWithRawResponse",
+    "AsyncBatchesWithRawResponse",
+    "BatchesWithStreamingResponse",
+    "AsyncBatchesWithStreamingResponse",
+    "Messages",
+    "AsyncMessages",
+    "MessagesWithRawResponse",
+    "AsyncMessagesWithRawResponse",
+    "MessagesWithStreamingResponse",
+    "AsyncMessagesWithStreamingResponse",
+    "DEPRECATED_MODELS",
+]
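
For orientation, a minimal sketch of how these re-exports are typically reached through the client (direct imports from the subpackage also work):

```python
from anthropic import Anthropic
from anthropic.resources.messages import Batches, Messages  # direct imports

client = Anthropic()  # assumes ANTHROPIC_API_KEY in the environment
messages = client.messages          # Messages resource
batches = client.messages.batches   # nested Batches resource
```
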
diff --git a/.venv/lib/python3.12/site-packages/anthropic/resources/messages/batches.py b/.venv/lib/python3.12/site-packages/anthropic/resources/messages/batches.py
new file mode 100644
index 00000000..4ebd8fd4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/resources/messages/batches.py
@@ -0,0 +1,717 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...pagination import SyncPage, AsyncPage
+from ..._exceptions import AnthropicError
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.messages import batch_list_params, batch_create_params
+from ..._decoders.jsonl import JSONLDecoder, AsyncJSONLDecoder
+from ...types.messages.message_batch import MessageBatch
+from ...types.messages.deleted_message_batch import DeletedMessageBatch
+from ...types.messages.message_batch_individual_response import MessageBatchIndividualResponse
+
+__all__ = ["Batches", "AsyncBatches"]
+
+
+class Batches(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> BatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return BatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> BatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return BatchesWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        requests: Iterable[batch_create_params.Request],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageBatch:
+        """
+        Send a batch of Message creation requests.
+
+        The Message Batches API can be used to process multiple Messages API requests at
+        once. Once a Message Batch is created, it begins processing immediately. Batches
+        can take up to 24 hours to complete.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          requests: List of requests for prompt completion. Each is an individual request to create
+              a Message.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/v1/messages/batches",
+            body=maybe_transform({"requests": requests}, batch_create_params.BatchCreateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageBatch,
+        )
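+
+    # Illustrative request shape for create() (a sketch, not part of the
+    # generated client; the model name is an assumption):
+    #
+    #     batch = client.messages.batches.create(
+    #         requests=[
+    #             {
+    #                 "custom_id": "req-1",  # used to match results to requests
+    #                 "params": {
+    #                     "model": "claude-3-5-sonnet-latest",
+    #                     "max_tokens": 256,
+    #                     "messages": [{"role": "user", "content": "Hello"}],
+    #                 },
+    #             },
+    #         ],
+    #     )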
+
+    def retrieve(
+        self,
+        message_batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageBatch:
+        """This endpoint is idempotent and can be used to poll for Message Batch
+        completion.
+
+        To access the results of a Message Batch, make a request to the
+        `results_url` field in the response.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        return self._get(
+            f"/v1/messages/batches/{message_batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageBatch,
+        )
+
+    def list(
+        self,
+        *,
+        after_id: str | NotGiven = NOT_GIVEN,
+        before_id: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncPage[MessageBatch]:
+        """List all Message Batches within a Workspace.
+
+        Most recently created batches are
+        returned first.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          after_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately after this object.
+
+          before_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately before this object.
+
+          limit: Number of items to return per page.
+
+              Defaults to `20`. Ranges from `1` to `1000`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/v1/messages/batches",
+            page=SyncPage[MessageBatch],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after_id": after_id,
+                        "before_id": before_id,
+                        "limit": limit,
+                    },
+                    batch_list_params.BatchListParams,
+                ),
+            ),
+            model=MessageBatch,
+        )
+
+    def delete(
+        self,
+        message_batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> DeletedMessageBatch:
+        """
+        Delete a Message Batch.
+
+        Message Batches can only be deleted once they've finished processing. If you'd
+        like to delete an in-progress batch, you must first cancel it.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        return self._delete(
+            f"/v1/messages/batches/{message_batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=DeletedMessageBatch,
+        )
+
+    def cancel(
+        self,
+        message_batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageBatch:
+        """Batches may be canceled any time before processing ends.
+
+        Once cancellation is
+        initiated, the batch enters a `canceling` state, at which time the system may
+        complete any in-progress, non-interruptible requests before finalizing
+        cancellation.
+
+        The number of canceled requests is specified in `request_counts`. To determine
+        which requests were canceled, check the individual results within the batch.
+        Note that cancellation may not result in any canceled requests if they were
+        non-interruptible.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        return self._post(
+            f"/v1/messages/batches/{message_batch_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageBatch,
+        )
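+
+    # Illustrative teardown of an in-progress batch (a sketch): delete() only
+    # succeeds once processing has ended, so cancel first, then delete.
+    #
+    #     client.messages.batches.cancel(batch.id)
+    #     # ...wait until processing_status == "ended"...
+    #     client.messages.batches.delete(batch.id)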
+
+    def results(
+        self,
+        message_batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> JSONLDecoder[MessageBatchIndividualResponse]:
+        """
+        Streams the results of a Message Batch as a `.jsonl` file.
+
+        Each line in the file is a JSON object containing the result of a single request
+        in the Message Batch. Results are not guaranteed to be in the same order as
+        requests. Use the `custom_id` field to match results to requests.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+
+        batch = self.retrieve(message_batch_id=message_batch_id)
+        if not batch.results_url:
+            raise AnthropicError(
+                f"No `results_url` for the given batch; Has it finished processing? {batch.processing_status}"
+            )
+
+        extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
+        return self._get(
+            batch.results_url,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=JSONLDecoder[MessageBatchIndividualResponse],
+            stream=True,
+        )
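+
+    # Illustrative consumption pattern (a sketch; the poll interval is
+    # arbitrary, and the result/field names follow the types imported above):
+    #
+    #     import time
+    #
+    #     while client.messages.batches.retrieve(batch.id).processing_status == "in_progress":
+    #         time.sleep(60)
+    #
+    #     for entry in client.messages.batches.results(batch.id):
+    #         if entry.result.type == "succeeded":
+    #             print(entry.custom_id, entry.result.message.content)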
+
+
+class AsyncBatches(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncBatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncBatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return AsyncBatchesWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        requests: Iterable[batch_create_params.Request],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageBatch:
+        """
+        Send a batch of Message creation requests.
+
+        The Message Batches API can be used to process multiple Messages API requests at
+        once. Once a Message Batch is created, it begins processing immediately. Batches
+        can take up to 24 hours to complete.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          requests: List of requests for prompt completion. Each is an individual request to create
+              a Message.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/v1/messages/batches",
+            body=await async_maybe_transform({"requests": requests}, batch_create_params.BatchCreateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageBatch,
+        )
+
+    async def retrieve(
+        self,
+        message_batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageBatch:
+        """This endpoint is idempotent and can be used to poll for Message Batch
+        completion.
+
+        To access the results of a Message Batch, make a request to the
+        `results_url` field in the response.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        return await self._get(
+            f"/v1/messages/batches/{message_batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageBatch,
+        )
+
+    def list(
+        self,
+        *,
+        after_id: str | NotGiven = NOT_GIVEN,
+        before_id: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[MessageBatch, AsyncPage[MessageBatch]]:
+        """List all Message Batches within a Workspace.
+
+        Most recently created batches are
+        returned first.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          after_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately after this object.
+
+          before_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately before this object.
+
+          limit: Number of items to return per page.
+
+              Defaults to `20`. Ranges from `1` to `1000`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/v1/messages/batches",
+            page=AsyncPage[MessageBatch],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after_id": after_id,
+                        "before_id": before_id,
+                        "limit": limit,
+                    },
+                    batch_list_params.BatchListParams,
+                ),
+            ),
+            model=MessageBatch,
+        )
+
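+    # A minimal pagination sketch (assumes an `AsyncAnthropic` client named
+    # `client`); the returned `AsyncPaginator` supports `async for` and fetches
+    # subsequent pages transparently:
+    #
+    #   async for batch in client.messages.batches.list(limit=20):
+    #       print(batch.id, batch.processing_status)
+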
+    async def delete(
+        self,
+        message_batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> DeletedMessageBatch:
+        """
+        Delete a Message Batch.
+
+        Message Batches can only be deleted once they've finished processing. If you'd
+        like to delete an in-progress batch, you must first cancel it.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        return await self._delete(
+            f"/v1/messages/batches/{message_batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=DeletedMessageBatch,
+        )
+
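+    # A minimal deletion sketch (assumes an `AsyncAnthropic` client named
+    # `client`; the batch ID is a placeholder). Only batches that have finished
+    # processing can be deleted:
+    #
+    #   batch = await client.messages.batches.retrieve("msgbatch_xyz")
+    #   if batch.processing_status == "ended":
+    #       deleted = await client.messages.batches.delete(batch.id)
+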
+    async def cancel(
+        self,
+        message_batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageBatch:
+        """Batches may be canceled any time before processing ends.
+
+        Once cancellation is
+        initiated, the batch enters a `canceling` state, at which time the system may
+        complete any in-progress, non-interruptible requests before finalizing
+        cancellation.
+
+        The number of canceled requests is specified in `request_counts`. To determine
+        which requests were canceled, check the individual results within the batch.
+        Note that cancellation may not result in any canceled requests if they were
+        non-interruptible.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        return await self._post(
+            f"/v1/messages/batches/{message_batch_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageBatch,
+        )
+
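+    # A minimal cancellation sketch (assumes an `AsyncAnthropic` client named
+    # `client`; the batch ID is a placeholder):
+    #
+    #   batch = await client.messages.batches.cancel("msgbatch_xyz")
+    #   # The batch enters `canceling`; poll `retrieve` until it has `ended`,
+    #   # then inspect `batch.request_counts.canceled`.
+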
+    async def results(
+        self,
+        message_batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncJSONLDecoder[MessageBatchIndividualResponse]:
+        """
+        Streams the results of a Message Batch as a `.jsonl` file.
+
+        Each line in the file is a JSON object containing the result of a single request
+        in the Message Batch. Results are not guaranteed to be in the same order as
+        requests. Use the `custom_id` field to match results to requests.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+
+        batch = await self.retrieve(message_batch_id=message_batch_id)
+        if not batch.results_url:
+            raise AnthropicError(
+                f"No `results_url` for the given batch; Has it finished processing? {batch.processing_status}"
+            )
+
+        extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
+        return await self._get(
+            batch.results_url,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=AsyncJSONLDecoder[MessageBatchIndividualResponse],
+            stream=True,
+        )
+
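+    # A minimal results sketch (assumes an `AsyncAnthropic` client named
+    # `client`; the batch ID is a placeholder). Results stream back in arbitrary
+    # order, so match entries on `custom_id`:
+    #
+    #   results = await client.messages.batches.results("msgbatch_xyz")
+    #   async for entry in results:
+    #       if entry.result.type == "succeeded":
+    #           print(entry.custom_id, entry.result.message.content)
+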
+
+class BatchesWithRawResponse:
+    def __init__(self, batches: Batches) -> None:
+        self._batches = batches
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            batches.list,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            batches.delete,
+        )
+        self.cancel = _legacy_response.to_raw_response_wrapper(
+            batches.cancel,
+        )
+
+
+class AsyncBatchesWithRawResponse:
+    def __init__(self, batches: AsyncBatches) -> None:
+        self._batches = batches
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            batches.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            batches.delete,
+        )
+        self.cancel = _legacy_response.async_to_raw_response_wrapper(
+            batches.cancel,
+        )
+
+
+class BatchesWithStreamingResponse:
+    def __init__(self, batches: Batches) -> None:
+        self._batches = batches
+
+        self.create = to_streamed_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            batches.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            batches.delete,
+        )
+        self.cancel = to_streamed_response_wrapper(
+            batches.cancel,
+        )
+
+
+class AsyncBatchesWithStreamingResponse:
+    def __init__(self, batches: AsyncBatches) -> None:
+        self._batches = batches
+
+        self.create = async_to_streamed_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            batches.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            batches.delete,
+        )
+        self.cancel = async_to_streamed_response_wrapper(
+            batches.cancel,
+        )
diff --git a/.venv/lib/python3.12/site-packages/anthropic/resources/messages/messages.py b/.venv/lib/python3.12/site-packages/anthropic/resources/messages/messages.py
new file mode 100644
index 00000000..70bceb7f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/resources/messages/messages.py
@@ -0,0 +1,2551 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import warnings
+from typing import List, Union, Iterable
+from functools import partial
+from typing_extensions import Literal, overload
+
+import httpx
+
+from ... import _legacy_response
+from ...types import (
+    message_create_params,
+    message_count_tokens_params,
+)
+from .batches import (
+    Batches,
+    AsyncBatches,
+    BatchesWithRawResponse,
+    AsyncBatchesWithRawResponse,
+    BatchesWithStreamingResponse,
+    AsyncBatchesWithStreamingResponse,
+)
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
+    is_given,
+    required_args,
+    maybe_transform,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ..._constants import DEFAULT_TIMEOUT
+from ..._streaming import Stream, AsyncStream
+from ..._base_client import make_request_options
+from ...lib.streaming import MessageStreamManager, AsyncMessageStreamManager
+from ...types.message import Message
+from ...types.model_param import ModelParam
+from ...types.message_param import MessageParam
+from ...types.metadata_param import MetadataParam
+from ...types.text_block_param import TextBlockParam
+from ...types.tool_union_param import ToolUnionParam
+from ...types.tool_choice_param import ToolChoiceParam
+from ...types.message_tokens_count import MessageTokensCount
+from ...types.thinking_config_param import ThinkingConfigParam
+from ...types.raw_message_stream_event import RawMessageStreamEvent
+from ...types.message_count_tokens_tool_param import MessageCountTokensToolParam
+
+__all__ = ["Messages", "AsyncMessages"]
+
+
+DEPRECATED_MODELS = {
+    "claude-1.3": "November 6th, 2024",
+    "claude-1.3-100k": "November 6th, 2024",
+    "claude-instant-1.1": "November 6th, 2024",
+    "claude-instant-1.1-100k": "November 6th, 2024",
+    "claude-instant-1.2": "November 6th, 2024",
+    "claude-3-sonnet-20240229": "July 21st, 2025",
+    "claude-2.1": "July 21st, 2025",
+    "claude-2.0": "July 21st, 2025",
+}
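+# Consulted by `create()` and `stream()` below: requesting one of these model IDs
+# emits a `DeprecationWarning` naming its end-of-life date.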
+
+
+class Messages(SyncAPIResource):
+    @cached_property
+    def batches(self) -> Batches:
+        return Batches(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> MessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return MessagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> MessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return MessagesWithStreamingResponse(self)
+
+    @overload
+    def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        stream: Literal[False] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our [user guide](/en/docs/initial-setup)
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See [models](https://docs.anthropic.com/en/docs/models-overview) for
+              additional details and options.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly-recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        stream: Literal[True],
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[RawMessageStreamEvent]:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our [user guide](/en/docs/initial-setup)
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See [models](https://docs.anthropic.com/en/docs/models-overview) for
+              additional details and options.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly-recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        stream: bool,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message | Stream[RawMessageStreamEvent]:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our [user guide](/en/docs/initial-setup)
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See [models](https://docs.anthropic.com/en/docs/models-overview) for
+              additional details and options.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly-recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
+    def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message | Stream[RawMessageStreamEvent]:
+        if not stream and not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
+            timeout = self._client._calculate_nonstreaming_timeout(max_tokens)
+
+        if model in DEPRECATED_MODELS:
+            warnings.warn(
+                f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
+                DeprecationWarning,
+                stacklevel=3,
+            )
+
+        return self._post(
+            "/v1/messages",
+            body=maybe_transform(
+                {
+                    "max_tokens": max_tokens,
+                    "messages": messages,
+                    "model": model,
+                    "metadata": metadata,
+                    "stop_sequences": stop_sequences,
+                    "stream": stream,
+                    "system": system,
+                    "temperature": temperature,
+                    "thinking": thinking,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_k": top_k,
+                    "top_p": top_p,
+                },
+                message_create_params.MessageCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+            stream=stream or False,
+            stream_cls=Stream[RawMessageStreamEvent],
+        )
+
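+    # A minimal usage sketch (assumes a sync `Anthropic` client named `client`;
+    # the model name is a placeholder). With `stream=True` the same call returns
+    # a `Stream[RawMessageStreamEvent]` instead of a `Message`:
+    #
+    #   message = client.messages.create(
+    #       max_tokens=1024,
+    #       messages=[{"role": "user", "content": "Hello"}],
+    #       model="claude-3-5-sonnet-latest",
+    #   )
+    #   print(message.content)
+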
+    def stream(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageStreamManager:
+        """Create a Message stream"""
+        if model in DEPRECATED_MODELS:
+            warnings.warn(
+                f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
+                DeprecationWarning,
+                stacklevel=3,
+            )
+
+        extra_headers = {
+            "X-Stainless-Stream-Helper": "messages",
+            **(extra_headers or {}),
+        }
+        make_request = partial(
+            self._post,
+            "/v1/messages",
+            body=maybe_transform(
+                {
+                    "max_tokens": max_tokens,
+                    "messages": messages,
+                    "model": model,
+                    "metadata": metadata,
+                    "stop_sequences": stop_sequences,
+                    "system": system,
+                    "temperature": temperature,
+                    "top_k": top_k,
+                    "top_p": top_p,
+                    "tools": tools,
+                    "thinking": thinking,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                },
+                message_create_params.MessageCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+            stream=True,
+            stream_cls=Stream[RawMessageStreamEvent],
+        )
+        return MessageStreamManager(make_request)
+
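+    # A minimal streaming sketch (assumes a sync `Anthropic` client named
+    # `client`; the model name is a placeholder). The returned
+    # `MessageStreamManager` is used as a context manager:
+    #
+    #   with client.messages.stream(
+    #       max_tokens=1024,
+    #       messages=[{"role": "user", "content": "Hello"}],
+    #       model="claude-3-5-sonnet-latest",
+    #   ) as stream:
+    #       for text in stream.text_stream:
+    #           print(text, end="", flush=True)
+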
+    def count_tokens(
+        self,
+        *,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[MessageCountTokensToolParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageTokensCount:
+        """
+        Count the number of tokens in a Message.
+
+        The Token Count API can be used to count the number of tokens in a Message,
+        including tools, images, and documents, without actually creating it.
+
+        Learn more about token counting in our
+        [user guide](/en/docs/build-with-claude/token-counting)
+
+        Args:
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly-recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
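+
+        A hedged example, assuming a configured `Anthropic` client named
+        `client`:
+
+        ```python
+        count = client.messages.count_tokens(
+            model="claude-3-5-sonnet-latest",
+            messages=[{"role": "user", "content": "Hello, Claude"}],
+        )
+        print(count.input_tokens)
+        ```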
+        """
+        return self._post(
+            "/v1/messages/count_tokens",
+            body=maybe_transform(
+                {
+                    "messages": messages,
+                    "model": model,
+                    "system": system,
+                    "thinking": thinking,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                },
+                message_count_tokens_params.MessageCountTokensParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageTokensCount,
+        )
+
+
+class AsyncMessages(AsyncAPIResource):
+    @cached_property
+    def batches(self) -> AsyncBatches:
+        return AsyncBatches(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncMessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncMessagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return AsyncMessagesWithStreamingResponse(self)
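+
+    # Hedged usage sketch for the wrappers above; `client` is an assumed
+    # `AsyncAnthropic()` instance:
+    #
+    #     response = await client.messages.with_raw_response.create(
+    #         max_tokens=1024,
+    #         messages=[{"role": "user", "content": "Hello, Claude"}],
+    #         model="claude-3-5-sonnet-latest",
+    #     )
+    #     print(response.headers.get("request-id"))
+    #     message = response.parse()  # recover the typed `Message`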
+
+    @overload
+    async def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        stream: Literal[False] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our [user guide](/en/docs/initial-setup)
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly-recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
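+
+        A hedged non-streaming example, assuming an `AsyncAnthropic` client
+        named `client`:
+
+        ```python
+        message = await client.messages.create(
+            max_tokens=1024,
+            messages=[{"role": "user", "content": "Hello, Claude"}],
+            model="claude-3-5-sonnet-latest",
+        )
+        print(message.content)
+        ```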
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        stream: Literal[True],
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[RawMessageStreamEvent]:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our [user guide](/en/docs/initial-setup)
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly-recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
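+
+        A hedged streaming example, assuming an `AsyncAnthropic` client named
+        `client`; with `stream=True` the awaited call yields raw server-sent
+        events:
+
+        ```python
+        stream = await client.messages.create(
+            max_tokens=1024,
+            messages=[{"role": "user", "content": "Hello, Claude"}],
+            model="claude-3-5-sonnet-latest",
+            stream=True,
+        )
+        async for event in stream:
+            print(event.type)
+        ```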
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        stream: bool,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message | AsyncStream[RawMessageStreamEvent]:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our [user guide](/en/docs/initial-setup)
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly-recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
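+
+        A hedged sketch of the runtime-flag case this overload covers;
+        `client` and `should_stream` are assumptions:
+
+        ```python
+        result = await client.messages.create(
+            max_tokens=1024,
+            messages=[{"role": "user", "content": "Hello, Claude"}],
+            model="claude-3-5-sonnet-latest",
+            stream=should_stream,
+        )
+        if isinstance(result, Message):
+            print(result.content)
+        else:
+            async for event in result:
+                print(event.type)
+        ```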
+        """
+        ...
+
+    @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
+    async def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message | AsyncStream[RawMessageStreamEvent]:
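+        # For non-streaming calls with no per-request timeout and the default
+        # client timeout, derive a timeout from `max_tokens`, since large token
+        # budgets can take longer than the default allows.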
+        if not stream and not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
+            timeout = self._client._calculate_nonstreaming_timeout(max_tokens)
+
+        if model in DEPRECATED_MODELS:
+            warnings.warn(
+                f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
+                DeprecationWarning,
+                stacklevel=3,
+            )
+
+        return await self._post(
+            "/v1/messages",
+            body=await async_maybe_transform(
+                {
+                    "max_tokens": max_tokens,
+                    "messages": messages,
+                    "model": model,
+                    "metadata": metadata,
+                    "stop_sequences": stop_sequences,
+                    "stream": stream,
+                    "system": system,
+                    "temperature": temperature,
+                    "thinking": thinking,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_k": top_k,
+                    "top_p": top_p,
+                },
+                message_create_params.MessageCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+            stream=stream or False,
+            stream_cls=AsyncStream[RawMessageStreamEvent],
+        )
+
+    def stream(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncMessageStreamManager:
+        """Create a Message stream"""
+        if model in DEPRECATED_MODELS:
+            warnings.warn(
+                f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
+                DeprecationWarning,
+                stacklevel=3,
+            )
+
+        extra_headers = {
+            "X-Stainless-Stream-Helper": "messages",
+            **(extra_headers or {}),
+        }
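+        # `self._post` is deliberately not awaited here: the coroutine is
+        # handed to `AsyncMessageStreamManager`, which awaits it when the
+        # caller enters the `async with` block.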
+        request = self._post(
+            "/v1/messages",
+            body=maybe_transform(
+                {
+                    "max_tokens": max_tokens,
+                    "messages": messages,
+                    "model": model,
+                    "metadata": metadata,
+                    "stop_sequences": stop_sequences,
+                    "system": system,
+                    "temperature": temperature,
+                    "top_k": top_k,
+                    "top_p": top_p,
+                    "tools": tools,
+                    "thinking": thinking,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                },
+                message_create_params.MessageCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+            stream=True,
+            stream_cls=AsyncStream[RawMessageStreamEvent],
+        )
+        return AsyncMessageStreamManager(request)
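+
+    # A minimal usage sketch for the async `stream` helper above; `client` is
+    # an assumed `AsyncAnthropic()` instance:
+    #
+    #     async with client.messages.stream(
+    #         max_tokens=1024,
+    #         messages=[{"role": "user", "content": "Hello, Claude"}],
+    #         model="claude-3-5-sonnet-latest",
+    #     ) as stream:
+    #         async for text in stream.text_stream:
+    #             print(text, end="", flush=True)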
+
+    async def count_tokens(
+        self,
+        *,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[MessageCountTokensToolParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageTokensCount:
+        """
+        Count the number of tokens in a Message.
+
+        The Token Count API can be used to count the number of tokens in a Message,
+        including tools, images, and documents, without creating it.
+
+        Learn more about token counting in our
+        [user guide](/en/docs/build-with-claude/token-counting)
+
+        Args:
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              If you then asked the model "What's the S&P 500 at today?", it might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/v1/messages/count_tokens",
+            body=await async_maybe_transform(
+                {
+                    "messages": messages,
+                    "model": model,
+                    "system": system,
+                    "thinking": thinking,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                },
+                message_count_tokens_params.MessageCountTokensParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageTokensCount,
+        )
+
+
+class MessagesWithRawResponse:
+    def __init__(self, messages: Messages) -> None:
+        self._messages = messages
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            messages.create,
+        )
+        self.count_tokens = _legacy_response.to_raw_response_wrapper(
+            messages.count_tokens,
+        )
+
+    @cached_property
+    def batches(self) -> BatchesWithRawResponse:
+        return BatchesWithRawResponse(self._messages.batches)
+
+
+class AsyncMessagesWithRawResponse:
+    def __init__(self, messages: AsyncMessages) -> None:
+        self._messages = messages
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            messages.create,
+        )
+        self.count_tokens = _legacy_response.async_to_raw_response_wrapper(
+            messages.count_tokens,
+        )
+
+    @cached_property
+    def batches(self) -> AsyncBatchesWithRawResponse:
+        return AsyncBatchesWithRawResponse(self._messages.batches)
+
+
+class MessagesWithStreamingResponse:
+    def __init__(self, messages: Messages) -> None:
+        self._messages = messages
+
+        self.create = to_streamed_response_wrapper(
+            messages.create,
+        )
+        self.count_tokens = to_streamed_response_wrapper(
+            messages.count_tokens,
+        )
+
+    @cached_property
+    def batches(self) -> BatchesWithStreamingResponse:
+        return BatchesWithStreamingResponse(self._messages.batches)
+
+
+class AsyncMessagesWithStreamingResponse:
+    def __init__(self, messages: AsyncMessages) -> None:
+        self._messages = messages
+
+        self.create = async_to_streamed_response_wrapper(
+            messages.create,
+        )
+        self.count_tokens = async_to_streamed_response_wrapper(
+            messages.count_tokens,
+        )
+
+    @cached_property
+    def batches(self) -> AsyncBatchesWithStreamingResponse:
+        return AsyncBatchesWithStreamingResponse(self._messages.batches)
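
A minimal usage sketch for the `count_tokens` method defined above, checking input
size before committing to a full `create` call; the model name is an assumption and
any model available to your account will do:

```python
from anthropic import Anthropic

client = Anthropic()  # reads ANTHROPIC_API_KEY from the environment

count = client.messages.count_tokens(
    model="claude-3-5-sonnet-latest",
    messages=[{"role": "user", "content": "Hello, Claude"}],
)
print(count.input_tokens)
```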
diff --git a/.venv/lib/python3.12/site-packages/anthropic/resources/models.py b/.venv/lib/python3.12/site-packages/anthropic/resources/models.py
new file mode 100644
index 00000000..3469ccf9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/resources/models.py
@@ -0,0 +1,300 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from .. import _legacy_response
+from ..types import model_list_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import maybe_transform
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ..pagination import SyncPage, AsyncPage
+from .._base_client import AsyncPaginator, make_request_options
+from ..types.model_info import ModelInfo
+
+__all__ = ["Models", "AsyncModels"]
+
+
+class Models(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> ModelsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return ModelsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> ModelsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return ModelsWithStreamingResponse(self)
+
+    def retrieve(
+        self,
+        model_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ModelInfo:
+        """
+        Get a specific model.
+
+        The Models API response can be used to determine information about a specific
+        model or resolve a model alias to a model ID.
+
+        Args:
+          model_id: Model identifier or alias.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not model_id:
+            raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}")
+        return self._get(
+            f"/v1/models/{model_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ModelInfo,
+        )
+
+    def list(
+        self,
+        *,
+        after_id: str | NotGiven = NOT_GIVEN,
+        before_id: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncPage[ModelInfo]:
+        """
+        List available models.
+
+        The Models API response can be used to determine which models are available for
+        use in the API. More recently released models are listed first.
+
+        Args:
+          after_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately after this object.
+
+          before_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately before this object.
+
+          limit: Number of items to return per page.
+
+              Defaults to `20`. Ranges from `1` to `1000`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/v1/models",
+            page=SyncPage[ModelInfo],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after_id": after_id,
+                        "before_id": before_id,
+                        "limit": limit,
+                    },
+                    model_list_params.ModelListParams,
+                ),
+            ),
+            model=ModelInfo,
+        )
+
+
+class AsyncModels(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncModelsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncModelsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncModelsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return AsyncModelsWithStreamingResponse(self)
+
+    async def retrieve(
+        self,
+        model_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ModelInfo:
+        """
+        Get a specific model.
+
+        The Models API response can be used to determine information about a specific
+        model or resolve a model alias to a model ID.
+
+        Args:
+          model_id: Model identifier or alias.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not model_id:
+            raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}")
+        return await self._get(
+            f"/v1/models/{model_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ModelInfo,
+        )
+
+    def list(
+        self,
+        *,
+        after_id: str | NotGiven = NOT_GIVEN,
+        before_id: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[ModelInfo, AsyncPage[ModelInfo]]:
+        """
+        List available models.
+
+        The Models API response can be used to determine which models are available for
+        use in the API. More recently released models are listed first.
+
+        Args:
+          after_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately after this object.
+
+          before_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately before this object.
+
+          limit: Number of items to return per page.
+
+              Defaults to `20`. Ranges from `1` to `1000`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/v1/models",
+            page=AsyncPage[ModelInfo],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after_id": after_id,
+                        "before_id": before_id,
+                        "limit": limit,
+                    },
+                    model_list_params.ModelListParams,
+                ),
+            ),
+            model=ModelInfo,
+        )
+
+
+class ModelsWithRawResponse:
+    def __init__(self, models: Models) -> None:
+        self._models = models
+
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            models.retrieve,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            models.list,
+        )
+
+
+class AsyncModelsWithRawResponse:
+    def __init__(self, models: AsyncModels) -> None:
+        self._models = models
+
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            models.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            models.list,
+        )
+
+
+class ModelsWithStreamingResponse:
+    def __init__(self, models: Models) -> None:
+        self._models = models
+
+        self.retrieve = to_streamed_response_wrapper(
+            models.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            models.list,
+        )
+
+
+class AsyncModelsWithStreamingResponse:
+    def __init__(self, models: AsyncModels) -> None:
+        self._models = models
+
+        self.retrieve = async_to_streamed_response_wrapper(
+            models.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            models.list,
+        )
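
A sketch of the two methods above: resolving an alias to a concrete model ID, then
paging through every available model; the alias used here is an assumption:

```python
from anthropic import Anthropic

client = Anthropic()

# Resolve an alias to the underlying model ID.
info = client.models.retrieve("claude-3-5-sonnet-latest")
print(info.id)

# SyncPage is iterable; iteration transparently fetches subsequent pages.
for model in client.models.list(limit=100):
    print(model.id)
```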
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/types/__init__.py
new file mode 100644
index 00000000..94196102
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/__init__.py
@@ -0,0 +1,107 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .model import Model as Model
+from .usage import Usage as Usage
+from .shared import (
+    ErrorObject as ErrorObject,
+    BillingError as BillingError,
+    ErrorResponse as ErrorResponse,
+    NotFoundError as NotFoundError,
+    APIErrorObject as APIErrorObject,
+    RateLimitError as RateLimitError,
+    OverloadedError as OverloadedError,
+    PermissionError as PermissionError,
+    AuthenticationError as AuthenticationError,
+    GatewayTimeoutError as GatewayTimeoutError,
+    InvalidRequestError as InvalidRequestError,
+)
+from .message import Message as Message
+from .beta_error import BetaError as BetaError
+from .completion import Completion as Completion
+from .model_info import ModelInfo as ModelInfo
+from .text_block import TextBlock as TextBlock
+from .text_delta import TextDelta as TextDelta
+from .tool_param import ToolParam as ToolParam
+from .model_param import ModelParam as ModelParam
+from .content_block import ContentBlock as ContentBlock
+from .message_param import MessageParam as MessageParam
+from .text_citation import TextCitation as TextCitation
+from .beta_api_error import BetaAPIError as BetaAPIError
+from .metadata_param import MetadataParam as MetadataParam
+from .thinking_block import ThinkingBlock as ThinkingBlock
+from .thinking_delta import ThinkingDelta as ThinkingDelta
+from .tool_use_block import ToolUseBlock as ToolUseBlock
+from .citations_delta import CitationsDelta as CitationsDelta
+from .signature_delta import SignatureDelta as SignatureDelta
+from .input_json_delta import InputJSONDelta as InputJSONDelta
+from .text_block_param import TextBlockParam as TextBlockParam
+from .tool_union_param import ToolUnionParam as ToolUnionParam
+from .image_block_param import ImageBlockParam as ImageBlockParam
+from .model_list_params import ModelListParams as ModelListParams
+from .tool_choice_param import ToolChoiceParam as ToolChoiceParam
+from .beta_billing_error import BetaBillingError as BetaBillingError
+from .message_stop_event import MessageStopEvent as MessageStopEvent
+from .beta_error_response import BetaErrorResponse as BetaErrorResponse
+from .content_block_param import ContentBlockParam as ContentBlockParam
+from .message_delta_event import MessageDeltaEvent as MessageDeltaEvent
+from .message_delta_usage import MessageDeltaUsage as MessageDeltaUsage
+from .message_start_event import MessageStartEvent as MessageStartEvent
+from .text_citation_param import TextCitationParam as TextCitationParam
+from .anthropic_beta_param import AnthropicBetaParam as AnthropicBetaParam
+from .beta_not_found_error import BetaNotFoundError as BetaNotFoundError
+from .document_block_param import DocumentBlockParam as DocumentBlockParam
+from .message_stream_event import MessageStreamEvent as MessageStreamEvent
+from .message_tokens_count import MessageTokensCount as MessageTokensCount
+from .thinking_block_param import ThinkingBlockParam as ThinkingBlockParam
+from .tool_use_block_param import ToolUseBlockParam as ToolUseBlockParam
+from .url_pdf_source_param import URLPDFSourceParam as URLPDFSourceParam
+from .beta_overloaded_error import BetaOverloadedError as BetaOverloadedError
+from .beta_permission_error import BetaPermissionError as BetaPermissionError
+from .beta_rate_limit_error import BetaRateLimitError as BetaRateLimitError
+from .message_create_params import MessageCreateParams as MessageCreateParams
+from .thinking_config_param import ThinkingConfigParam as ThinkingConfigParam
+from .tool_choice_any_param import ToolChoiceAnyParam as ToolChoiceAnyParam
+from .citation_char_location import CitationCharLocation as CitationCharLocation
+from .citation_page_location import CitationPageLocation as CitationPageLocation
+from .citations_config_param import CitationsConfigParam as CitationsConfigParam
+from .raw_message_stop_event import RawMessageStopEvent as RawMessageStopEvent
+from .tool_choice_auto_param import ToolChoiceAutoParam as ToolChoiceAutoParam
+from .tool_choice_none_param import ToolChoiceNoneParam as ToolChoiceNoneParam
+from .tool_choice_tool_param import ToolChoiceToolParam as ToolChoiceToolParam
+from .url_image_source_param import URLImageSourceParam as URLImageSourceParam
+from .base64_pdf_source_param import Base64PDFSourceParam as Base64PDFSourceParam
+from .plain_text_source_param import PlainTextSourceParam as PlainTextSourceParam
+from .raw_message_delta_event import RawMessageDeltaEvent as RawMessageDeltaEvent
+from .raw_message_start_event import RawMessageStartEvent as RawMessageStartEvent
+from .redacted_thinking_block import RedactedThinkingBlock as RedactedThinkingBlock
+from .tool_result_block_param import ToolResultBlockParam as ToolResultBlockParam
+from .completion_create_params import CompletionCreateParams as CompletionCreateParams
+from .content_block_stop_event import ContentBlockStopEvent as ContentBlockStopEvent
+from .raw_message_stream_event import RawMessageStreamEvent as RawMessageStreamEvent
+from .tool_bash_20250124_param import ToolBash20250124Param as ToolBash20250124Param
+from .base64_image_source_param import Base64ImageSourceParam as Base64ImageSourceParam
+from .beta_authentication_error import BetaAuthenticationError as BetaAuthenticationError
+from .content_block_delta_event import ContentBlockDeltaEvent as ContentBlockDeltaEvent
+from .content_block_start_event import ContentBlockStartEvent as ContentBlockStartEvent
+from .beta_gateway_timeout_error import BetaGatewayTimeoutError as BetaGatewayTimeoutError
+from .beta_invalid_request_error import BetaInvalidRequestError as BetaInvalidRequestError
+from .content_block_source_param import ContentBlockSourceParam as ContentBlockSourceParam
+from .message_count_tokens_params import MessageCountTokensParams as MessageCountTokensParams
+from .citation_char_location_param import CitationCharLocationParam as CitationCharLocationParam
+from .citation_page_location_param import CitationPageLocationParam as CitationPageLocationParam
+from .raw_content_block_stop_event import RawContentBlockStopEvent as RawContentBlockStopEvent
+from .cache_control_ephemeral_param import CacheControlEphemeralParam as CacheControlEphemeralParam
+from .raw_content_block_delta_event import RawContentBlockDeltaEvent as RawContentBlockDeltaEvent
+from .raw_content_block_start_event import RawContentBlockStartEvent as RawContentBlockStartEvent
+from .redacted_thinking_block_param import RedactedThinkingBlockParam as RedactedThinkingBlockParam
+from .thinking_config_enabled_param import ThinkingConfigEnabledParam as ThinkingConfigEnabledParam
+from .thinking_config_disabled_param import ThinkingConfigDisabledParam as ThinkingConfigDisabledParam
+from .citation_content_block_location import CitationContentBlockLocation as CitationContentBlockLocation
+from .message_count_tokens_tool_param import MessageCountTokensToolParam as MessageCountTokensToolParam
+from .tool_text_editor_20250124_param import ToolTextEditor20250124Param as ToolTextEditor20250124Param
+from .content_block_source_content_param import ContentBlockSourceContentParam as ContentBlockSourceContentParam
+from .citation_content_block_location_param import (
+    CitationContentBlockLocationParam as CitationContentBlockLocationParam,
+)
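
Each symbol is re-exported under `anthropic.types`, so request payloads can be typed
against a single namespace; a small sketch reusing the tool definition from the
docstrings above:

```python
from anthropic.types import MessageParam, ToolParam

messages: list[MessageParam] = [{"role": "user", "content": "Hello, Claude"}]

tools: list[ToolParam] = [
    {
        "name": "get_stock_price",
        "description": "Get the current stock price for a given ticker symbol.",
        "input_schema": {
            "type": "object",
            "properties": {"ticker": {"type": "string"}},
            "required": ["ticker"],
        },
    }
]
```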
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/anthropic_beta_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/anthropic_beta_param.py
new file mode 100644
index 00000000..ff5fdffd
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/anthropic_beta_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, TypeAlias
+
+__all__ = ["AnthropicBetaParam"]
+
+AnthropicBetaParam: TypeAlias = Union[
+    str,
+    Literal[
+        "message-batches-2024-09-24",
+        "prompt-caching-2024-07-31",
+        "computer-use-2024-10-22",
+        "computer-use-2025-01-24",
+        "pdfs-2024-09-25",
+        "token-counting-2024-11-01",
+        "token-efficient-tools-2025-02-19",
+        "output-128k-2025-02-19",
+    ],
+]
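
These values are passed as the `betas` argument on beta endpoints, which the SDK
sends in the `anthropic-beta` request header; a sketch, with the model name as an
assumption:

```python
from anthropic import Anthropic

client = Anthropic()

message = client.beta.messages.create(
    model="claude-3-5-sonnet-latest",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello, Claude"}],
    betas=["token-efficient-tools-2025-02-19"],
)
```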
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/base64_image_source_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/base64_image_source_param.py
new file mode 100644
index 00000000..93fdb9d1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/base64_image_source_param.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, Required, Annotated, TypedDict
+
+from .._types import Base64FileInput
+from .._utils import PropertyInfo
+from .._models import set_pydantic_config
+
+__all__ = ["Base64ImageSourceParam"]
+
+
+class Base64ImageSourceParam(TypedDict, total=False):
+    data: Required[Annotated[Union[str, Base64FileInput], PropertyInfo(format="base64")]]
+
+    media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]]
+
+    type: Required[Literal["base64"]]
+
+
+set_pydantic_config(Base64ImageSourceParam, {"arbitrary_types_allowed": True})
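
A sketch of filling this TypedDict by hand for a local JPEG; note from the
annotation above that `data` also accepts file-like inputs via `Base64FileInput`,
in which case the base64 encoding is applied during request transformation:

```python
import base64

from anthropic.types import ImageBlockParam

with open("photo.jpg", "rb") as f:
    data = base64.standard_b64encode(f.read()).decode("ascii")

block: ImageBlockParam = {
    "type": "image",
    "source": {"type": "base64", "media_type": "image/jpeg", "data": data},
}
```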
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/base64_pdf_source_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/base64_pdf_source_param.py
new file mode 100644
index 00000000..ac247a19
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/base64_pdf_source_param.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, Required, Annotated, TypedDict
+
+from .._types import Base64FileInput
+from .._utils import PropertyInfo
+from .._models import set_pydantic_config
+
+__all__ = ["Base64PDFSourceParam"]
+
+
+class Base64PDFSourceParam(TypedDict, total=False):
+    data: Required[Annotated[Union[str, Base64FileInput], PropertyInfo(format="base64")]]
+
+    media_type: Required[Literal["application/pdf"]]
+
+    type: Required[Literal["base64"]]
+
+
+set_pydantic_config(Base64PDFSourceParam, {"arbitrary_types_allowed": True})
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/__init__.py
new file mode 100644
index 00000000..916b0ab6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/__init__.py
@@ -0,0 +1,76 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .beta_usage import BetaUsage as BetaUsage
+from .beta_message import BetaMessage as BetaMessage
+from .beta_model_info import BetaModelInfo as BetaModelInfo
+from .beta_text_block import BetaTextBlock as BetaTextBlock
+from .beta_text_delta import BetaTextDelta as BetaTextDelta
+from .beta_tool_param import BetaToolParam as BetaToolParam
+from .model_list_params import ModelListParams as ModelListParams
+from .beta_content_block import BetaContentBlock as BetaContentBlock
+from .beta_message_param import BetaMessageParam as BetaMessageParam
+from .beta_text_citation import BetaTextCitation as BetaTextCitation
+from .beta_metadata_param import BetaMetadataParam as BetaMetadataParam
+from .beta_thinking_block import BetaThinkingBlock as BetaThinkingBlock
+from .beta_thinking_delta import BetaThinkingDelta as BetaThinkingDelta
+from .beta_tool_use_block import BetaToolUseBlock as BetaToolUseBlock
+from .beta_citations_delta import BetaCitationsDelta as BetaCitationsDelta
+from .beta_signature_delta import BetaSignatureDelta as BetaSignatureDelta
+from .beta_input_json_delta import BetaInputJSONDelta as BetaInputJSONDelta
+from .beta_text_block_param import BetaTextBlockParam as BetaTextBlockParam
+from .beta_tool_union_param import BetaToolUnionParam as BetaToolUnionParam
+from .message_create_params import MessageCreateParams as MessageCreateParams
+from .beta_image_block_param import BetaImageBlockParam as BetaImageBlockParam
+from .beta_tool_choice_param import BetaToolChoiceParam as BetaToolChoiceParam
+from .beta_content_block_param import BetaContentBlockParam as BetaContentBlockParam
+from .beta_message_delta_usage import BetaMessageDeltaUsage as BetaMessageDeltaUsage
+from .beta_text_citation_param import BetaTextCitationParam as BetaTextCitationParam
+from .beta_message_tokens_count import BetaMessageTokensCount as BetaMessageTokensCount
+from .beta_thinking_block_param import BetaThinkingBlockParam as BetaThinkingBlockParam
+from .beta_tool_use_block_param import BetaToolUseBlockParam as BetaToolUseBlockParam
+from .beta_url_pdf_source_param import BetaURLPDFSourceParam as BetaURLPDFSourceParam
+from .beta_thinking_config_param import BetaThinkingConfigParam as BetaThinkingConfigParam
+from .beta_tool_choice_any_param import BetaToolChoiceAnyParam as BetaToolChoiceAnyParam
+from .beta_base64_pdf_block_param import BetaBase64PDFBlockParam as BetaBase64PDFBlockParam
+from .beta_citation_char_location import BetaCitationCharLocation as BetaCitationCharLocation
+from .beta_citation_page_location import BetaCitationPageLocation as BetaCitationPageLocation
+from .beta_citations_config_param import BetaCitationsConfigParam as BetaCitationsConfigParam
+from .beta_raw_message_stop_event import BetaRawMessageStopEvent as BetaRawMessageStopEvent
+from .beta_tool_choice_auto_param import BetaToolChoiceAutoParam as BetaToolChoiceAutoParam
+from .beta_tool_choice_none_param import BetaToolChoiceNoneParam as BetaToolChoiceNoneParam
+from .beta_tool_choice_tool_param import BetaToolChoiceToolParam as BetaToolChoiceToolParam
+from .beta_url_image_source_param import BetaURLImageSourceParam as BetaURLImageSourceParam
+from .message_count_tokens_params import MessageCountTokensParams as MessageCountTokensParams
+from .beta_base64_pdf_source_param import BetaBase64PDFSourceParam as BetaBase64PDFSourceParam
+from .beta_plain_text_source_param import BetaPlainTextSourceParam as BetaPlainTextSourceParam
+from .beta_raw_message_delta_event import BetaRawMessageDeltaEvent as BetaRawMessageDeltaEvent
+from .beta_raw_message_start_event import BetaRawMessageStartEvent as BetaRawMessageStartEvent
+from .beta_redacted_thinking_block import BetaRedactedThinkingBlock as BetaRedactedThinkingBlock
+from .beta_tool_result_block_param import BetaToolResultBlockParam as BetaToolResultBlockParam
+from .beta_raw_message_stream_event import BetaRawMessageStreamEvent as BetaRawMessageStreamEvent
+from .beta_tool_bash_20241022_param import BetaToolBash20241022Param as BetaToolBash20241022Param
+from .beta_tool_bash_20250124_param import BetaToolBash20250124Param as BetaToolBash20250124Param
+from .beta_base64_image_source_param import BetaBase64ImageSourceParam as BetaBase64ImageSourceParam
+from .beta_content_block_source_param import BetaContentBlockSourceParam as BetaContentBlockSourceParam
+from .beta_citation_char_location_param import BetaCitationCharLocationParam as BetaCitationCharLocationParam
+from .beta_citation_page_location_param import BetaCitationPageLocationParam as BetaCitationPageLocationParam
+from .beta_raw_content_block_stop_event import BetaRawContentBlockStopEvent as BetaRawContentBlockStopEvent
+from .beta_cache_control_ephemeral_param import BetaCacheControlEphemeralParam as BetaCacheControlEphemeralParam
+from .beta_raw_content_block_delta_event import BetaRawContentBlockDeltaEvent as BetaRawContentBlockDeltaEvent
+from .beta_raw_content_block_start_event import BetaRawContentBlockStartEvent as BetaRawContentBlockStartEvent
+from .beta_redacted_thinking_block_param import BetaRedactedThinkingBlockParam as BetaRedactedThinkingBlockParam
+from .beta_thinking_config_enabled_param import BetaThinkingConfigEnabledParam as BetaThinkingConfigEnabledParam
+from .beta_thinking_config_disabled_param import BetaThinkingConfigDisabledParam as BetaThinkingConfigDisabledParam
+from .beta_citation_content_block_location import BetaCitationContentBlockLocation as BetaCitationContentBlockLocation
+from .beta_tool_text_editor_20241022_param import BetaToolTextEditor20241022Param as BetaToolTextEditor20241022Param
+from .beta_tool_text_editor_20250124_param import BetaToolTextEditor20250124Param as BetaToolTextEditor20250124Param
+from .beta_tool_computer_use_20241022_param import BetaToolComputerUse20241022Param as BetaToolComputerUse20241022Param
+from .beta_tool_computer_use_20250124_param import BetaToolComputerUse20250124Param as BetaToolComputerUse20250124Param
+from .beta_content_block_source_content_param import (
+    BetaContentBlockSourceContentParam as BetaContentBlockSourceContentParam,
+)
+from .beta_citation_content_block_location_param import (
+    BetaCitationContentBlockLocationParam as BetaCitationContentBlockLocationParam,
+)
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_base64_image_source_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_base64_image_source_param.py
new file mode 100644
index 00000000..8f13ce38
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_base64_image_source_param.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, Required, Annotated, TypedDict
+
+from ..._types import Base64FileInput
+from ..._utils import PropertyInfo
+from ..._models import set_pydantic_config
+
+__all__ = ["BetaBase64ImageSourceParam"]
+
+
+class BetaBase64ImageSourceParam(TypedDict, total=False):
+    data: Required[Annotated[Union[str, Base64FileInput], PropertyInfo(format="base64")]]
+
+    media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]]
+
+    type: Required[Literal["base64"]]
+
+
+set_pydantic_config(BetaBase64ImageSourceParam, {"arbitrary_types_allowed": True})
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_base64_pdf_block_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_base64_pdf_block_param.py
new file mode 100644
index 00000000..16f51a9d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_base64_pdf_block_param.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .beta_url_pdf_source_param import BetaURLPDFSourceParam
+from .beta_citations_config_param import BetaCitationsConfigParam
+from .beta_base64_pdf_source_param import BetaBase64PDFSourceParam
+from .beta_plain_text_source_param import BetaPlainTextSourceParam
+from .beta_content_block_source_param import BetaContentBlockSourceParam
+from .beta_cache_control_ephemeral_param import BetaCacheControlEphemeralParam
+
+__all__ = ["BetaBase64PDFBlockParam", "Source"]
+
+Source: TypeAlias = Union[
+    BetaBase64PDFSourceParam, BetaPlainTextSourceParam, BetaContentBlockSourceParam, BetaURLPDFSourceParam
+]
+
+
+class BetaBase64PDFBlockParam(TypedDict, total=False):
+    source: Required[Source]
+
+    type: Required[Literal["document"]]
+
+    cache_control: Optional[BetaCacheControlEphemeralParam]
+
+    citations: BetaCitationsConfigParam
+
+    context: Optional[str]
+
+    title: Optional[str]
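
A sketch of a complete `document` block built from this TypedDict with citations
enabled; the file path and title are assumptions:

```python
import base64

from anthropic.types.beta import BetaBase64PDFBlockParam

with open("report.pdf", "rb") as f:
    pdf_data = base64.standard_b64encode(f.read()).decode("ascii")

document: BetaBase64PDFBlockParam = {
    "type": "document",
    "source": {"type": "base64", "media_type": "application/pdf", "data": pdf_data},
    "citations": {"enabled": True},
    "title": "Quarterly report",
}
```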
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_base64_pdf_source_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_base64_pdf_source_param.py
new file mode 100644
index 00000000..1137c957
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_base64_pdf_source_param.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, Required, Annotated, TypedDict
+
+from ..._types import Base64FileInput
+from ..._utils import PropertyInfo
+from ..._models import set_pydantic_config
+
+__all__ = ["BetaBase64PDFSourceParam"]
+
+
+class BetaBase64PDFSourceParam(TypedDict, total=False):
+    data: Required[Annotated[Union[str, Base64FileInput], PropertyInfo(format="base64")]]
+
+    media_type: Required[Literal["application/pdf"]]
+
+    type: Required[Literal["base64"]]
+
+
+set_pydantic_config(BetaBase64PDFSourceParam, {"arbitrary_types_allowed": True})
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_cache_control_ephemeral_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_cache_control_ephemeral_param.py
new file mode 100644
index 00000000..540d769d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_cache_control_ephemeral_param.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BetaCacheControlEphemeralParam"]
+
+
+class BetaCacheControlEphemeralParam(TypedDict, total=False):
+    type: Required[Literal["ephemeral"]]
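
Cache control is attached per content block via a `cache_control` field; a sketch
that marks a long, shared text block as cacheable so later requests can reuse the
prefix (the instruction text is a placeholder):

```python
from anthropic.types.beta import BetaTextBlockParam

system_block: BetaTextBlockParam = {
    "type": "text",
    "text": "You are a support agent for ExampleCorp. <long shared instructions>",
    "cache_control": {"type": "ephemeral"},
}
```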
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_char_location.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_char_location.py
new file mode 100644
index 00000000..2109949a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_char_location.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BetaCitationCharLocation"]
+
+
+class BetaCitationCharLocation(BaseModel):
+    cited_text: str
+
+    document_index: int
+
+    document_title: Optional[str] = None
+
+    end_char_index: int
+
+    start_char_index: int
+
+    type: Literal["char_location"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_char_location_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_char_location_param.py
new file mode 100644
index 00000000..8c09f5a7
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_char_location_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BetaCitationCharLocationParam"]
+
+
+class BetaCitationCharLocationParam(TypedDict, total=False):
+    cited_text: Required[str]
+
+    document_index: Required[int]
+
+    document_title: Required[Optional[str]]
+
+    end_char_index: Required[int]
+
+    start_char_index: Required[int]
+
+    type: Required[Literal["char_location"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_content_block_location.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_content_block_location.py
new file mode 100644
index 00000000..8fde76f9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_content_block_location.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BetaCitationContentBlockLocation"]
+
+
+class BetaCitationContentBlockLocation(BaseModel):
+    cited_text: str
+
+    document_index: int
+
+    document_title: Optional[str] = None
+
+    end_block_index: int
+
+    start_block_index: int
+
+    type: Literal["content_block_location"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_content_block_location_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_content_block_location_param.py
new file mode 100644
index 00000000..9e378a78
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_content_block_location_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BetaCitationContentBlockLocationParam"]
+
+
+class BetaCitationContentBlockLocationParam(TypedDict, total=False):
+    cited_text: Required[str]
+
+    document_index: Required[int]
+
+    document_title: Required[Optional[str]]
+
+    end_block_index: Required[int]
+
+    start_block_index: Required[int]
+
+    type: Required[Literal["content_block_location"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_page_location.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_page_location.py
new file mode 100644
index 00000000..9e6f60dd
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_page_location.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BetaCitationPageLocation"]
+
+
+class BetaCitationPageLocation(BaseModel):
+    cited_text: str
+
+    document_index: int
+
+    document_title: Optional[str] = None
+
+    end_page_number: int
+
+    start_page_number: int
+
+    type: Literal["page_location"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_page_location_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_page_location_param.py
new file mode 100644
index 00000000..60e5b1c2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citation_page_location_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BetaCitationPageLocationParam"]
+
+
+class BetaCitationPageLocationParam(TypedDict, total=False):
+    cited_text: Required[str]
+
+    document_index: Required[int]
+
+    document_title: Required[Optional[str]]
+
+    end_page_number: Required[int]
+
+    start_page_number: Required[int]
+
+    type: Required[Literal["page_location"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citations_config_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citations_config_param.py
new file mode 100644
index 00000000..409cfde7
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citations_config_param.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["BetaCitationsConfigParam"]
+
+
+class BetaCitationsConfigParam(TypedDict, total=False):
+    enabled: bool
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citations_delta.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citations_delta.py
new file mode 100644
index 00000000..2c6c02b2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_citations_delta.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .beta_citation_char_location import BetaCitationCharLocation
+from .beta_citation_page_location import BetaCitationPageLocation
+from .beta_citation_content_block_location import BetaCitationContentBlockLocation
+
+__all__ = ["BetaCitationsDelta", "Citation"]
+
+Citation: TypeAlias = Annotated[
+    Union[BetaCitationCharLocation, BetaCitationPageLocation, BetaCitationContentBlockLocation],
+    PropertyInfo(discriminator="type"),
+]
+
+
+class BetaCitationsDelta(BaseModel):
+    citation: Citation
+
+    type: Literal["citations_delta"]
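
The `Citation` union above is discriminated on `type`, so accumulated citations can
be dispatched with `isinstance` checks; a small sketch:

```python
from anthropic.types.beta import (
    BetaCitationCharLocation,
    BetaCitationContentBlockLocation,
    BetaCitationPageLocation,
)

Citation = BetaCitationCharLocation | BetaCitationPageLocation | BetaCitationContentBlockLocation


def describe(citation: Citation) -> str:
    if isinstance(citation, BetaCitationPageLocation):
        return f"pages {citation.start_page_number}-{citation.end_page_number}"
    if isinstance(citation, BetaCitationCharLocation):
        return f"chars {citation.start_char_index}-{citation.end_char_index}"
    return f"blocks {citation.start_block_index}-{citation.end_block_index}"
```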
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_content_block.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_content_block.py
new file mode 100644
index 00000000..7cf9736e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_content_block.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from .beta_text_block import BetaTextBlock
+from .beta_thinking_block import BetaThinkingBlock
+from .beta_tool_use_block import BetaToolUseBlock
+from .beta_redacted_thinking_block import BetaRedactedThinkingBlock
+
+__all__ = ["BetaContentBlock"]
+
+BetaContentBlock: TypeAlias = Annotated[
+    Union[BetaTextBlock, BetaToolUseBlock, BetaThinkingBlock, BetaRedactedThinkingBlock],
+    PropertyInfo(discriminator="type"),
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_content_block_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_content_block_param.py
new file mode 100644
index 00000000..1768f321
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_content_block_param.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .beta_text_block_param import BetaTextBlockParam
+from .beta_image_block_param import BetaImageBlockParam
+from .beta_thinking_block_param import BetaThinkingBlockParam
+from .beta_tool_use_block_param import BetaToolUseBlockParam
+from .beta_base64_pdf_block_param import BetaBase64PDFBlockParam
+from .beta_tool_result_block_param import BetaToolResultBlockParam
+from .beta_redacted_thinking_block_param import BetaRedactedThinkingBlockParam
+
+__all__ = ["BetaContentBlockParam"]
+
+BetaContentBlockParam: TypeAlias = Union[
+    BetaTextBlockParam,
+    BetaImageBlockParam,
+    BetaToolUseBlockParam,
+    BetaToolResultBlockParam,
+    BetaBase64PDFBlockParam,
+    BetaThinkingBlockParam,
+    BetaRedactedThinkingBlockParam,
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_content_block_source_content_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_content_block_source_content_param.py
new file mode 100644
index 00000000..bc13b146
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_content_block_source_content_param.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .beta_text_block_param import BetaTextBlockParam
+from .beta_image_block_param import BetaImageBlockParam
+
+__all__ = ["BetaContentBlockSourceContentParam"]
+
+BetaContentBlockSourceContentParam: TypeAlias = Union[BetaTextBlockParam, BetaImageBlockParam]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_content_block_source_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_content_block_source_param.py
new file mode 100644
index 00000000..512cf0db
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_content_block_source_param.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+from .beta_content_block_source_content_param import BetaContentBlockSourceContentParam
+
+__all__ = ["BetaContentBlockSourceParam"]
+
+
+class BetaContentBlockSourceParam(TypedDict, total=False):
+    content: Required[Union[str, Iterable[BetaContentBlockSourceContentParam]]]
+
+    type: Required[Literal["content"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_image_block_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_image_block_param.py
new file mode 100644
index 00000000..ddb9d4c0
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_image_block_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .beta_url_image_source_param import BetaURLImageSourceParam
+from .beta_base64_image_source_param import BetaBase64ImageSourceParam
+from .beta_cache_control_ephemeral_param import BetaCacheControlEphemeralParam
+
+__all__ = ["BetaImageBlockParam", "Source"]
+
+Source: TypeAlias = Union[BetaBase64ImageSourceParam, BetaURLImageSourceParam]
+
+
+class BetaImageBlockParam(TypedDict, total=False):
+    source: Required[Source]
+
+    type: Required[Literal["image"]]
+
+    cache_control: Optional[BetaCacheControlEphemeralParam]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_input_json_delta.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_input_json_delta.py
new file mode 100644
index 00000000..a5f9cbea
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_input_json_delta.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BetaInputJSONDelta"]
+
+
+class BetaInputJSONDelta(BaseModel):
+    partial_json: str
+
+    type: Literal["input_json_delta"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_message.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_message.py
new file mode 100644
index 00000000..a4d6cdec
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_message.py
@@ -0,0 +1,112 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ..model import Model
+from ..._models import BaseModel
+from .beta_usage import BetaUsage
+from .beta_content_block import BetaContentBlock as BetaContentBlock
+
+__all__ = ["BetaMessage"]
+
+
+class BetaMessage(BaseModel):
+    id: str
+    """Unique object identifier.
+
+    The format and length of IDs may change over time.
+    """
+
+    content: List[BetaContentBlock]
+    """Content generated by the model.
+
+    This is an array of content blocks, each of which has a `type` that determines
+    its shape.
+
+    Example:
+
+    ```json
+    [{ "type": "text", "text": "Hi, I'm Claude." }]
+    ```
+
+    If the request input `messages` ended with an `assistant` turn, then the
+    response `content` will continue directly from that last turn. You can use this
+    to constrain the model's output.
+
+    For example, if the input `messages` were:
+
+    ```json
+    [
+      {
+        "role": "user",
+        "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+      },
+      { "role": "assistant", "content": "The best answer is (" }
+    ]
+    ```
+
+    Then the response `content` might be:
+
+    ```json
+    [{ "type": "text", "text": "B)" }]
+    ```
+    """
+
+    model: Model
+    """
+    The model that handled your request.
+
+    See [models](https://docs.anthropic.com/en/docs/models-overview) for
+    additional details and options.
+    """
+
+    role: Literal["assistant"]
+    """Conversational role of the generated message.
+
+    This will always be `"assistant"`.
+    """
+
+    stop_reason: Optional[Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]] = None
+    """The reason that we stopped.
+
+    This may be one of the following values:
+
+    - `"end_turn"`: the model reached a natural stopping point
+    - `"max_tokens"`: we exceeded the requested `max_tokens` or the model's maximum
+    - `"stop_sequence"`: one of your provided custom `stop_sequences` was generated
+    - `"tool_use"`: the model invoked one or more tools
+
+    In non-streaming mode this value is always non-null. In streaming mode, it is
+    null in the `message_start` event and non-null otherwise.
+    """
+
+    stop_sequence: Optional[str] = None
+    """Which custom stop sequence was generated, if any.
+
+    This value will be a non-null string if one of your custom stop sequences was
+    generated.
+    """
+
+    type: Literal["message"]
+    """Object type.
+
+    For Messages, this is always `"message"`.
+    """
+
+    usage: BetaUsage
+    """Billing and rate-limit usage.
+
+    Anthropic's API bills and rate-limits by token counts, as tokens represent the
+    underlying cost to our systems.
+
+    Under the hood, the API transforms requests into a format suitable for the
+    model. The model's output then goes through a parsing stage before becoming an
+    API response. As a result, the token counts in `usage` will not match one-to-one
+    with the exact visible content of an API request or response.
+
+    For example, `output_tokens` will be non-zero, even for an empty string response
+    from Claude.
+
+    Total input tokens in a request is the summation of `input_tokens`,
+    `cache_creation_input_tokens`, and `cache_read_input_tokens`.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_message_delta_usage.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_message_delta_usage.py
new file mode 100644
index 00000000..cc681911
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_message_delta_usage.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from ..._models import BaseModel
+
+__all__ = ["BetaMessageDeltaUsage"]
+
+
+class BetaMessageDeltaUsage(BaseModel):
+    output_tokens: int
+    """The cumulative number of output tokens which were used."""
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_message_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_message_param.py
new file mode 100644
index 00000000..b41e56d3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_message_param.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+from .beta_content_block_param import BetaContentBlockParam
+
+__all__ = ["BetaMessageParam"]
+
+
+class BetaMessageParam(TypedDict, total=False):
+    content: Required[Union[str, Iterable[BetaContentBlockParam]]]
+
+    role: Required[Literal["user", "assistant"]]
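The string form of `content` is shorthand for a single text block, so the two messages in this sketch are equivalent on the wire:

```python
from anthropic.types.beta import BetaMessageParam

short: BetaMessageParam = {"role": "user", "content": "Hello, Claude"}
explicit: BetaMessageParam = {
    "role": "user",
    "content": [{"type": "text", "text": "Hello, Claude"}],
}
```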
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_message_tokens_count.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_message_tokens_count.py
new file mode 100644
index 00000000..e11daee7
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_message_tokens_count.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from ..._models import BaseModel
+
+__all__ = ["BetaMessageTokensCount"]
+
+
+class BetaMessageTokensCount(BaseModel):
+    input_tokens: int
+    """
+    The total number of tokens across the provided list of messages, system prompt,
+    and tools.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_metadata_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_metadata_param.py
new file mode 100644
index 00000000..8ccda216
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_metadata_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import TypedDict
+
+__all__ = ["BetaMetadataParam"]
+
+
+class BetaMetadataParam(TypedDict, total=False):
+    user_id: Optional[str]
+    """An external identifier for the user who is associated with the request.
+
+    This should be a uuid, hash value, or other opaque identifier. Anthropic may use
+    this id to help detect abuse. Do not include any identifying information such as
+    name, email address, or phone number.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_model_info.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_model_info.py
new file mode 100644
index 00000000..6ea50d9f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_model_info.py
@@ -0,0 +1,28 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from datetime import datetime
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BetaModelInfo"]
+
+
+class BetaModelInfo(BaseModel):
+    id: str
+    """Unique model identifier."""
+
+    created_at: datetime
+    """RFC 3339 datetime string representing the time at which the model was released.
+
+    May be set to an epoch value if the release date is unknown.
+    """
+
+    display_name: str
+    """A human-readable name for the model."""
+
+    type: Literal["model"]
+    """Object type.
+
+    For Models, this is always `"model"`.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_plain_text_source_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_plain_text_source_param.py
new file mode 100644
index 00000000..187a2386
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_plain_text_source_param.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BetaPlainTextSourceParam"]
+
+
+class BetaPlainTextSourceParam(TypedDict, total=False):
+    data: Required[str]
+
+    media_type: Required[Literal["text/plain"]]
+
+    type: Required[Literal["text"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_content_block_delta_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_content_block_delta_event.py
new file mode 100644
index 00000000..cd50f289
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_content_block_delta_event.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .beta_text_delta import BetaTextDelta
+from .beta_thinking_delta import BetaThinkingDelta
+from .beta_citations_delta import BetaCitationsDelta
+from .beta_signature_delta import BetaSignatureDelta
+from .beta_input_json_delta import BetaInputJSONDelta
+
+__all__ = ["BetaRawContentBlockDeltaEvent", "Delta"]
+
+Delta: TypeAlias = Annotated[
+    Union[BetaTextDelta, BetaInputJSONDelta, BetaCitationsDelta, BetaThinkingDelta, BetaSignatureDelta],
+    PropertyInfo(discriminator="type"),
+]
+
+
+class BetaRawContentBlockDeltaEvent(BaseModel):
+    delta: Delta
+
+    index: int
+
+    type: Literal["content_block_delta"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_content_block_start_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_content_block_start_event.py
new file mode 100644
index 00000000..086c216c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_content_block_start_event.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .beta_text_block import BetaTextBlock
+from .beta_thinking_block import BetaThinkingBlock
+from .beta_tool_use_block import BetaToolUseBlock
+from .beta_redacted_thinking_block import BetaRedactedThinkingBlock
+
+__all__ = ["BetaRawContentBlockStartEvent", "ContentBlock"]
+
+ContentBlock: TypeAlias = Annotated[
+    Union[BetaTextBlock, BetaToolUseBlock, BetaThinkingBlock, BetaRedactedThinkingBlock],
+    PropertyInfo(discriminator="type"),
+]
+
+
+class BetaRawContentBlockStartEvent(BaseModel):
+    content_block: ContentBlock
+
+    index: int
+
+    type: Literal["content_block_start"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_content_block_stop_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_content_block_stop_event.py
new file mode 100644
index 00000000..d8551860
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_content_block_stop_event.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BetaRawContentBlockStopEvent"]
+
+
+class BetaRawContentBlockStopEvent(BaseModel):
+    index: int
+
+    type: Literal["content_block_stop"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_message_delta_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_message_delta_event.py
new file mode 100644
index 00000000..525fd10c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_message_delta_event.py
@@ -0,0 +1,39 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .beta_message_delta_usage import BetaMessageDeltaUsage
+
+__all__ = ["BetaRawMessageDeltaEvent", "Delta"]
+
+
+class Delta(BaseModel):
+    stop_reason: Optional[Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]] = None
+
+    stop_sequence: Optional[str] = None
+
+
+class BetaRawMessageDeltaEvent(BaseModel):
+    delta: Delta
+
+    type: Literal["message_delta"]
+
+    usage: BetaMessageDeltaUsage
+    """Billing and rate-limit usage.
+
+    Anthropic's API bills and rate-limits by token counts, as tokens represent the
+    underlying cost to our systems.
+
+    Under the hood, the API transforms requests into a format suitable for the
+    model. The model's output then goes through a parsing stage before becoming an
+    API response. As a result, the token counts in `usage` will not match one-to-one
+    with the exact visible content of an API request or response.
+
+    For example, `output_tokens` will be non-zero, even for an empty string response
+    from Claude.
+
+    Total input tokens in a request is the summation of `input_tokens`,
+    `cache_creation_input_tokens`, and `cache_read_input_tokens`.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_message_start_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_message_start_event.py
new file mode 100644
index 00000000..9bb16f94
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_message_start_event.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .beta_message import BetaMessage
+
+__all__ = ["BetaRawMessageStartEvent"]
+
+
+class BetaRawMessageStartEvent(BaseModel):
+    message: BetaMessage
+
+    type: Literal["message_start"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_message_stop_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_message_stop_event.py
new file mode 100644
index 00000000..dff33cde
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_message_stop_event.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BetaRawMessageStopEvent"]
+
+
+class BetaRawMessageStopEvent(BaseModel):
+    type: Literal["message_stop"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_message_stream_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_message_stream_event.py
new file mode 100644
index 00000000..00ffd7c1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_raw_message_stream_event.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from .beta_raw_message_stop_event import BetaRawMessageStopEvent
+from .beta_raw_message_delta_event import BetaRawMessageDeltaEvent
+from .beta_raw_message_start_event import BetaRawMessageStartEvent
+from .beta_raw_content_block_stop_event import BetaRawContentBlockStopEvent
+from .beta_raw_content_block_delta_event import BetaRawContentBlockDeltaEvent
+from .beta_raw_content_block_start_event import BetaRawContentBlockStartEvent
+
+__all__ = ["BetaRawMessageStreamEvent"]
+
+BetaRawMessageStreamEvent: TypeAlias = Annotated[
+    Union[
+        BetaRawMessageStartEvent,
+        BetaRawMessageDeltaEvent,
+        BetaRawMessageStopEvent,
+        BetaRawContentBlockStartEvent,
+        BetaRawContentBlockDeltaEvent,
+        BetaRawContentBlockStopEvent,
+    ],
+    PropertyInfo(discriminator="type"),
+]
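Putting the raw events together, a minimal streaming loop; `stream=True` yields members of this union in order, and the model id is a placeholder:

```python
import anthropic

client = anthropic.Anthropic()

stream = client.beta.messages.create(
    model="claude-3-5-sonnet-20241022",  # placeholder model id
    max_tokens=256,
    messages=[{"role": "user", "content": "Hello, Claude"}],
    stream=True,
)
for event in stream:
    if event.type == "message_start":
        pass  # event.message is a BetaMessage with empty content so far
    elif event.type == "content_block_delta" and event.delta.type == "text_delta":
        print(event.delta.text, end="", flush=True)
    elif event.type == "message_delta":
        pass  # event.usage.output_tokens is the cumulative count so far
```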
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_redacted_thinking_block.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_redacted_thinking_block.py
new file mode 100644
index 00000000..b27bd933
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_redacted_thinking_block.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BetaRedactedThinkingBlock"]
+
+
+class BetaRedactedThinkingBlock(BaseModel):
+    data: str
+
+    type: Literal["redacted_thinking"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_redacted_thinking_block_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_redacted_thinking_block_param.py
new file mode 100644
index 00000000..cc7d870f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_redacted_thinking_block_param.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BetaRedactedThinkingBlockParam"]
+
+
+class BetaRedactedThinkingBlockParam(TypedDict, total=False):
+    data: Required[str]
+
+    type: Required[Literal["redacted_thinking"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_signature_delta.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_signature_delta.py
new file mode 100644
index 00000000..a3586826
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_signature_delta.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BetaSignatureDelta"]
+
+
+class BetaSignatureDelta(BaseModel):
+    signature: str
+
+    type: Literal["signature_delta"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_block.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_block.py
new file mode 100644
index 00000000..f6374b41
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_block.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .beta_text_citation import BetaTextCitation
+
+__all__ = ["BetaTextBlock"]
+
+
+class BetaTextBlock(BaseModel):
+    citations: Optional[List[BetaTextCitation]] = None
+    """Citations supporting the text block.
+
+    The type of citation returned will depend on the type of document being cited.
+    Citing a PDF results in `page_location`, plain text results in `char_location`,
+    and content document results in `content_block_location`.
+    """
+
+    text: str
+
+    type: Literal["text"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_block_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_block_param.py
new file mode 100644
index 00000000..e40b03a5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_block_param.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable, Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .beta_text_citation_param import BetaTextCitationParam
+from .beta_cache_control_ephemeral_param import BetaCacheControlEphemeralParam
+
+__all__ = ["BetaTextBlockParam"]
+
+
+class BetaTextBlockParam(TypedDict, total=False):
+    text: Required[str]
+
+    type: Required[Literal["text"]]
+
+    cache_control: Optional[BetaCacheControlEphemeralParam]
+
+    citations: Optional[Iterable[BetaTextCitationParam]]
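The optional `cache_control` marker is how a block opts into prompt caching; a sketch of a cacheable system block (the ephemeral shape comes from `BetaCacheControlEphemeralParam`):

```python
from anthropic.types.beta import BetaTextBlockParam

system_block: BetaTextBlockParam = {
    "type": "text",
    "text": "You are a code-review assistant. <long, stable style guide here>",
    "cache_control": {"type": "ephemeral"},  # mark this prefix as cacheable
}
```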
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_citation.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_citation.py
new file mode 100644
index 00000000..538878b6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_citation.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from .beta_citation_char_location import BetaCitationCharLocation
+from .beta_citation_page_location import BetaCitationPageLocation
+from .beta_citation_content_block_location import BetaCitationContentBlockLocation
+
+__all__ = ["BetaTextCitation"]
+
+BetaTextCitation: TypeAlias = Annotated[
+    Union[BetaCitationCharLocation, BetaCitationPageLocation, BetaCitationContentBlockLocation],
+    PropertyInfo(discriminator="type"),
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_citation_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_citation_param.py
new file mode 100644
index 00000000..b04c3305
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_citation_param.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .beta_citation_char_location_param import BetaCitationCharLocationParam
+from .beta_citation_page_location_param import BetaCitationPageLocationParam
+from .beta_citation_content_block_location_param import BetaCitationContentBlockLocationParam
+
+__all__ = ["BetaTextCitationParam"]
+
+BetaTextCitationParam: TypeAlias = Union[
+    BetaCitationCharLocationParam, BetaCitationPageLocationParam, BetaCitationContentBlockLocationParam
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_delta.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_delta.py
new file mode 100644
index 00000000..b94ba5ea
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_text_delta.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BetaTextDelta"]
+
+
+class BetaTextDelta(BaseModel):
+    text: str
+
+    type: Literal["text_delta"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_block.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_block.py
new file mode 100644
index 00000000..9a9c1df8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_block.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BetaThinkingBlock"]
+
+
+class BetaThinkingBlock(BaseModel):
+    signature: str
+
+    thinking: str
+
+    type: Literal["thinking"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_block_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_block_param.py
new file mode 100644
index 00000000..5bd43180
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_block_param.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BetaThinkingBlockParam"]
+
+
+class BetaThinkingBlockParam(TypedDict, total=False):
+    signature: Required[str]
+
+    thinking: Required[str]
+
+    type: Required[Literal["thinking"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_config_disabled_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_config_disabled_param.py
new file mode 100644
index 00000000..e7c4a2a0
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_config_disabled_param.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BetaThinkingConfigDisabledParam"]
+
+
+class BetaThinkingConfigDisabledParam(TypedDict, total=False):
+    type: Required[Literal["disabled"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_config_enabled_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_config_enabled_param.py
new file mode 100644
index 00000000..f9490c4c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_config_enabled_param.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BetaThinkingConfigEnabledParam"]
+
+
+class BetaThinkingConfigEnabledParam(TypedDict, total=False):
+    budget_tokens: Required[int]
+    """Determines how many tokens Claude can use for its internal reasoning process.
+
+    Larger budgets can enable more thorough analysis for complex problems, improving
+    response quality.
+
+    Must be ≥1024 and less than `max_tokens`.
+
+    See
+    [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+    for details.
+    """
+
+    type: Required[Literal["enabled"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_config_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_config_param.py
new file mode 100644
index 00000000..47494239
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_config_param.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .beta_thinking_config_enabled_param import BetaThinkingConfigEnabledParam
+from .beta_thinking_config_disabled_param import BetaThinkingConfigDisabledParam
+
+__all__ = ["BetaThinkingConfigParam"]
+
+BetaThinkingConfigParam: TypeAlias = Union[BetaThinkingConfigEnabledParam, BetaThinkingConfigDisabledParam]
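The union means callers toggle extended thinking with one small dict; for example:

```python
from anthropic.types.beta import BetaThinkingConfigParam

thinking_on: BetaThinkingConfigParam = {"type": "enabled", "budget_tokens": 2048}
thinking_off: BetaThinkingConfigParam = {"type": "disabled"}
# budget_tokens must be >= 1024 and strictly less than max_tokens
```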
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_delta.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_delta.py
new file mode 100644
index 00000000..790a304e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_thinking_delta.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BetaThinkingDelta"]
+
+
+class BetaThinkingDelta(BaseModel):
+    thinking: str
+
+    type: Literal["thinking_delta"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_bash_20241022_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_bash_20241022_param.py
new file mode 100644
index 00000000..82ed02b3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_bash_20241022_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .beta_cache_control_ephemeral_param import BetaCacheControlEphemeralParam
+
+__all__ = ["BetaToolBash20241022Param"]
+
+
+class BetaToolBash20241022Param(TypedDict, total=False):
+    name: Required[Literal["bash"]]
+    """Name of the tool.
+
+    This is how the tool will be called by the model and in tool_use blocks.
+    """
+
+    type: Required[Literal["bash_20241022"]]
+
+    cache_control: Optional[BetaCacheControlEphemeralParam]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_bash_20250124_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_bash_20250124_param.py
new file mode 100644
index 00000000..3fcab440
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_bash_20250124_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .beta_cache_control_ephemeral_param import BetaCacheControlEphemeralParam
+
+__all__ = ["BetaToolBash20250124Param"]
+
+
+class BetaToolBash20250124Param(TypedDict, total=False):
+    name: Required[Literal["bash"]]
+    """Name of the tool.
+
+    This is how the tool will be called by the model and in tool_use blocks.
+    """
+
+    type: Required[Literal["bash_20250124"]]
+
+    cache_control: Optional[BetaCacheControlEphemeralParam]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_any_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_any_param.py
new file mode 100644
index 00000000..6cdac00a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_any_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BetaToolChoiceAnyParam"]
+
+
+class BetaToolChoiceAnyParam(TypedDict, total=False):
+    type: Required[Literal["any"]]
+
+    disable_parallel_tool_use: bool
+    """Whether to disable parallel tool use.
+
+    Defaults to `false`. If set to `true`, the model will output exactly one tool
+    use.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_auto_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_auto_param.py
new file mode 100644
index 00000000..e2f20572
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_auto_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BetaToolChoiceAutoParam"]
+
+
+class BetaToolChoiceAutoParam(TypedDict, total=False):
+    type: Required[Literal["auto"]]
+
+    disable_parallel_tool_use: bool
+    """Whether to disable parallel tool use.
+
+    Defaults to `false`. If set to `true`, the model will output at most one tool
+    use.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_none_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_none_param.py
new file mode 100644
index 00000000..3a0951a4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_none_param.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BetaToolChoiceNoneParam"]
+
+
+class BetaToolChoiceNoneParam(TypedDict, total=False):
+    type: Required[Literal["none"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_param.py
new file mode 100644
index 00000000..ff6a51aa
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_param.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .beta_tool_choice_any_param import BetaToolChoiceAnyParam
+from .beta_tool_choice_auto_param import BetaToolChoiceAutoParam
+from .beta_tool_choice_none_param import BetaToolChoiceNoneParam
+from .beta_tool_choice_tool_param import BetaToolChoiceToolParam
+
+__all__ = ["BetaToolChoiceParam"]
+
+BetaToolChoiceParam: TypeAlias = Union[
+    BetaToolChoiceAutoParam, BetaToolChoiceAnyParam, BetaToolChoiceToolParam, BetaToolChoiceNoneParam
+]
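The four variants side by side, as they would be passed for `tool_choice`:

```python
from anthropic.types.beta import BetaToolChoiceParam

auto: BetaToolChoiceParam = {"type": "auto"}     # model decides
any_tool: BetaToolChoiceParam = {"type": "any"}  # must call some tool
named: BetaToolChoiceParam = {"type": "tool", "name": "get_stock_price"}
no_tools: BetaToolChoiceParam = {"type": "none"} # never call tools
```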
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_tool_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_tool_param.py
new file mode 100644
index 00000000..e826237a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_choice_tool_param.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BetaToolChoiceToolParam"]
+
+
+class BetaToolChoiceToolParam(TypedDict, total=False):
+    name: Required[str]
+    """The name of the tool to use."""
+
+    type: Required[Literal["tool"]]
+
+    disable_parallel_tool_use: bool
+    """Whether to disable parallel tool use.
+
+    Defaults to `false`. If set to `true`, the model will output exactly one tool
+    use.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_computer_use_20241022_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_computer_use_20241022_param.py
new file mode 100644
index 00000000..b95472be
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_computer_use_20241022_param.py
@@ -0,0 +1,31 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .beta_cache_control_ephemeral_param import BetaCacheControlEphemeralParam
+
+__all__ = ["BetaToolComputerUse20241022Param"]
+
+
+class BetaToolComputerUse20241022Param(TypedDict, total=False):
+    display_height_px: Required[int]
+    """The height of the display in pixels."""
+
+    display_width_px: Required[int]
+    """The width of the display in pixels."""
+
+    name: Required[Literal["computer"]]
+    """Name of the tool.
+
+    This is how the tool will be called by the model and in tool_use blocks.
+    """
+
+    type: Required[Literal["computer_20241022"]]
+
+    cache_control: Optional[BetaCacheControlEphemeralParam]
+
+    display_number: Optional[int]
+    """The X11 display number (e.g. 0, 1) for the display."""
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_computer_use_20250124_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_computer_use_20250124_param.py
new file mode 100644
index 00000000..089d67aa
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_computer_use_20250124_param.py
@@ -0,0 +1,31 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .beta_cache_control_ephemeral_param import BetaCacheControlEphemeralParam
+
+__all__ = ["BetaToolComputerUse20250124Param"]
+
+
+class BetaToolComputerUse20250124Param(TypedDict, total=False):
+    display_height_px: Required[int]
+    """The height of the display in pixels."""
+
+    display_width_px: Required[int]
+    """The width of the display in pixels."""
+
+    name: Required[Literal["computer"]]
+    """Name of the tool.
+
+    This is how the tool will be called by the model and in tool_use blocks.
+    """
+
+    type: Required[Literal["computer_20250124"]]
+
+    cache_control: Optional[BetaCacheControlEphemeralParam]
+
+    display_number: Optional[int]
+    """The X11 display number (e.g. 0, 1) for the display."""
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_param.py
new file mode 100644
index 00000000..da9d43bc
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_param.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .beta_cache_control_ephemeral_param import BetaCacheControlEphemeralParam
+
+__all__ = ["BetaToolParam", "InputSchema"]
+
+
+class InputSchemaTyped(TypedDict, total=False):
+    type: Required[Literal["object"]]
+
+    properties: Optional[object]
+
+
+InputSchema: TypeAlias = Union[InputSchemaTyped, Dict[str, object]]
+
+
+class BetaToolParam(TypedDict, total=False):
+    input_schema: Required[InputSchema]
+    """[JSON schema](https://json-schema.org/draft/2020-12) for this tool's input.
+
+    This defines the shape of the `input` that your tool accepts and that the model
+    will produce.
+    """
+
+    name: Required[str]
+    """Name of the tool.
+
+    This is how the tool will be called by the model and in tool_use blocks.
+    """
+
+    cache_control: Optional[BetaCacheControlEphemeralParam]
+
+    description: str
+    """Description of what this tool does.
+
+    Tool descriptions should be as detailed as possible. The more information that
+    the model has about what the tool is and how to use it, the better it will
+    perform. You can use natural language descriptions to reinforce important
+    aspects of the tool input JSON schema.
+    """
+
+    type: Optional[Literal["custom"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_result_block_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_result_block_param.py
new file mode 100644
index 00000000..9418b650
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_result_block_param.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .beta_text_block_param import BetaTextBlockParam
+from .beta_image_block_param import BetaImageBlockParam
+from .beta_cache_control_ephemeral_param import BetaCacheControlEphemeralParam
+
+__all__ = ["BetaToolResultBlockParam", "Content"]
+
+Content: TypeAlias = Union[BetaTextBlockParam, BetaImageBlockParam]
+
+
+class BetaToolResultBlockParam(TypedDict, total=False):
+    tool_use_id: Required[str]
+
+    type: Required[Literal["tool_result"]]
+
+    cache_control: Optional[BetaCacheControlEphemeralParam]
+
+    content: Union[str, Iterable[Content]]
+
+    is_error: bool
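A sketch of the round trip: `tool_use_id` echoes the id from the model's `tool_use` block, and the result goes back inside a subsequent `user` message:

```python
tool_result = {
    "type": "tool_result",
    "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",  # id from the tool_use block
    "content": "259.75 USD",  # or a list of text/image blocks
    "is_error": False,
}
reply = {"role": "user", "content": [tool_result]}
```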
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_text_editor_20241022_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_text_editor_20241022_param.py
new file mode 100644
index 00000000..86c93278
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_text_editor_20241022_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .beta_cache_control_ephemeral_param import BetaCacheControlEphemeralParam
+
+__all__ = ["BetaToolTextEditor20241022Param"]
+
+
+class BetaToolTextEditor20241022Param(TypedDict, total=False):
+    name: Required[Literal["str_replace_editor"]]
+    """Name of the tool.
+
+    This is how the tool will be called by the model and in tool_use blocks.
+    """
+
+    type: Required[Literal["text_editor_20241022"]]
+
+    cache_control: Optional[BetaCacheControlEphemeralParam]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_text_editor_20250124_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_text_editor_20250124_param.py
new file mode 100644
index 00000000..07b86bd5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_text_editor_20250124_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .beta_cache_control_ephemeral_param import BetaCacheControlEphemeralParam
+
+__all__ = ["BetaToolTextEditor20250124Param"]
+
+
+class BetaToolTextEditor20250124Param(TypedDict, total=False):
+    name: Required[Literal["str_replace_editor"]]
+    """Name of the tool.
+
+    This is how the tool will be called by the model and in tool_use blocks.
+    """
+
+    type: Required[Literal["text_editor_20250124"]]
+
+    cache_control: Optional[BetaCacheControlEphemeralParam]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_union_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_union_param.py
new file mode 100644
index 00000000..d37480da
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_union_param.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .beta_tool_param import BetaToolParam
+from .beta_tool_bash_20241022_param import BetaToolBash20241022Param
+from .beta_tool_bash_20250124_param import BetaToolBash20250124Param
+from .beta_tool_text_editor_20241022_param import BetaToolTextEditor20241022Param
+from .beta_tool_text_editor_20250124_param import BetaToolTextEditor20250124Param
+from .beta_tool_computer_use_20241022_param import BetaToolComputerUse20241022Param
+from .beta_tool_computer_use_20250124_param import BetaToolComputerUse20250124Param
+
+__all__ = ["BetaToolUnionParam"]
+
+BetaToolUnionParam: TypeAlias = Union[
+    BetaToolParam,
+    BetaToolComputerUse20241022Param,
+    BetaToolBash20241022Param,
+    BetaToolTextEditor20241022Param,
+    BetaToolComputerUse20250124Param,
+    BetaToolBash20250124Param,
+    BetaToolTextEditor20250124Param,
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_use_block.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_use_block.py
new file mode 100644
index 00000000..7cfc0c33
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_use_block.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BetaToolUseBlock"]
+
+
+class BetaToolUseBlock(BaseModel):
+    id: str
+
+    input: object
+
+    name: str
+
+    type: Literal["tool_use"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_use_block_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_use_block_param.py
new file mode 100644
index 00000000..603dc85f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_tool_use_block_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .beta_cache_control_ephemeral_param import BetaCacheControlEphemeralParam
+
+__all__ = ["BetaToolUseBlockParam"]
+
+
+class BetaToolUseBlockParam(TypedDict, total=False):
+    id: Required[str]
+
+    input: Required[object]
+
+    name: Required[str]
+
+    type: Required[Literal["tool_use"]]
+
+    cache_control: Optional[BetaCacheControlEphemeralParam]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_url_image_source_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_url_image_source_param.py
new file mode 100644
index 00000000..a094a433
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_url_image_source_param.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BetaURLImageSourceParam"]
+
+
+class BetaURLImageSourceParam(TypedDict, total=False):
+    type: Required[Literal["url"]]
+
+    url: Required[str]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_url_pdf_source_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_url_pdf_source_param.py
new file mode 100644
index 00000000..acc1eabf
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_url_pdf_source_param.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BetaURLPDFSourceParam"]
+
+
+class BetaURLPDFSourceParam(TypedDict, total=False):
+    type: Required[Literal["url"]]
+
+    url: Required[str]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_usage.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_usage.py
new file mode 100644
index 00000000..0d956c70
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/beta_usage.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from ..._models import BaseModel
+
+__all__ = ["BetaUsage"]
+
+
+class BetaUsage(BaseModel):
+    cache_creation_input_tokens: Optional[int] = None
+    """The number of input tokens used to create the cache entry."""
+
+    cache_read_input_tokens: Optional[int] = None
+    """The number of input tokens read from the cache."""
+
+    input_tokens: int
+    """The number of input tokens which were used."""
+
+    output_tokens: int
+    """The number of output tokens which were used."""
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/message_count_tokens_params.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/message_count_tokens_params.py
new file mode 100644
index 00000000..056c4289
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/message_count_tokens_params.py
@@ -0,0 +1,234 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable
+from typing_extensions import Required, Annotated, TypeAlias, TypedDict
+
+from ..._utils import PropertyInfo
+from ..model_param import ModelParam
+from .beta_tool_param import BetaToolParam
+from .beta_message_param import BetaMessageParam
+from ..anthropic_beta_param import AnthropicBetaParam
+from .beta_text_block_param import BetaTextBlockParam
+from .beta_tool_choice_param import BetaToolChoiceParam
+from .beta_thinking_config_param import BetaThinkingConfigParam
+from .beta_tool_bash_20241022_param import BetaToolBash20241022Param
+from .beta_tool_bash_20250124_param import BetaToolBash20250124Param
+from .beta_tool_text_editor_20241022_param import BetaToolTextEditor20241022Param
+from .beta_tool_text_editor_20250124_param import BetaToolTextEditor20250124Param
+from .beta_tool_computer_use_20241022_param import BetaToolComputerUse20241022Param
+from .beta_tool_computer_use_20250124_param import BetaToolComputerUse20250124Param
+
+__all__ = ["MessageCountTokensParams", "Tool"]
+
+
+class MessageCountTokensParams(TypedDict, total=False):
+    messages: Required[Iterable[BetaMessageParam]]
+    """Input messages.
+
+    Our models are trained to operate on alternating `user` and `assistant`
+    conversational turns. When creating a new `Message`, you specify the prior
+    conversational turns with the `messages` parameter, and the model then generates
+    the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+    in your request will be combined into a single turn.
+
+    Each input message must be an object with a `role` and `content`. You can
+    specify a single `user`-role message, or you can include multiple `user` and
+    `assistant` messages.
+
+    If the final message uses the `assistant` role, the response content will
+    continue immediately from the content in that message. This can be used to
+    constrain part of the model's response.
+
+    Example with a single `user` message:
+
+    ```json
+    [{ "role": "user", "content": "Hello, Claude" }]
+    ```
+
+    Example with multiple conversational turns:
+
+    ```json
+    [
+      { "role": "user", "content": "Hello there." },
+      { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+      { "role": "user", "content": "Can you explain LLMs in plain English?" }
+    ]
+    ```
+
+    Example with a partially-filled response from Claude:
+
+    ```json
+    [
+      {
+        "role": "user",
+        "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+      },
+      { "role": "assistant", "content": "The best answer is (" }
+    ]
+    ```
+
+    Each input message `content` may be either a single `string` or an array of
+    content blocks, where each block has a specific `type`. Using a `string` for
+    `content` is shorthand for an array of one content block of type `"text"`. The
+    following input messages are equivalent:
+
+    ```json
+    { "role": "user", "content": "Hello, Claude" }
+    ```
+
+    ```json
+    { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+    ```
+
+    Starting with Claude 3 models, you can also send image content blocks:
+
+    ```json
+    {
+      "role": "user",
+      "content": [
+        {
+          "type": "image",
+          "source": {
+            "type": "base64",
+            "media_type": "image/jpeg",
+            "data": "/9j/4AAQSkZJRg..."
+          }
+        },
+        { "type": "text", "text": "What is in this image?" }
+      ]
+    }
+    ```
+
+    We currently support the `base64` source type for images, and the `image/jpeg`,
+    `image/png`, `image/gif`, and `image/webp` media types.
+
+    See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+    more input examples.
+
+    Note that if you want to include a
+    [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+    the top-level `system` parameter — there is no `"system"` role for input
+    messages in the Messages API.
+    """
+
+    model: Required[ModelParam]
+    """
+    The model that will complete your prompt.\n\nSee
+    [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+    details and options.
+    """
+
+    system: Union[str, Iterable[BetaTextBlockParam]]
+    """System prompt.
+
+    A system prompt is a way of providing context and instructions to Claude, such
+    as specifying a particular goal or role. See our
+    [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+    """
+
+    thinking: BetaThinkingConfigParam
+    """Configuration for enabling Claude's extended thinking.
+
+    When enabled, responses include `thinking` content blocks showing Claude's
+    thinking process before the final answer. Requires a minimum budget of 1,024
+    tokens and counts towards your `max_tokens` limit.
+
+    See
+    [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+    for details.
+    """
+
+    tool_choice: BetaToolChoiceParam
+    """How the model should use the provided tools.
+
+    The model can use a specific tool, any available tool, decide by itself, or not
+    use tools at all.
+    """
+
+    tools: Iterable[Tool]
+    """Definitions of tools that the model may use.
+
+    If you include `tools` in your API request, the model may return `tool_use`
+    content blocks that represent the model's use of those tools. You can then run
+    those tools using the tool input generated by the model and then optionally
+    return results back to the model using `tool_result` content blocks.
+
+    Each tool definition includes:
+
+    - `name`: Name of the tool.
+    - `description`: Optional, but strongly-recommended description of the tool.
+    - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+      tool `input` shape that the model will produce in `tool_use` output content
+      blocks.
+
+    For example, if you defined `tools` as:
+
+    ```json
+    [
+      {
+        "name": "get_stock_price",
+        "description": "Get the current stock price for a given ticker symbol.",
+        "input_schema": {
+          "type": "object",
+          "properties": {
+            "ticker": {
+              "type": "string",
+              "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+            }
+          },
+          "required": ["ticker"]
+        }
+      }
+    ]
+    ```
+
+    And then asked the model "What's the S&P 500 at today?", the model might produce
+    `tool_use` content blocks in the response like this:
+
+    ```json
+    [
+      {
+        "type": "tool_use",
+        "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+        "name": "get_stock_price",
+        "input": { "ticker": "^GSPC" }
+      }
+    ]
+    ```
+
+    You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+    input, and return the following back to the model in a subsequent `user`
+    message:
+
+    ```json
+    [
+      {
+        "type": "tool_result",
+        "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+        "content": "259.75 USD"
+      }
+    ]
+    ```
+
+    Tools can be used for workflows that include running client-side tools and
+    functions, or more generally whenever you want the model to produce a particular
+    JSON structure of output.
+
+    See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+    """
+
+    betas: Annotated[List[AnthropicBetaParam], PropertyInfo(alias="anthropic-beta")]
+    """Optional header to specify the beta version(s) you want to use."""
+
+
+Tool: TypeAlias = Union[
+    BetaToolParam,
+    BetaToolComputerUse20241022Param,
+    BetaToolBash20241022Param,
+    BetaToolTextEditor20241022Param,
+    BetaToolComputerUse20250124Param,
+    BetaToolBash20250124Param,
+    BetaToolTextEditor20250124Param,
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/message_create_params.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/message_create_params.py
new file mode 100644
index 00000000..e05f92da
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/message_create_params.py
@@ -0,0 +1,297 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable
+from typing_extensions import Literal, Required, Annotated, TypedDict
+
+from ..._utils import PropertyInfo
+from ..model_param import ModelParam
+from .beta_message_param import BetaMessageParam
+from .beta_metadata_param import BetaMetadataParam
+from ..anthropic_beta_param import AnthropicBetaParam
+from .beta_text_block_param import BetaTextBlockParam
+from .beta_tool_union_param import BetaToolUnionParam
+from .beta_tool_choice_param import BetaToolChoiceParam
+from .beta_thinking_config_param import BetaThinkingConfigParam
+
+__all__ = ["MessageCreateParamsBase", "MessageCreateParamsNonStreaming", "MessageCreateParamsStreaming"]
+
+
+class MessageCreateParamsBase(TypedDict, total=False):
+    max_tokens: Required[int]
+    """The maximum number of tokens to generate before stopping.
+
+    Note that our models may stop _before_ reaching this maximum. This parameter
+    only specifies the absolute maximum number of tokens to generate.
+
+    Different models have different maximum values for this parameter. See
+    [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+    """
+
+    messages: Required[Iterable[BetaMessageParam]]
+    """Input messages.
+
+    Our models are trained to operate on alternating `user` and `assistant`
+    conversational turns. When creating a new `Message`, you specify the prior
+    conversational turns with the `messages` parameter, and the model then generates
+    the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+    in your request will be combined into a single turn.
+
+    Each input message must be an object with a `role` and `content`. You can
+    specify a single `user`-role message, or you can include multiple `user` and
+    `assistant` messages.
+
+    If the final message uses the `assistant` role, the response content will
+    continue immediately from the content in that message. This can be used to
+    constrain part of the model's response.
+
+    Example with a single `user` message:
+
+    ```json
+    [{ "role": "user", "content": "Hello, Claude" }]
+    ```
+
+    Example with multiple conversational turns:
+
+    ```json
+    [
+      { "role": "user", "content": "Hello there." },
+      { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+      { "role": "user", "content": "Can you explain LLMs in plain English?" }
+    ]
+    ```
+
+    Example with a partially-filled response from Claude:
+
+    ```json
+    [
+      {
+        "role": "user",
+        "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+      },
+      { "role": "assistant", "content": "The best answer is (" }
+    ]
+    ```
+
+    Each input message `content` may be either a single `string` or an array of
+    content blocks, where each block has a specific `type`. Using a `string` for
+    `content` is shorthand for an array of one content block of type `"text"`. The
+    following input messages are equivalent:
+
+    ```json
+    { "role": "user", "content": "Hello, Claude" }
+    ```
+
+    ```json
+    { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+    ```
+
+    Starting with Claude 3 models, you can also send image content blocks:
+
+    ```json
+    {
+      "role": "user",
+      "content": [
+        {
+          "type": "image",
+          "source": {
+            "type": "base64",
+            "media_type": "image/jpeg",
+            "data": "/9j/4AAQSkZJRg..."
+          }
+        },
+        { "type": "text", "text": "What is in this image?" }
+      ]
+    }
+    ```
+
+    We currently support the `base64` source type for images, and the `image/jpeg`,
+    `image/png`, `image/gif`, and `image/webp` media types.
+
+    See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+    more input examples.
+
+    Note that if you want to include a
+    [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+    the top-level `system` parameter — there is no `"system"` role for input
+    messages in the Messages API.
+    """
+
+    model: Required[ModelParam]
+    """
+    The model that will complete your prompt.\n\nSee
+    [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+    details and options.
+    """
+
+    metadata: BetaMetadataParam
+    """An object describing metadata about the request."""
+
+    stop_sequences: List[str]
+    """Custom text sequences that will cause the model to stop generating.
+
+    Our models will normally stop when they have naturally completed their turn,
+    which will result in a response `stop_reason` of `"end_turn"`.
+
+    If you want the model to stop generating when it encounters custom strings of
+    text, you can use the `stop_sequences` parameter. If the model encounters one of
+    the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+    and the response `stop_sequence` value will contain the matched stop sequence.
+    """
+
+    system: Union[str, Iterable[BetaTextBlockParam]]
+    """System prompt.
+
+    A system prompt is a way of providing context and instructions to Claude, such
+    as specifying a particular goal or role. See our
+    [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+    """
+
+    temperature: float
+    """Amount of randomness injected into the response.
+
+    Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+    for analytical / multiple choice, and closer to `1.0` for creative and
+    generative tasks.
+
+    Note that even with `temperature` of `0.0`, the results will not be fully
+    deterministic.
+    """
+
+    thinking: BetaThinkingConfigParam
+    """Configuration for enabling Claude's extended thinking.
+
+    When enabled, responses include `thinking` content blocks showing Claude's
+    thinking process before the final answer. Requires a minimum budget of 1,024
+    tokens and counts towards your `max_tokens` limit.
+
+    See
+    [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+    for details.
+    """
+
+    tool_choice: BetaToolChoiceParam
+    """How the model should use the provided tools.
+
+    The model can use a specific tool, any available tool, decide by itself, or not
+    use tools at all.
+    """
+
+    tools: Iterable[BetaToolUnionParam]
+    """Definitions of tools that the model may use.
+
+    If you include `tools` in your API request, the model may return `tool_use`
+    content blocks that represent the model's use of those tools. You can then run
+    those tools using the tool input generated by the model and then optionally
+    return results back to the model using `tool_result` content blocks.
+
+    Each tool definition includes:
+
+    - `name`: Name of the tool.
+    - `description`: Optional, but strongly recommended description of the tool.
+    - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+      tool `input` shape that the model will produce in `tool_use` output content
+      blocks.
+
+    For example, if you defined `tools` as:
+
+    ```json
+    [
+      {
+        "name": "get_stock_price",
+        "description": "Get the current stock price for a given ticker symbol.",
+        "input_schema": {
+          "type": "object",
+          "properties": {
+            "ticker": {
+              "type": "string",
+              "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+            }
+          },
+          "required": ["ticker"]
+        }
+      }
+    ]
+    ```
+
+    And then asked the model "What's the S&P 500 at today?", the model might produce
+    `tool_use` content blocks in the response like this:
+
+    ```json
+    [
+      {
+        "type": "tool_use",
+        "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+        "name": "get_stock_price",
+        "input": { "ticker": "^GSPC" }
+      }
+    ]
+    ```
+
+    You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+    input, and return the following back to the model in a subsequent `user`
+    message:
+
+    ```json
+    [
+      {
+        "type": "tool_result",
+        "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+        "content": "259.75 USD"
+      }
+    ]
+    ```
+
+    Tools can be used for workflows that include running client-side tools and
+    functions, or more generally whenever you want the model to produce a particular
+    JSON structure of output.
+
+    See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+    """
+
+    top_k: int
+    """Only sample from the top K options for each subsequent token.
+
+    Used to remove "long tail" low probability responses.
+    [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+    Recommended for advanced use cases only. You usually only need to use
+    `temperature`.
+    """
+
+    top_p: float
+    """Use nucleus sampling.
+
+    In nucleus sampling, we compute the cumulative distribution over all the options
+    for each subsequent token in decreasing probability order and cut it off once it
+    reaches a particular probability specified by `top_p`. You should either alter
+    `temperature` or `top_p`, but not both.
+
+    Recommended for advanced use cases only. You usually only need to use
+    `temperature`.
+    """
+
+    betas: Annotated[List[AnthropicBetaParam], PropertyInfo(alias="anthropic-beta")]
+    """Optional header to specify the beta version(s) you want to use."""
+
+
+class MessageCreateParamsNonStreaming(MessageCreateParamsBase, total=False):
+    stream: Literal[False]
+    """Whether to incrementally stream the response using server-sent events.
+
+    See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+    details.
+    """
+
+
+class MessageCreateParamsStreaming(MessageCreateParamsBase):
+    stream: Required[Literal[True]]
+    """Whether to incrementally stream the response using server-sent events.
+
+    See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+    details.
+    """
+
+
+MessageCreateParams = Union[MessageCreateParamsNonStreaming, MessageCreateParamsStreaming]
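Since these classes are `TypedDict`s, the params are plain dictionaries at runtime and can be unpacked straight into the beta messages resource. A minimal sketch, assuming `ANTHROPIC_API_KEY` is set in the environment and using an illustrative model name:

```python
from anthropic import Anthropic
from anthropic.types.beta.message_create_params import MessageCreateParamsNonStreaming

client = Anthropic()  # reads ANTHROPIC_API_KEY from the environment

params: MessageCreateParamsNonStreaming = {
    "model": "claude-3-5-sonnet-latest",  # illustrative model name
    "max_tokens": 1024,
    "messages": [{"role": "user", "content": "Hello, Claude"}],
}

# The non-streaming variant returns a complete message object.
message = client.beta.messages.create(**params)
print(message.content)
```

Setting `"stream": True` instead selects `MessageCreateParamsStreaming`, and the same call returns a server-sent event stream rather than a finished message.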
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/__init__.py
new file mode 100644
index 00000000..fef14dd1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/__init__.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .batch_list_params import BatchListParams as BatchListParams
+from .beta_message_batch import BetaMessageBatch as BetaMessageBatch
+from .batch_create_params import BatchCreateParams as BatchCreateParams
+from .beta_message_batch_result import BetaMessageBatchResult as BetaMessageBatchResult
+from .beta_deleted_message_batch import BetaDeletedMessageBatch as BetaDeletedMessageBatch
+from .beta_message_batch_errored_result import BetaMessageBatchErroredResult as BetaMessageBatchErroredResult
+from .beta_message_batch_expired_result import BetaMessageBatchExpiredResult as BetaMessageBatchExpiredResult
+from .beta_message_batch_request_counts import BetaMessageBatchRequestCounts as BetaMessageBatchRequestCounts
+from .beta_message_batch_canceled_result import BetaMessageBatchCanceledResult as BetaMessageBatchCanceledResult
+from .beta_message_batch_succeeded_result import BetaMessageBatchSucceededResult as BetaMessageBatchSucceededResult
+from .beta_message_batch_individual_response import (
+    BetaMessageBatchIndividualResponse as BetaMessageBatchIndividualResponse,
+)
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/batch_create_params.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/batch_create_params.py
new file mode 100644
index 00000000..8eb9c4af
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/batch_create_params.py
@@ -0,0 +1,41 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Iterable
+from typing_extensions import Required, Annotated, TypedDict
+
+from ...._utils import PropertyInfo
+from ...anthropic_beta_param import AnthropicBetaParam
+from ..message_create_params import MessageCreateParamsNonStreaming
+
+__all__ = ["BatchCreateParams", "Request"]
+
+
+class BatchCreateParams(TypedDict, total=False):
+    requests: Required[Iterable[Request]]
+    """List of requests for prompt completion.
+
+    Each is an individual request to create a Message.
+    """
+
+    betas: Annotated[List[AnthropicBetaParam], PropertyInfo(alias="anthropic-beta")]
+    """Optional header to specify the beta version(s) you want to use."""
+
+
+class Request(TypedDict, total=False):
+    custom_id: Required[str]
+    """Developer-provided ID created for each request in a Message Batch.
+
+    Useful for matching results to requests, as results may be given out of request
+    order.
+
+    Must be unique for each request within the Message Batch.
+    """
+
+    params: Required[MessageCreateParamsNonStreaming]
+    """Messages API creation parameters for the individual request.
+
+    See the [Messages API reference](/en/api/messages) for full documentation on
+    available parameters.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/batch_list_params.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/batch_list_params.py
new file mode 100644
index 00000000..3f406251
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/batch_list_params.py
@@ -0,0 +1,34 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Annotated, TypedDict
+
+from ...._utils import PropertyInfo
+from ...anthropic_beta_param import AnthropicBetaParam
+
+__all__ = ["BatchListParams"]
+
+
+class BatchListParams(TypedDict, total=False):
+    after_id: str
+    """ID of the object to use as a cursor for pagination.
+
+    When provided, returns the page of results immediately after this object.
+    """
+
+    before_id: str
+    """ID of the object to use as a cursor for pagination.
+
+    When provided, returns the page of results immediately before this object.
+    """
+
+    limit: int
+    """Number of items to return per page.
+
+    Defaults to `20`. Ranges from `1` to `1000`.
+    """
+
+    betas: Annotated[List[AnthropicBetaParam], PropertyInfo(alias="anthropic-beta")]
+    """Optional header to specify the beta version(s) you want to use."""
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_deleted_message_batch.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_deleted_message_batch.py
new file mode 100644
index 00000000..f7dd1d52
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_deleted_message_batch.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["BetaDeletedMessageBatch"]
+
+
+class BetaDeletedMessageBatch(BaseModel):
+    id: str
+    """ID of the Message Batch."""
+
+    type: Literal["message_batch_deleted"]
+    """Deleted object type.
+
+    For Message Batches, this is always `"message_batch_deleted"`.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch.py
new file mode 100644
index 00000000..1ea92c3a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch.py
@@ -0,0 +1,77 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .beta_message_batch_request_counts import BetaMessageBatchRequestCounts
+
+__all__ = ["BetaMessageBatch"]
+
+
+class BetaMessageBatch(BaseModel):
+    id: str
+    """Unique object identifier.
+
+    The format and length of IDs may change over time.
+    """
+
+    archived_at: Optional[datetime] = None
+    """
+    RFC 3339 datetime string representing the time at which the Message Batch was
+    archived and its results became unavailable.
+    """
+
+    cancel_initiated_at: Optional[datetime] = None
+    """
+    RFC 3339 datetime string representing the time at which cancellation was
+    initiated for the Message Batch. Specified only if cancellation was initiated.
+    """
+
+    created_at: datetime
+    """
+    RFC 3339 datetime string representing the time at which the Message Batch was
+    created.
+    """
+
+    ended_at: Optional[datetime] = None
+    """
+    RFC 3339 datetime string representing the time at which processing for the
+    Message Batch ended. Specified only once processing ends.
+
+    Processing ends when every request in a Message Batch has succeeded, errored,
+    been canceled, or expired.
+    """
+
+    expires_at: datetime
+    """
+    RFC 3339 datetime string representing the time at which the Message Batch will
+    expire and end processing, which is 24 hours after creation.
+    """
+
+    processing_status: Literal["in_progress", "canceling", "ended"]
+    """Processing status of the Message Batch."""
+
+    request_counts: BetaMessageBatchRequestCounts
+    """Tallies requests within the Message Batch, categorized by their status.
+
+    Requests start as `processing` and move to one of the other statuses only once
+    processing of the entire batch ends. The sum of all values always matches the
+    total number of requests in the batch.
+    """
+
+    results_url: Optional[str] = None
+    """URL to a `.jsonl` file containing the results of the Message Batch requests.
+
+    Specified only once processing ends.
+
+    Results in the file are not guaranteed to be in the same order as requests. Use
+    the `custom_id` field to match results to requests.
+    """
+
+    type: Literal["message_batch"]
+    """Object type.
+
+    For Message Batches, this is always `"message_batch"`.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_canceled_result.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_canceled_result.py
new file mode 100644
index 00000000..e5dae348
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_canceled_result.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["BetaMessageBatchCanceledResult"]
+
+
+class BetaMessageBatchCanceledResult(BaseModel):
+    type: Literal["canceled"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_errored_result.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_errored_result.py
new file mode 100644
index 00000000..44ea9027
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_errored_result.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from ...beta_error_response import BetaErrorResponse
+
+__all__ = ["BetaMessageBatchErroredResult"]
+
+
+class BetaMessageBatchErroredResult(BaseModel):
+    error: BetaErrorResponse
+
+    type: Literal["errored"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_expired_result.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_expired_result.py
new file mode 100644
index 00000000..0dbfde41
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_expired_result.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["BetaMessageBatchExpiredResult"]
+
+
+class BetaMessageBatchExpiredResult(BaseModel):
+    type: Literal["expired"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_individual_response.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_individual_response.py
new file mode 100644
index 00000000..8c493934
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_individual_response.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from ...._models import BaseModel
+from .beta_message_batch_result import BetaMessageBatchResult
+
+__all__ = ["BetaMessageBatchIndividualResponse"]
+
+
+class BetaMessageBatchIndividualResponse(BaseModel):
+    custom_id: str
+    """Developer-provided ID created for each request in a Message Batch.
+
+    Useful for matching results to requests, as results may be given out of request
+    order.
+
+    Must be unique for each request within the Message Batch.
+    """
+
+    result: BetaMessageBatchResult
+    """Processing result for this request.
+
+    Contains a Message output if processing was successful, an error response if
+    processing failed, or the reason why processing was not attempted, such as
+    cancellation or expiration.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_request_counts.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_request_counts.py
new file mode 100644
index 00000000..48e6952f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_request_counts.py
@@ -0,0 +1,35 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from ...._models import BaseModel
+
+__all__ = ["BetaMessageBatchRequestCounts"]
+
+
+class BetaMessageBatchRequestCounts(BaseModel):
+    canceled: int
+    """Number of requests in the Message Batch that have been canceled.
+
+    This is zero until processing of the entire Message Batch has ended.
+    """
+
+    errored: int
+    """Number of requests in the Message Batch that encountered an error.
+
+    This is zero until processing of the entire Message Batch has ended.
+    """
+
+    expired: int
+    """Number of requests in the Message Batch that have expired.
+
+    This is zero until processing of the entire Message Batch has ended.
+    """
+
+    processing: int
+    """Number of requests in the Message Batch that are processing."""
+
+    succeeded: int
+    """Number of requests in the Message Batch that have completed successfully.
+
+    This is zero until processing of the entire Message Batch has ended.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_result.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_result.py
new file mode 100644
index 00000000..78ca7317
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_result.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ...._utils import PropertyInfo
+from .beta_message_batch_errored_result import BetaMessageBatchErroredResult
+from .beta_message_batch_expired_result import BetaMessageBatchExpiredResult
+from .beta_message_batch_canceled_result import BetaMessageBatchCanceledResult
+from .beta_message_batch_succeeded_result import BetaMessageBatchSucceededResult
+
+__all__ = ["BetaMessageBatchResult"]
+
+BetaMessageBatchResult: TypeAlias = Annotated[
+    Union[
+        BetaMessageBatchSucceededResult,
+        BetaMessageBatchErroredResult,
+        BetaMessageBatchCanceledResult,
+        BetaMessageBatchExpiredResult,
+    ],
+    PropertyInfo(discriminator="type"),
+]
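Because the union is discriminated on `type`, checking that one field is enough to know which payload fields are present. A sketch of consuming batch results this way, assuming the `results` method on the batches resource and a placeholder batch ID:

```python
from anthropic import Anthropic

client = Anthropic()

for entry in client.beta.messages.batches.results("msgbatch_xxxxxxxx"):
    result = entry.result
    if result.type == "succeeded":
        print(entry.custom_id, result.message.content)
    elif result.type == "errored":
        print(entry.custom_id, result.error.error)
    else:
        # "canceled" and "expired" carry no payload beyond their type.
        print(entry.custom_id, result.type)
```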
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_succeeded_result.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_succeeded_result.py
new file mode 100644
index 00000000..94389d60
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/messages/beta_message_batch_succeeded_result.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from ..beta_message import BetaMessage
+
+__all__ = ["BetaMessageBatchSucceededResult"]
+
+
+class BetaMessageBatchSucceededResult(BaseModel):
+    message: BetaMessage
+
+    type: Literal["succeeded"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta/model_list_params.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta/model_list_params.py
new file mode 100644
index 00000000..b16d22a3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta/model_list_params.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["ModelListParams"]
+
+
+class ModelListParams(TypedDict, total=False):
+    after_id: str
+    """ID of the object to use as a cursor for pagination.
+
+    When provided, returns the page of results immediately after this object.
+    """
+
+    before_id: str
+    """ID of the object to use as a cursor for pagination.
+
+    When provided, returns the page of results immediately before this object.
+    """
+
+    limit: int
+    """Number of items to return per page.
+
+    Defaults to `20`. Ranges from `1` to `1000`.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta_api_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta_api_error.py
new file mode 100644
index 00000000..16aa604e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta_api_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["BetaAPIError"]
+
+
+class BetaAPIError(BaseModel):
+    message: str
+
+    type: Literal["api_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta_authentication_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta_authentication_error.py
new file mode 100644
index 00000000..8a555570
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta_authentication_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["BetaAuthenticationError"]
+
+
+class BetaAuthenticationError(BaseModel):
+    message: str
+
+    type: Literal["authentication_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta_billing_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta_billing_error.py
new file mode 100644
index 00000000..1ab37614
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta_billing_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["BetaBillingError"]
+
+
+class BetaBillingError(BaseModel):
+    message: str
+
+    type: Literal["billing_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta_error.py
new file mode 100644
index 00000000..029d80dc
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta_error.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .beta_api_error import BetaAPIError
+from .beta_billing_error import BetaBillingError
+from .beta_not_found_error import BetaNotFoundError
+from .beta_overloaded_error import BetaOverloadedError
+from .beta_permission_error import BetaPermissionError
+from .beta_rate_limit_error import BetaRateLimitError
+from .beta_authentication_error import BetaAuthenticationError
+from .beta_gateway_timeout_error import BetaGatewayTimeoutError
+from .beta_invalid_request_error import BetaInvalidRequestError
+
+__all__ = ["BetaError"]
+
+BetaError: TypeAlias = Annotated[
+    Union[
+        BetaInvalidRequestError,
+        BetaAuthenticationError,
+        BetaBillingError,
+        BetaPermissionError,
+        BetaNotFoundError,
+        BetaRateLimitError,
+        BetaGatewayTimeoutError,
+        BetaAPIError,
+        BetaOverloadedError,
+    ],
+    PropertyInfo(discriminator="type"),
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta_error_response.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta_error_response.py
new file mode 100644
index 00000000..1751183e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta_error_response.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+from .beta_error import BetaError
+
+__all__ = ["BetaErrorResponse"]
+
+
+class BetaErrorResponse(BaseModel):
+    error: BetaError
+
+    type: Literal["error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta_gateway_timeout_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta_gateway_timeout_error.py
new file mode 100644
index 00000000..9a29705b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta_gateway_timeout_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["BetaGatewayTimeoutError"]
+
+
+class BetaGatewayTimeoutError(BaseModel):
+    message: str
+
+    type: Literal["timeout_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta_invalid_request_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta_invalid_request_error.py
new file mode 100644
index 00000000..a84d53cc
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta_invalid_request_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["BetaInvalidRequestError"]
+
+
+class BetaInvalidRequestError(BaseModel):
+    message: str
+
+    type: Literal["invalid_request_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta_not_found_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta_not_found_error.py
new file mode 100644
index 00000000..3d57cb5a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta_not_found_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["BetaNotFoundError"]
+
+
+class BetaNotFoundError(BaseModel):
+    message: str
+
+    type: Literal["not_found_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta_overloaded_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta_overloaded_error.py
new file mode 100644
index 00000000..ff5dbe81
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta_overloaded_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["BetaOverloadedError"]
+
+
+class BetaOverloadedError(BaseModel):
+    message: str
+
+    type: Literal["overloaded_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta_permission_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta_permission_error.py
new file mode 100644
index 00000000..986cf894
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta_permission_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["BetaPermissionError"]
+
+
+class BetaPermissionError(BaseModel):
+    message: str
+
+    type: Literal["permission_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/beta_rate_limit_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/beta_rate_limit_error.py
new file mode 100644
index 00000000..ae3cb1ae
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/beta_rate_limit_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["BetaRateLimitError"]
+
+
+class BetaRateLimitError(BaseModel):
+    message: str
+
+    type: Literal["rate_limit_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/cache_control_ephemeral_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/cache_control_ephemeral_param.py
new file mode 100644
index 00000000..8900071e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/cache_control_ephemeral_param.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["CacheControlEphemeralParam"]
+
+
+class CacheControlEphemeralParam(TypedDict, total=False):
+    type: Required[Literal["ephemeral"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/citation_char_location.py b/.venv/lib/python3.12/site-packages/anthropic/types/citation_char_location.py
new file mode 100644
index 00000000..011b1066
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/citation_char_location.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["CitationCharLocation"]
+
+
+class CitationCharLocation(BaseModel):
+    cited_text: str
+
+    document_index: int
+
+    document_title: Optional[str] = None
+
+    end_char_index: int
+
+    start_char_index: int
+
+    type: Literal["char_location"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/citation_char_location_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/citation_char_location_param.py
new file mode 100644
index 00000000..1cc1dfb1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/citation_char_location_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["CitationCharLocationParam"]
+
+
+class CitationCharLocationParam(TypedDict, total=False):
+    cited_text: Required[str]
+
+    document_index: Required[int]
+
+    document_title: Required[Optional[str]]
+
+    end_char_index: Required[int]
+
+    start_char_index: Required[int]
+
+    type: Required[Literal["char_location"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/citation_content_block_location.py b/.venv/lib/python3.12/site-packages/anthropic/types/citation_content_block_location.py
new file mode 100644
index 00000000..0df0ce57
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/citation_content_block_location.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["CitationContentBlockLocation"]
+
+
+class CitationContentBlockLocation(BaseModel):
+    cited_text: str
+
+    document_index: int
+
+    document_title: Optional[str] = None
+
+    end_block_index: int
+
+    start_block_index: int
+
+    type: Literal["content_block_location"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/citation_content_block_location_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/citation_content_block_location_param.py
new file mode 100644
index 00000000..ee0a6a23
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/citation_content_block_location_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["CitationContentBlockLocationParam"]
+
+
+class CitationContentBlockLocationParam(TypedDict, total=False):
+    cited_text: Required[str]
+
+    document_index: Required[int]
+
+    document_title: Required[Optional[str]]
+
+    end_block_index: Required[int]
+
+    start_block_index: Required[int]
+
+    type: Required[Literal["content_block_location"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/citation_page_location.py b/.venv/lib/python3.12/site-packages/anthropic/types/citation_page_location.py
new file mode 100644
index 00000000..94c4d509
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/citation_page_location.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["CitationPageLocation"]
+
+
+class CitationPageLocation(BaseModel):
+    cited_text: str
+
+    document_index: int
+
+    document_title: Optional[str] = None
+
+    end_page_number: int
+
+    start_page_number: int
+
+    type: Literal["page_location"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/citation_page_location_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/citation_page_location_param.py
new file mode 100644
index 00000000..483837b5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/citation_page_location_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["CitationPageLocationParam"]
+
+
+class CitationPageLocationParam(TypedDict, total=False):
+    cited_text: Required[str]
+
+    document_index: Required[int]
+
+    document_title: Required[Optional[str]]
+
+    end_page_number: Required[int]
+
+    start_page_number: Required[int]
+
+    type: Required[Literal["page_location"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/citations_config_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/citations_config_param.py
new file mode 100644
index 00000000..817397f8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/citations_config_param.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["CitationsConfigParam"]
+
+
+class CitationsConfigParam(TypedDict, total=False):
+    enabled: bool
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/citations_delta.py b/.venv/lib/python3.12/site-packages/anthropic/types/citations_delta.py
new file mode 100644
index 00000000..3eab03d1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/citations_delta.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+from .citation_char_location import CitationCharLocation
+from .citation_page_location import CitationPageLocation
+from .citation_content_block_location import CitationContentBlockLocation
+
+__all__ = ["CitationsDelta", "Citation"]
+
+Citation: TypeAlias = Annotated[
+    Union[CitationCharLocation, CitationPageLocation, CitationContentBlockLocation], PropertyInfo(discriminator="type")
+]
+
+
+class CitationsDelta(BaseModel):
+    citation: Citation
+
+    type: Literal["citations_delta"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/completion.py b/.venv/lib/python3.12/site-packages/anthropic/types/completion.py
new file mode 100644
index 00000000..e6293210
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/completion.py
@@ -0,0 +1,43 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from .model import Model
+from .._models import BaseModel
+
+__all__ = ["Completion"]
+
+
+class Completion(BaseModel):
+    id: str
+    """Unique object identifier.
+
+    The format and length of IDs may change over time.
+    """
+
+    completion: str
+    """The resulting completion up to and excluding the stop sequences."""
+
+    model: Model
+    """
+    The model that will complete your prompt.\n\nSee
+    [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+    details and options.
+    """
+
+    stop_reason: Optional[str] = None
+    """The reason that we stopped.
+
+    This may be one of the following values:
+
+    - `"stop_sequence"`: we reached a stop sequence — either provided by you via the
+      `stop_sequences` parameter, or a stop sequence built into the model
+    - `"max_tokens"`: we exceeded `max_tokens_to_sample` or the model's maximum
+    """
+
+    type: Literal["completion"]
+    """Object type.
+
+    For Text Completions, this is always `"completion"`.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/completion_create_params.py b/.venv/lib/python3.12/site-packages/anthropic/types/completion_create_params.py
new file mode 100644
index 00000000..0eb25725
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/completion_create_params.py
@@ -0,0 +1,131 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .model_param import ModelParam
+from .metadata_param import MetadataParam
+
+__all__ = [
+    "CompletionRequestStreamingMetadata",
+    "CompletionRequestNonStreamingMetadata",
+    "CompletionRequestNonStreaming",
+    "CompletionRequestStreaming",
+    "CompletionCreateParamsBase",
+    "Metadata",
+    "CompletionCreateParamsNonStreaming",
+    "CompletionCreateParamsStreaming",
+]
+
+
+class CompletionCreateParamsBase(TypedDict, total=False):
+    max_tokens_to_sample: Required[int]
+    """The maximum number of tokens to generate before stopping.
+
+    Note that our models may stop _before_ reaching this maximum. This parameter
+    only specifies the absolute maximum number of tokens to generate.
+    """
+
+    model: Required[ModelParam]
+    """
+    The model that will complete your prompt.\n\nSee
+    [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+    details and options.
+    """
+
+    prompt: Required[str]
+    """The prompt that you want Claude to complete.
+
+    For proper response generation you will need to format your prompt using
+    alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:
+
+    ```
+    "\n\nHuman: {userQuestion}\n\nAssistant:"
+    ```
+
+    See [prompt validation](https://docs.anthropic.com/en/api/prompt-validation) and
+    our guide to
+    [prompt design](https://docs.anthropic.com/en/docs/intro-to-prompting) for more
+    details.
+    """
+
+    metadata: MetadataParam
+    """An object describing metadata about the request."""
+
+    stop_sequences: List[str]
+    """Sequences that will cause the model to stop generating.
+
+    Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
+    sequences in the future. By providing the `stop_sequences` parameter, you may
+    include additional strings that will cause the model to stop generating.
+    """
+
+    temperature: float
+    """Amount of randomness injected into the response.
+
+    Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+    for analytical / multiple choice, and closer to `1.0` for creative and
+    generative tasks.
+
+    Note that even with `temperature` of `0.0`, the results will not be fully
+    deterministic.
+    """
+
+    top_k: int
+    """Only sample from the top K options for each subsequent token.
+
+    Used to remove "long tail" low probability responses.
+    [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+    Recommended for advanced use cases only. You usually only need to use
+    `temperature`.
+    """
+
+    top_p: float
+    """Use nucleus sampling.
+
+    In nucleus sampling, we compute the cumulative distribution over all the options
+    for each subsequent token in decreasing probability order and cut it off once it
+    reaches a particular probability specified by `top_p`. You should either alter
+    `temperature` or `top_p`, but not both.
+
+    Recommended for advanced use cases only. You usually only need to use
+    `temperature`.
+    """
+
+
+Metadata: TypeAlias = MetadataParam
+"""This is deprecated, `MetadataParam` should be used instead"""
+
+
+class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False):
+    stream: Literal[False]
+    """Whether to incrementally stream the response using server-sent events.
+
+    See [streaming](https://docs.anthropic.com/en/api/streaming) for details.
+    """
+
+
+class CompletionCreateParamsStreaming(CompletionCreateParamsBase):
+    stream: Required[Literal[True]]
+    """Whether to incrementally stream the response using server-sent events.
+
+    See [streaming](https://docs.anthropic.com/en/api/streaming) for details.
+    """
+
+
+CompletionRequestStreamingMetadata = MetadataParam
+"""This is deprecated; use `MetadataParam` instead."""
+
+CompletionRequestNonStreamingMetadata = MetadataParam
+"""This is deprecated; use `MetadataParam` instead."""
+
+CompletionRequestNonStreaming = CompletionCreateParamsNonStreaming
+"""This is deprecated; use `CompletionCreateParamsNonStreaming` instead."""
+
+CompletionRequestStreaming = CompletionCreateParamsStreaming
+"""This is deprecated; use `CompletionCreateParamsStreaming` instead."""
+
+CompletionCreateParams = Union[CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming]
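The legacy Text Completions prompt format described above is what the SDK's exported `HUMAN_PROMPT` and `AI_PROMPT` constants encode (`"\n\nHuman:"` and `"\n\nAssistant:"`). A sketch, with an illustrative legacy model name:

```python
from anthropic import AI_PROMPT, HUMAN_PROMPT, Anthropic

client = Anthropic()

completion = client.completions.create(
    model="claude-2.1",  # illustrative legacy model
    max_tokens_to_sample=256,
    prompt=f"{HUMAN_PROMPT} How tall is the Eiffel Tower?{AI_PROMPT}",
)
print(completion.completion)
```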
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/content_block.py b/.venv/lib/python3.12/site-packages/anthropic/types/content_block.py
new file mode 100644
index 00000000..1bc77596
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/content_block.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .text_block import TextBlock
+from .thinking_block import ThinkingBlock
+from .tool_use_block import ToolUseBlock
+from .redacted_thinking_block import RedactedThinkingBlock
+
+__all__ = ["ContentBlock"]
+
+ContentBlock: TypeAlias = Annotated[
+    Union[TextBlock, ToolUseBlock, ThinkingBlock, RedactedThinkingBlock], PropertyInfo(discriminator="type")
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/content_block_delta_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/content_block_delta_event.py
new file mode 100644
index 00000000..a32602b4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/content_block_delta_event.py
@@ -0,0 +1,9 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from .raw_content_block_delta_event import RawContentBlockDeltaEvent
+
+__all__ = ["ContentBlockDeltaEvent"]
+
+ContentBlockDeltaEvent = RawContentBlockDeltaEvent
+"""The RawContentBlockDeltaEvent type should be used instead"""
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/content_block_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/content_block_param.py
new file mode 100644
index 00000000..97f132e7
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/content_block_param.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .text_block_param import TextBlockParam
+from .image_block_param import ImageBlockParam
+from .document_block_param import DocumentBlockParam
+from .thinking_block_param import ThinkingBlockParam
+from .tool_use_block_param import ToolUseBlockParam
+from .tool_result_block_param import ToolResultBlockParam
+from .redacted_thinking_block_param import RedactedThinkingBlockParam
+
+__all__ = ["ContentBlockParam"]
+
+ContentBlockParam: TypeAlias = Union[
+    TextBlockParam,
+    ImageBlockParam,
+    ToolUseBlockParam,
+    ToolResultBlockParam,
+    DocumentBlockParam,
+    ThinkingBlockParam,
+    RedactedThinkingBlockParam,
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/content_block_source_content_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/content_block_source_content_param.py
new file mode 100644
index 00000000..0e70cd25
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/content_block_source_content_param.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .text_block_param import TextBlockParam
+from .image_block_param import ImageBlockParam
+
+__all__ = ["ContentBlockSourceContentParam"]
+
+ContentBlockSourceContentParam: TypeAlias = Union[TextBlockParam, ImageBlockParam]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/content_block_source_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/content_block_source_param.py
new file mode 100644
index 00000000..8050f3e6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/content_block_source_param.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+from .content_block_source_content_param import ContentBlockSourceContentParam
+
+__all__ = ["ContentBlockSourceParam"]
+
+
+class ContentBlockSourceParam(TypedDict, total=False):
+    content: Required[Union[str, Iterable[ContentBlockSourceContentParam]]]
+
+    type: Required[Literal["content"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/content_block_start_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/content_block_start_event.py
new file mode 100644
index 00000000..873cba3b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/content_block_start_event.py
@@ -0,0 +1,9 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from .raw_content_block_start_event import RawContentBlockStartEvent
+
+__all__ = ["ContentBlockStartEvent"]
+
+ContentBlockStartEvent = RawContentBlockStartEvent
+"""The RawContentBlockStartEvent type should be used instead"""
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/content_block_stop_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/content_block_stop_event.py
new file mode 100644
index 00000000..36c62c89
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/content_block_stop_event.py
@@ -0,0 +1,9 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from .raw_content_block_stop_event import RawContentBlockStopEvent
+
+__all__ = ["ContentBlockStopEvent"]
+
+ContentBlockStopEvent = RawContentBlockStopEvent
+"""The RawContentBlockStopEvent type should be used instead"""
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/document_block_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/document_block_param.py
new file mode 100644
index 00000000..e3285266
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/document_block_param.py
@@ -0,0 +1,31 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .url_pdf_source_param import URLPDFSourceParam
+from .citations_config_param import CitationsConfigParam
+from .base64_pdf_source_param import Base64PDFSourceParam
+from .plain_text_source_param import PlainTextSourceParam
+from .content_block_source_param import ContentBlockSourceParam
+from .cache_control_ephemeral_param import CacheControlEphemeralParam
+
+__all__ = ["DocumentBlockParam", "Source"]
+
+Source: TypeAlias = Union[Base64PDFSourceParam, PlainTextSourceParam, ContentBlockSourceParam, URLPDFSourceParam]
+
+
+class DocumentBlockParam(TypedDict, total=False):
+    source: Required[Source]
+
+    type: Required[Literal["document"]]
+
+    cache_control: Optional[CacheControlEphemeralParam]
+
+    citations: CitationsConfigParam
+
+    context: Optional[str]
+
+    title: Optional[str]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/image_block_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/image_block_param.py
new file mode 100644
index 00000000..914ed6bb
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/image_block_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .url_image_source_param import URLImageSourceParam
+from .base64_image_source_param import Base64ImageSourceParam
+from .cache_control_ephemeral_param import CacheControlEphemeralParam
+
+__all__ = ["ImageBlockParam", "Source"]
+
+Source: TypeAlias = Union[Base64ImageSourceParam, URLImageSourceParam]
+
+
+class ImageBlockParam(TypedDict, total=False):
+    source: Required[Source]
+
+    type: Required[Literal["image"]]
+
+    cache_control: Optional[CacheControlEphemeralParam]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/input_json_delta.py b/.venv/lib/python3.12/site-packages/anthropic/types/input_json_delta.py
new file mode 100644
index 00000000..5d735d72
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/input_json_delta.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["InputJSONDelta", "InputJsonDelta"]
+
+
+class InputJSONDelta(BaseModel):
+    partial_json: str
+
+    type: Literal["input_json_delta"]
+
+
+InputJsonDelta = InputJSONDelta
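During streaming tool use, each `InputJSONDelta` carries a fragment of the tool's JSON input; concatenating every `partial_json` value for a given content block yields the complete, parseable input. A minimal sketch with stand-in fragments:

```python
import json

# Stand-ins for successive event.delta.partial_json values.
fragments = ['{"ticker"', ': "^GSPC"', "}"]

tool_input = json.loads("".join(fragments))
assert tool_input == {"ticker": "^GSPC"}
```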
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/message.py b/.venv/lib/python3.12/site-packages/anthropic/types/message.py
new file mode 100644
index 00000000..6179ee12
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/message.py
@@ -0,0 +1,112 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from .model import Model
+from .usage import Usage
+from .._models import BaseModel
+from .content_block import ContentBlock, ContentBlock as ContentBlock
+
+__all__ = ["Message"]
+
+
+class Message(BaseModel):
+    id: str
+    """Unique object identifier.
+
+    The format and length of IDs may change over time.
+    """
+
+    content: List[ContentBlock]
+    """Content generated by the model.
+
+    This is an array of content blocks, each of which has a `type` that determines
+    its shape.
+
+    Example:
+
+    ```json
+    [{ "type": "text", "text": "Hi, I'm Claude." }]
+    ```
+
+    If the request input `messages` ended with an `assistant` turn, then the
+    response `content` will continue directly from that last turn. You can use this
+    to constrain the model's output.
+
+    For example, if the input `messages` were:
+
+    ```json
+    [
+      {
+        "role": "user",
+        "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+      },
+      { "role": "assistant", "content": "The best answer is (" }
+    ]
+    ```
+
+    Then the response `content` might be:
+
+    ```json
+    [{ "type": "text", "text": "B)" }]
+    ```
+    """
+
+    model: Model
+    """
+    The model that will complete your prompt.\n\nSee
+    [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+    details and options.
+    """
+
+    role: Literal["assistant"]
+    """Conversational role of the generated message.
+
+    This will always be `"assistant"`.
+    """
+
+    stop_reason: Optional[Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]] = None
+    """The reason that we stopped.
+
+    This may be one of the following values:
+
+    - `"end_turn"`: the model reached a natural stopping point
+    - `"max_tokens"`: we exceeded the requested `max_tokens` or the model's maximum
+    - `"stop_sequence"`: one of your provided custom `stop_sequences` was generated
+    - `"tool_use"`: the model invoked one or more tools
+
+    In non-streaming mode this value is always non-null. In streaming mode, it is
+    null in the `message_start` event and non-null otherwise.
+    """
+
+    stop_sequence: Optional[str] = None
+    """Which custom stop sequence was generated, if any.
+
+    This value will be a non-null string if one of your custom stop sequences was
+    generated.
+    """
+
+    type: Literal["message"]
+    """Object type.
+
+    For Messages, this is always `"message"`.
+    """
+
+    usage: Usage
+    """Billing and rate-limit usage.
+
+    Anthropic's API bills and rate-limits by token counts, as tokens represent the
+    underlying cost to our systems.
+
+    Under the hood, the API transforms requests into a format suitable for the
+    model. The model's output then goes through a parsing stage before becoming an
+    API response. As a result, the token counts in `usage` will not match one-to-one
+    with the exact visible content of an API request or response.
+
+    For example, `output_tokens` will be non-zero, even for an empty string response
+    from Claude.
+
+    The total number of input tokens in a request is the sum of `input_tokens`,
+    `cache_creation_input_tokens`, and `cache_read_input_tokens`.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/message_count_tokens_params.py b/.venv/lib/python3.12/site-packages/anthropic/types/message_count_tokens_params.py
new file mode 100644
index 00000000..ea88dd5d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/message_count_tokens_params.py
@@ -0,0 +1,212 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable
+from typing_extensions import Required, TypedDict
+
+from .model_param import ModelParam
+from .message_param import MessageParam
+from .text_block_param import TextBlockParam
+from .tool_choice_param import ToolChoiceParam
+from .thinking_config_param import ThinkingConfigParam
+from .message_count_tokens_tool_param import MessageCountTokensToolParam
+
+__all__ = ["MessageCountTokensParams"]
+
+
+class MessageCountTokensParams(TypedDict, total=False):
+    messages: Required[Iterable[MessageParam]]
+    """Input messages.
+
+    Our models are trained to operate on alternating `user` and `assistant`
+    conversational turns. When creating a new `Message`, you specify the prior
+    conversational turns with the `messages` parameter, and the model then generates
+    the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+    in your request will be combined into a single turn.
+
+    Each input message must be an object with a `role` and `content`. You can
+    specify a single `user`-role message, or you can include multiple `user` and
+    `assistant` messages.
+
+    If the final message uses the `assistant` role, the response content will
+    continue immediately from the content in that message. This can be used to
+    constrain part of the model's response.
+
+    Example with a single `user` message:
+
+    ```json
+    [{ "role": "user", "content": "Hello, Claude" }]
+    ```
+
+    Example with multiple conversational turns:
+
+    ```json
+    [
+      { "role": "user", "content": "Hello there." },
+      { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+      { "role": "user", "content": "Can you explain LLMs in plain English?" }
+    ]
+    ```
+
+    Example with a partially-filled response from Claude:
+
+    ```json
+    [
+      {
+        "role": "user",
+        "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+      },
+      { "role": "assistant", "content": "The best answer is (" }
+    ]
+    ```
+
+    Each input message `content` may be either a single `string` or an array of
+    content blocks, where each block has a specific `type`. Using a `string` for
+    `content` is shorthand for an array of one content block of type `"text"`. The
+    following input messages are equivalent:
+
+    ```json
+    { "role": "user", "content": "Hello, Claude" }
+    ```
+
+    ```json
+    { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+    ```
+
+    Starting with Claude 3 models, you can also send image content blocks:
+
+    ```json
+    {
+      "role": "user",
+      "content": [
+        {
+          "type": "image",
+          "source": {
+            "type": "base64",
+            "media_type": "image/jpeg",
+            "data": "/9j/4AAQSkZJRg..."
+          }
+        },
+        { "type": "text", "text": "What is in this image?" }
+      ]
+    }
+    ```
+
+    We currently support the `base64` source type for images, and the `image/jpeg`,
+    `image/png`, `image/gif`, and `image/webp` media types.
+
+    See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+    more input examples.
+
+    Note that if you want to include a
+    [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+    the top-level `system` parameter — there is no `"system"` role for input
+    messages in the Messages API.
+    """
+
+    model: Required[ModelParam]
+    """
+    The model that will complete your prompt.\n\nSee
+    [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+    details and options.
+    """
+
+    system: Union[str, Iterable[TextBlockParam]]
+    """System prompt.
+
+    A system prompt is a way of providing context and instructions to Claude, such
+    as specifying a particular goal or role. See our
+    [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+    """
+
+    thinking: ThinkingConfigParam
+    """Configuration for enabling Claude's extended thinking.
+
+    When enabled, responses include `thinking` content blocks showing Claude's
+    thinking process before the final answer. Requires a minimum budget of 1,024
+    tokens and counts towards your `max_tokens` limit.
+
+    See
+    [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+    for details.
+    """
+
+    tool_choice: ToolChoiceParam
+    """How the model should use the provided tools.
+
+    The model can use a specific tool, any available tool, decide by itself, or not
+    use tools at all.
+    """
+
+    tools: Iterable[MessageCountTokensToolParam]
+    """Definitions of tools that the model may use.
+
+    If you include `tools` in your API request, the model may return `tool_use`
+    content blocks that represent the model's use of those tools. You can then run
+    those tools using the tool input generated by the model and then optionally
+    return results back to the model using `tool_result` content blocks.
+
+    Each tool definition includes:
+
+    - `name`: Name of the tool.
+    - `description`: Optional, but strongly-recommended description of the tool.
+    - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+      tool `input` shape that the model will produce in `tool_use` output content
+      blocks.
+
+    For example, if you defined `tools` as:
+
+    ```json
+    [
+      {
+        "name": "get_stock_price",
+        "description": "Get the current stock price for a given ticker symbol.",
+        "input_schema": {
+          "type": "object",
+          "properties": {
+            "ticker": {
+              "type": "string",
+              "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+            }
+          },
+          "required": ["ticker"]
+        }
+      }
+    ]
+    ```
+
+    And then asked the model "What's the S&P 500 at today?", the model might produce
+    `tool_use` content blocks in the response like this:
+
+    ```json
+    [
+      {
+        "type": "tool_use",
+        "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+        "name": "get_stock_price",
+        "input": { "ticker": "^GSPC" }
+      }
+    ]
+    ```
+
+    You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+    input, and return the following back to the model in a subsequent `user`
+    message:
+
+    ```json
+    [
+      {
+        "type": "tool_result",
+        "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+        "content": "259.75 USD"
+      }
+    ]
+    ```
+
+    Tools can be used for workflows that include running client-side tools and
+    functions, or more generally whenever you want the model to produce a particular
+    JSON structure of output.
+
+    See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/message_count_tokens_tool_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/message_count_tokens_tool_param.py
new file mode 100644
index 00000000..e28c0ccf
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/message_count_tokens_tool_param.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .tool_param import ToolParam
+from .tool_bash_20250124_param import ToolBash20250124Param
+from .tool_text_editor_20250124_param import ToolTextEditor20250124Param
+
+__all__ = ["MessageCountTokensToolParam"]
+
+MessageCountTokensToolParam: TypeAlias = Union[ToolParam, ToolBash20250124Param, ToolTextEditor20250124Param]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/message_create_params.py b/.venv/lib/python3.12/site-packages/anthropic/types/message_create_params.py
new file mode 100644
index 00000000..c079bafd
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/message_create_params.py
@@ -0,0 +1,320 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .model_param import ModelParam
+from .message_param import MessageParam
+from .metadata_param import MetadataParam
+from .text_block_param import TextBlockParam
+from .tool_union_param import ToolUnionParam
+from .tool_choice_param import ToolChoiceParam
+from .thinking_config_param import ThinkingConfigParam
+from .tool_choice_any_param import ToolChoiceAnyParam
+from .tool_choice_auto_param import ToolChoiceAutoParam
+from .tool_choice_tool_param import ToolChoiceToolParam
+
+__all__ = [
+    "MessageCreateParamsBase",
+    "Metadata",
+    "ToolChoice",
+    "ToolChoiceToolChoiceAuto",
+    "ToolChoiceToolChoiceAny",
+    "ToolChoiceToolChoiceTool",
+    "MessageCreateParamsNonStreaming",
+    "MessageCreateParamsStreaming",
+]
+
+
+class MessageCreateParamsBase(TypedDict, total=False):
+    max_tokens: Required[int]
+    """The maximum number of tokens to generate before stopping.
+
+    Note that our models may stop _before_ reaching this maximum. This parameter
+    only specifies the absolute maximum number of tokens to generate.
+
+    Different models have different maximum values for this parameter. See
+    [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+    """
+
+    messages: Required[Iterable[MessageParam]]
+    """Input messages.
+
+    Our models are trained to operate on alternating `user` and `assistant`
+    conversational turns. When creating a new `Message`, you specify the prior
+    conversational turns with the `messages` parameter, and the model then generates
+    the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+    in your request will be combined into a single turn.
+
+    Each input message must be an object with a `role` and `content`. You can
+    specify a single `user`-role message, or you can include multiple `user` and
+    `assistant` messages.
+
+    If the final message uses the `assistant` role, the response content will
+    continue immediately from the content in that message. This can be used to
+    constrain part of the model's response.
+
+    Example with a single `user` message:
+
+    ```json
+    [{ "role": "user", "content": "Hello, Claude" }]
+    ```
+
+    Example with multiple conversational turns:
+
+    ```json
+    [
+      { "role": "user", "content": "Hello there." },
+      { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+      { "role": "user", "content": "Can you explain LLMs in plain English?" }
+    ]
+    ```
+
+    Example with a partially-filled response from Claude:
+
+    ```json
+    [
+      {
+        "role": "user",
+        "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+      },
+      { "role": "assistant", "content": "The best answer is (" }
+    ]
+    ```
+
+    Each input message `content` may be either a single `string` or an array of
+    content blocks, where each block has a specific `type`. Using a `string` for
+    `content` is shorthand for an array of one content block of type `"text"`. The
+    following input messages are equivalent:
+
+    ```json
+    { "role": "user", "content": "Hello, Claude" }
+    ```
+
+    ```json
+    { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+    ```
+
+    Starting with Claude 3 models, you can also send image content blocks:
+
+    ```json
+    {
+      "role": "user",
+      "content": [
+        {
+          "type": "image",
+          "source": {
+            "type": "base64",
+            "media_type": "image/jpeg",
+            "data": "/9j/4AAQSkZJRg..."
+          }
+        },
+        { "type": "text", "text": "What is in this image?" }
+      ]
+    }
+    ```
+
+    We currently support the `base64` source type for images, and the `image/jpeg`,
+    `image/png`, `image/gif`, and `image/webp` media types.
+
+    See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+    more input examples.
+
+    Note that if you want to include a
+    [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+    the top-level `system` parameter — there is no `"system"` role for input
+    messages in the Messages API.
+    """
+
+    model: Required[ModelParam]
+    """
+    The model that will complete your prompt.\n\nSee
+    [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+    details and options.
+    """
+
+    metadata: MetadataParam
+    """An object describing metadata about the request."""
+
+    stop_sequences: List[str]
+    """Custom text sequences that will cause the model to stop generating.
+
+    Our models will normally stop when they have naturally completed their turn,
+    which will result in a response `stop_reason` of `"end_turn"`.
+
+    If you want the model to stop generating when it encounters custom strings of
+    text, you can use the `stop_sequences` parameter. If the model encounters one of
+    the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+    and the response `stop_sequence` value will contain the matched stop sequence.
+    """
+
+    system: Union[str, Iterable[TextBlockParam]]
+    """System prompt.
+
+    A system prompt is a way of providing context and instructions to Claude, such
+    as specifying a particular goal or role. See our
+    [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+    """
+
+    temperature: float
+    """Amount of randomness injected into the response.
+
+    Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+    for analytical / multiple choice, and closer to `1.0` for creative and
+    generative tasks.
+
+    Note that even with `temperature` of `0.0`, the results will not be fully
+    deterministic.
+    """
+
+    thinking: ThinkingConfigParam
+    """Configuration for enabling Claude's extended thinking.
+
+    When enabled, responses include `thinking` content blocks showing Claude's
+    thinking process before the final answer. Requires a minimum budget of 1,024
+    tokens and counts towards your `max_tokens` limit.
+
+    See
+    [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+    for details.
+    """
+
+    tool_choice: ToolChoiceParam
+    """How the model should use the provided tools.
+
+    The model can use a specific tool, any available tool, decide by itself, or not
+    use tools at all.
+    """
+
+    tools: Iterable[ToolUnionParam]
+    """Definitions of tools that the model may use.
+
+    If you include `tools` in your API request, the model may return `tool_use`
+    content blocks that represent the model's use of those tools. You can then run
+    those tools using the tool input generated by the model and then optionally
+    return results back to the model using `tool_result` content blocks.
+
+    Each tool definition includes:
+
+    - `name`: Name of the tool.
+    - `description`: Optional, but strongly-recommended description of the tool.
+    - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+      tool `input` shape that the model will produce in `tool_use` output content
+      blocks.
+
+    For example, if you defined `tools` as:
+
+    ```json
+    [
+      {
+        "name": "get_stock_price",
+        "description": "Get the current stock price for a given ticker symbol.",
+        "input_schema": {
+          "type": "object",
+          "properties": {
+            "ticker": {
+              "type": "string",
+              "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+            }
+          },
+          "required": ["ticker"]
+        }
+      }
+    ]
+    ```
+
+    And then asked the model "What's the S&P 500 at today?", the model might produce
+    `tool_use` content blocks in the response like this:
+
+    ```json
+    [
+      {
+        "type": "tool_use",
+        "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+        "name": "get_stock_price",
+        "input": { "ticker": "^GSPC" }
+      }
+    ]
+    ```
+
+    You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+    input, and return the following back to the model in a subsequent `user`
+    message:
+
+    ```json
+    [
+      {
+        "type": "tool_result",
+        "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+        "content": "259.75 USD"
+      }
+    ]
+    ```
+
+    Tools can be used for workflows that include running client-side tools and
+    functions, or more generally whenever you want the model to produce a particular
+    JSON structure of output.
+
+    See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+    """
+
+    top_k: int
+    """Only sample from the top K options for each subsequent token.
+
+    Used to remove "long tail" low probability responses.
+    [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+    Recommended for advanced use cases only. You usually only need to use
+    `temperature`.
+    """
+
+    top_p: float
+    """Use nucleus sampling.
+
+    In nucleus sampling, we compute the cumulative distribution over all the options
+    for each subsequent token in decreasing probability order and cut it off once it
+    reaches a particular probability specified by `top_p`. You should either alter
+    `temperature` or `top_p`, but not both.
+
+    Recommended for advanced use cases only. You usually only need to use
+    `temperature`.
+    """
+
+
+Metadata: TypeAlias = MetadataParam
+"""Deprecated: use `MetadataParam` instead."""
+
+ToolChoice: TypeAlias = ToolChoiceParam
+"""Deprecated: use `ToolChoiceParam` instead."""
+
+ToolChoiceToolChoiceAuto: TypeAlias = ToolChoiceAutoParam
+"""Deprecated: use `ToolChoiceAutoParam` instead."""
+
+ToolChoiceToolChoiceAny: TypeAlias = ToolChoiceAnyParam
+"""Deprecated: use `ToolChoiceAnyParam` instead."""
+
+ToolChoiceToolChoiceTool: TypeAlias = ToolChoiceToolParam
+"""Deprecated: use `ToolChoiceToolParam` instead."""
+
+
+class MessageCreateParamsNonStreaming(MessageCreateParamsBase, total=False):
+    stream: Literal[False]
+    """Whether to incrementally stream the response using server-sent events.
+
+    See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+    details.
+    """
+
+
+class MessageCreateParamsStreaming(MessageCreateParamsBase):
+    stream: Required[Literal[True]]
+    """Whether to incrementally stream the response using server-sent events.
+
+    See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+    details.
+    """
+
+
+MessageCreateParams = Union[MessageCreateParamsNonStreaming, MessageCreateParamsStreaming]
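The streaming/non-streaming split exists so the `stream` literal can select the correct return type in the client's overloads. A sketch of building the params as a TypedDict and splatting them into `client.messages.create` (the dict-building style is just one option; plain keyword arguments work equally well):

```python
from anthropic import Anthropic
from anthropic.types.message_create_params import MessageCreateParamsStreaming

client = Anthropic()

params: MessageCreateParamsStreaming = {
    "model": "claude-3-5-sonnet-latest",
    "max_tokens": 256,
    "messages": [{"role": "user", "content": "Hello there."}],
    "stop_sequences": ["###"],  # optional: custom stop strings
    "temperature": 0.2,         # closer to 0.0 for analytical tasks
    "stream": True,             # Required[Literal[True]] selects the streaming overload
}

stream = client.messages.create(**params)
for event in stream:
    print(event.type)
```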
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/message_delta_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/message_delta_event.py
new file mode 100644
index 00000000..3803629a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/message_delta_event.py
@@ -0,0 +1,9 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from .raw_message_delta_event import RawMessageDeltaEvent
+
+__all__ = ["MessageDeltaEvent"]
+
+MessageDeltaEvent = RawMessageDeltaEvent
+"""The RawMessageDeltaEvent type should be used instead"""
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/message_delta_usage.py b/.venv/lib/python3.12/site-packages/anthropic/types/message_delta_usage.py
new file mode 100644
index 00000000..e4321be4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/message_delta_usage.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from .._models import BaseModel
+
+__all__ = ["MessageDeltaUsage"]
+
+
+class MessageDeltaUsage(BaseModel):
+    output_tokens: int
+    """The cumulative number of output tokens which were used."""
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/message_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/message_param.py
new file mode 100644
index 00000000..3c054395
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/message_param.py
@@ -0,0 +1,39 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+from .content_block import ContentBlock
+from .text_block_param import TextBlockParam
+from .image_block_param import ImageBlockParam
+from .document_block_param import DocumentBlockParam
+from .thinking_block_param import ThinkingBlockParam
+from .tool_use_block_param import ToolUseBlockParam
+from .tool_result_block_param import ToolResultBlockParam
+from .redacted_thinking_block_param import RedactedThinkingBlockParam
+
+__all__ = ["MessageParam"]
+
+
+class MessageParam(TypedDict, total=False):
+    content: Required[
+        Union[
+            str,
+            Iterable[
+                Union[
+                    TextBlockParam,
+                    ImageBlockParam,
+                    ToolUseBlockParam,
+                    ToolResultBlockParam,
+                    DocumentBlockParam,
+                    ThinkingBlockParam,
+                    RedactedThinkingBlockParam,
+                    ContentBlock,
+                ]
+            ],
+        ]
+    ]
+
+    role: Required[Literal["user", "assistant"]]
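A short illustration of the two equivalent `content` forms the union above allows, a bare string and a list of typed blocks (the base64 payload is the truncated example from the docstrings):

```python
from anthropic.types import MessageParam

# Shorthand: a plain string becomes one "text" content block.
short: MessageParam = {"role": "user", "content": "Hello, Claude"}

# Explicit block form, here mixing an image block with a text block.
blocks: MessageParam = {
    "role": "user",
    "content": [
        {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/jpeg",
                "data": "/9j/4AAQSkZJRg...",  # truncated example payload
            },
        },
        {"type": "text", "text": "What is in this image?"},
    ],
}
```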
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/message_start_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/message_start_event.py
new file mode 100644
index 00000000..c210d3ad
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/message_start_event.py
@@ -0,0 +1,9 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from .raw_message_start_event import RawMessageStartEvent
+
+__all__ = ["MessageStartEvent"]
+
+MessageStartEvent = RawMessageStartEvent
+"""The RawMessageStartEvent type should be used instead"""
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/message_stop_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/message_stop_event.py
new file mode 100644
index 00000000..1076a62c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/message_stop_event.py
@@ -0,0 +1,9 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from .raw_message_stop_event import RawMessageStopEvent
+
+__all__ = ["MessageStopEvent"]
+
+MessageStopEvent = RawMessageStopEvent
+"""The RawMessageStopEvent type should be used instead"""
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/message_stream_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/message_stream_event.py
new file mode 100644
index 00000000..ec5a0125
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/message_stream_event.py
@@ -0,0 +1,9 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from .raw_message_stream_event import RawMessageStreamEvent
+
+__all__ = ["MessageStreamEvent"]
+
+MessageStreamEvent = RawMessageStreamEvent
+"""The RawMessageStreamEvent type should be used instead"""
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/message_tokens_count.py b/.venv/lib/python3.12/site-packages/anthropic/types/message_tokens_count.py
new file mode 100644
index 00000000..d570019f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/message_tokens_count.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from .._models import BaseModel
+
+__all__ = ["MessageTokensCount"]
+
+
+class MessageTokensCount(BaseModel):
+    input_tokens: int
+    """
+    The total number of tokens across the provided list of messages, system prompt,
+    and tools.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/messages/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/types/messages/__init__.py
new file mode 100644
index 00000000..25d311da
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/messages/__init__.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .message_batch import MessageBatch as MessageBatch
+from .batch_list_params import BatchListParams as BatchListParams
+from .batch_create_params import BatchCreateParams as BatchCreateParams
+from .message_batch_result import MessageBatchResult as MessageBatchResult
+from .deleted_message_batch import DeletedMessageBatch as DeletedMessageBatch
+from .message_batch_errored_result import MessageBatchErroredResult as MessageBatchErroredResult
+from .message_batch_expired_result import MessageBatchExpiredResult as MessageBatchExpiredResult
+from .message_batch_request_counts import MessageBatchRequestCounts as MessageBatchRequestCounts
+from .message_batch_canceled_result import MessageBatchCanceledResult as MessageBatchCanceledResult
+from .message_batch_succeeded_result import MessageBatchSucceededResult as MessageBatchSucceededResult
+from .message_batch_individual_response import MessageBatchIndividualResponse as MessageBatchIndividualResponse
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/messages/batch_create_params.py b/.venv/lib/python3.12/site-packages/anthropic/types/messages/batch_create_params.py
new file mode 100644
index 00000000..a82a5ff0
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/messages/batch_create_params.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable
+from typing_extensions import Required, TypedDict
+
+from ..message_create_params import MessageCreateParamsNonStreaming
+
+__all__ = ["BatchCreateParams", "Request"]
+
+
+class BatchCreateParams(TypedDict, total=False):
+    requests: Required[Iterable[Request]]
+    """List of requests for prompt completion.
+
+    Each is an individual request to create a Message.
+    """
+
+
+class Request(TypedDict, total=False):
+    custom_id: Required[str]
+    """Developer-provided ID created for each request in a Message Batch.
+
+    Useful for matching results to requests, as results may be returned out of
+    request order.
+
+    Must be unique for each request within the Message Batch.
+    """
+
+    params: Required[MessageCreateParamsNonStreaming]
+    """Messages API creation parameters for the individual request.
+
+    See the [Messages API reference](/en/api/messages) for full documentation on
+    available parameters.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/messages/batch_list_params.py b/.venv/lib/python3.12/site-packages/anthropic/types/messages/batch_list_params.py
new file mode 100644
index 00000000..7b290a77
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/messages/batch_list_params.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["BatchListParams"]
+
+
+class BatchListParams(TypedDict, total=False):
+    after_id: str
+    """ID of the object to use as a cursor for pagination.
+
+    When provided, returns the page of results immediately after this object.
+    """
+
+    before_id: str
+    """ID of the object to use as a cursor for pagination.
+
+    When provided, returns the page of results immediately before this object.
+    """
+
+    limit: int
+    """Number of items to return per page.
+
+    Defaults to `20`. Ranges from `1` to `1000`.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/messages/deleted_message_batch.py b/.venv/lib/python3.12/site-packages/anthropic/types/messages/deleted_message_batch.py
new file mode 100644
index 00000000..7a6c321e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/messages/deleted_message_batch.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["DeletedMessageBatch"]
+
+
+class DeletedMessageBatch(BaseModel):
+    id: str
+    """ID of the Message Batch."""
+
+    type: Literal["message_batch_deleted"]
+    """Deleted object type.
+
+    For Message Batches, this is always `"message_batch_deleted"`.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch.py b/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch.py
new file mode 100644
index 00000000..a03e73e1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch.py
@@ -0,0 +1,77 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .message_batch_request_counts import MessageBatchRequestCounts
+
+__all__ = ["MessageBatch"]
+
+
+class MessageBatch(BaseModel):
+    id: str
+    """Unique object identifier.
+
+    The format and length of IDs may change over time.
+    """
+
+    archived_at: Optional[datetime] = None
+    """
+    RFC 3339 datetime string representing the time at which the Message Batch was
+    archived and its results became unavailable.
+    """
+
+    cancel_initiated_at: Optional[datetime] = None
+    """
+    RFC 3339 datetime string representing the time at which cancellation was
+    initiated for the Message Batch. Specified only if cancellation was initiated.
+    """
+
+    created_at: datetime
+    """
+    RFC 3339 datetime string representing the time at which the Message Batch was
+    created.
+    """
+
+    ended_at: Optional[datetime] = None
+    """
+    RFC 3339 datetime string representing the time at which processing for the
+    Message Batch ended. Specified only once processing ends.
+
+    Processing ends when every request in a Message Batch has either succeeded,
+    errored, been canceled, or expired.
+    """
+
+    expires_at: datetime
+    """
+    RFC 3339 datetime string representing the time at which the Message Batch will
+    expire and end processing, which is 24 hours after creation.
+    """
+
+    processing_status: Literal["in_progress", "canceling", "ended"]
+    """Processing status of the Message Batch."""
+
+    request_counts: MessageBatchRequestCounts
+    """Tallies requests within the Message Batch, categorized by their status.
+
+    Requests start as `processing` and move to one of the other statuses only once
+    processing of the entire batch ends. The sum of all values always matches the
+    total number of requests in the batch.
+    """
+
+    results_url: Optional[str] = None
+    """URL to a `.jsonl` file containing the results of the Message Batch requests.
+
+    Specified only once processing ends.
+
+    Results in the file are not guaranteed to be in the same order as requests. Use
+    the `custom_id` field to match results to requests.
+    """
+
+    type: Literal["message_batch"]
+    """Object type.
+
+    For Message Batches, this is always `"message_batch"`.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_canceled_result.py b/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_canceled_result.py
new file mode 100644
index 00000000..9826aa91
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_canceled_result.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["MessageBatchCanceledResult"]
+
+
+class MessageBatchCanceledResult(BaseModel):
+    type: Literal["canceled"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_errored_result.py b/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_errored_result.py
new file mode 100644
index 00000000..5f890bfd
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_errored_result.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from ..shared.error_response import ErrorResponse
+
+__all__ = ["MessageBatchErroredResult"]
+
+
+class MessageBatchErroredResult(BaseModel):
+    error: ErrorResponse
+
+    type: Literal["errored"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_expired_result.py b/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_expired_result.py
new file mode 100644
index 00000000..ab9964e7
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_expired_result.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["MessageBatchExpiredResult"]
+
+
+class MessageBatchExpiredResult(BaseModel):
+    type: Literal["expired"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_individual_response.py b/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_individual_response.py
new file mode 100644
index 00000000..19d4f090
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_individual_response.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from ..._models import BaseModel
+from .message_batch_result import MessageBatchResult
+
+__all__ = ["MessageBatchIndividualResponse"]
+
+
+class MessageBatchIndividualResponse(BaseModel):
+    custom_id: str
+    """Developer-provided ID created for each request in a Message Batch.
+
+    Useful for matching results to requests, as results may be returned out of
+    request order.
+
+    Must be unique for each request within the Message Batch.
+    """
+
+    result: MessageBatchResult
+    """Processing result for this request.
+
+    Contains a Message output if processing was successful, an error response if
+    processing failed, or the reason why processing was not attempted, such as
+    cancellation or expiration.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_request_counts.py b/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_request_counts.py
new file mode 100644
index 00000000..04edc3c3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_request_counts.py
@@ -0,0 +1,35 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from ..._models import BaseModel
+
+__all__ = ["MessageBatchRequestCounts"]
+
+
+class MessageBatchRequestCounts(BaseModel):
+    canceled: int
+    """Number of requests in the Message Batch that have been canceled.
+
+    This is zero until processing of the entire Message Batch has ended.
+    """
+
+    errored: int
+    """Number of requests in the Message Batch that encountered an error.
+
+    This is zero until processing of the entire Message Batch has ended.
+    """
+
+    expired: int
+    """Number of requests in the Message Batch that have expired.
+
+    This is zero until processing of the entire Message Batch has ended.
+    """
+
+    processing: int
+    """Number of requests in the Message Batch that are processing."""
+
+    succeeded: int
+    """Number of requests in the Message Batch that have completed successfully.
+
+    This is zero until processing of the entire Message Batch has ended.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_result.py b/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_result.py
new file mode 100644
index 00000000..3186f2aa
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_result.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from .message_batch_errored_result import MessageBatchErroredResult
+from .message_batch_expired_result import MessageBatchExpiredResult
+from .message_batch_canceled_result import MessageBatchCanceledResult
+from .message_batch_succeeded_result import MessageBatchSucceededResult
+
+__all__ = ["MessageBatchResult"]
+
+MessageBatchResult: TypeAlias = Annotated[
+    Union[
+        MessageBatchSucceededResult, MessageBatchErroredResult, MessageBatchCanceledResult, MessageBatchExpiredResult
+    ],
+    PropertyInfo(discriminator="type"),
+]
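Because the union is discriminated on `type`, results can be dispatched with a simple branch. A sketch that streams decoded results for an ended batch (placeholder ID) and handles each variant:

```python
from anthropic import Anthropic

client = Anthropic()

# `results()` streams MessageBatchIndividualResponse objects from the
# batch's `.jsonl` results file; order is not guaranteed, so key on custom_id.
for entry in client.messages.batches.results("msgbatch_..."):  # placeholder ID
    result = entry.result
    if result.type == "succeeded":
        print(entry.custom_id, result.message.stop_reason)
    elif result.type == "errored":
        print(entry.custom_id, result.error.error.type)
    else:  # "canceled" or "expired" carry no payload beyond their type
        print(entry.custom_id, result.type)
```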
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_succeeded_result.py b/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_succeeded_result.py
new file mode 100644
index 00000000..1cc454a4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/messages/message_batch_succeeded_result.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..message import Message
+from ..._models import BaseModel
+
+__all__ = ["MessageBatchSucceededResult"]
+
+
+class MessageBatchSucceededResult(BaseModel):
+    message: Message
+
+    type: Literal["succeeded"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/metadata_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/metadata_param.py
new file mode 100644
index 00000000..b7bc1ea3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/metadata_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import TypedDict
+
+__all__ = ["MetadataParam"]
+
+
+class MetadataParam(TypedDict, total=False):
+    user_id: Optional[str]
+    """An external identifier for the user who is associated with the request.
+
+    This should be a uuid, hash value, or other opaque identifier. Anthropic may use
+    this id to help detect abuse. Do not include any identifying information such as
+    name, email address, or phone number.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/model.py b/.venv/lib/python3.12/site-packages/anthropic/types/model.py
new file mode 100644
index 00000000..02d40800
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/model.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal, TypeAlias
+
+__all__ = ["Model"]
+
+Model: TypeAlias = Union[
+    Literal[
+        "claude-3-7-sonnet-latest",
+        "claude-3-7-sonnet-20250219",
+        "claude-3-5-haiku-latest",
+        "claude-3-5-haiku-20241022",
+        "claude-3-5-sonnet-latest",
+        "claude-3-5-sonnet-20241022",
+        "claude-3-5-sonnet-20240620",
+        "claude-3-opus-latest",
+        "claude-3-opus-20240229",
+        "claude-3-sonnet-20240229",
+        "claude-3-haiku-20240307",
+        "claude-2.1",
+        "claude-2.0",
+    ],
+    str,
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/model_info.py b/.venv/lib/python3.12/site-packages/anthropic/types/model_info.py
new file mode 100644
index 00000000..0e3945fe
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/model_info.py
@@ -0,0 +1,28 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from datetime import datetime
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["ModelInfo"]
+
+
+class ModelInfo(BaseModel):
+    id: str
+    """Unique model identifier."""
+
+    created_at: datetime
+    """RFC 3339 datetime string representing the time at which the model was released.
+
+    May be set to an epoch value if the release date is unknown.
+    """
+
+    display_name: str
+    """A human-readable name for the model."""
+
+    type: Literal["model"]
+    """Object type.
+
+    For Models, this is always `"model"`.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/model_list_params.py b/.venv/lib/python3.12/site-packages/anthropic/types/model_list_params.py
new file mode 100644
index 00000000..b16d22a3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/model_list_params.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["ModelListParams"]
+
+
+class ModelListParams(TypedDict, total=False):
+    after_id: str
+    """ID of the object to use as a cursor for pagination.
+
+    When provided, returns the page of results immediately after this object.
+    """
+
+    before_id: str
+    """ID of the object to use as a cursor for pagination.
+
+    When provided, returns the page of results immediately before this object.
+    """
+
+    limit: int
+    """Number of items to return per page.
+
+    Defaults to `20`. Ranges from `1` to `1000`.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/model_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/model_param.py
new file mode 100644
index 00000000..bce6f522
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/model_param.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, TypeAlias
+
+__all__ = ["ModelParam"]
+
+ModelParam: TypeAlias = Union[
+    Literal[
+        "claude-3-7-sonnet-latest",
+        "claude-3-7-sonnet-20250219",
+        "claude-3-5-haiku-latest",
+        "claude-3-5-haiku-20241022",
+        "claude-3-5-sonnet-latest",
+        "claude-3-5-sonnet-20241022",
+        "claude-3-5-sonnet-20240620",
+        "claude-3-opus-latest",
+        "claude-3-opus-20240229",
+        "claude-3-sonnet-20240229",
+        "claude-3-haiku-20240307",
+        "claude-2.1",
+        "claude-2.0",
+    ],
+    str,
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/plain_text_source_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/plain_text_source_param.py
new file mode 100644
index 00000000..a2a3b8de
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/plain_text_source_param.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["PlainTextSourceParam"]
+
+
+class PlainTextSourceParam(TypedDict, total=False):
+    data: Required[str]
+
+    media_type: Required[Literal["text/plain"]]
+
+    type: Required[Literal["text"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/raw_content_block_delta_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/raw_content_block_delta_event.py
new file mode 100644
index 00000000..5bdbf09a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/raw_content_block_delta_event.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+from .text_delta import TextDelta
+from .thinking_delta import ThinkingDelta
+from .citations_delta import CitationsDelta
+from .signature_delta import SignatureDelta
+from .input_json_delta import InputJSONDelta
+
+__all__ = ["RawContentBlockDeltaEvent", "Delta"]
+
+Delta: TypeAlias = Annotated[
+    Union[TextDelta, InputJSONDelta, CitationsDelta, ThinkingDelta, SignatureDelta], PropertyInfo(discriminator="type")
+]
+
+
+class RawContentBlockDeltaEvent(BaseModel):
+    delta: Delta
+
+    index: int
+
+    type: Literal["content_block_delta"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/raw_content_block_start_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/raw_content_block_start_event.py
new file mode 100644
index 00000000..bfbaa63d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/raw_content_block_start_event.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+from .text_block import TextBlock
+from .thinking_block import ThinkingBlock
+from .tool_use_block import ToolUseBlock
+from .redacted_thinking_block import RedactedThinkingBlock
+
+__all__ = ["RawContentBlockStartEvent", "ContentBlock"]
+
+ContentBlock: TypeAlias = Annotated[
+    Union[TextBlock, ToolUseBlock, ThinkingBlock, RedactedThinkingBlock], PropertyInfo(discriminator="type")
+]
+
+
+class RawContentBlockStartEvent(BaseModel):
+    content_block: ContentBlock
+
+    index: int
+
+    type: Literal["content_block_start"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/raw_content_block_stop_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/raw_content_block_stop_event.py
new file mode 100644
index 00000000..6241a8b2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/raw_content_block_stop_event.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["RawContentBlockStopEvent"]
+
+
+class RawContentBlockStopEvent(BaseModel):
+    index: int
+
+    type: Literal["content_block_stop"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/raw_message_delta_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/raw_message_delta_event.py
new file mode 100644
index 00000000..3dae1e0d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/raw_message_delta_event.py
@@ -0,0 +1,39 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+from .message_delta_usage import MessageDeltaUsage
+
+__all__ = ["RawMessageDeltaEvent", "Delta"]
+
+
+class Delta(BaseModel):
+    stop_reason: Optional[Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]] = None
+
+    stop_sequence: Optional[str] = None
+
+
+class RawMessageDeltaEvent(BaseModel):
+    delta: Delta
+
+    type: Literal["message_delta"]
+
+    usage: MessageDeltaUsage
+    """Billing and rate-limit usage.
+
+    Anthropic's API bills and rate-limits by token counts, as tokens represent the
+    underlying cost to our systems.
+
+    Under the hood, the API transforms requests into a format suitable for the
+    model. The model's output then goes through a parsing stage before becoming an
+    API response. As a result, the token counts in `usage` will not match one-to-one
+    with the exact visible content of an API request or response.
+
+    For example, `output_tokens` will be non-zero, even for an empty string response
+    from Claude.
+
+    The total number of input tokens in a request is the sum of `input_tokens`,
+    `cache_creation_input_tokens`, and `cache_read_input_tokens`.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/raw_message_start_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/raw_message_start_event.py
new file mode 100644
index 00000000..1b9e8904
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/raw_message_start_event.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .message import Message
+from .._models import BaseModel
+
+__all__ = ["RawMessageStartEvent"]
+
+
+class RawMessageStartEvent(BaseModel):
+    message: Message
+
+    type: Literal["message_start"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/raw_message_stop_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/raw_message_stop_event.py
new file mode 100644
index 00000000..d40ccfe2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/raw_message_stop_event.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["RawMessageStopEvent"]
+
+
+class RawMessageStopEvent(BaseModel):
+    type: Literal["message_stop"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/raw_message_stream_event.py b/.venv/lib/python3.12/site-packages/anthropic/types/raw_message_stream_event.py
new file mode 100644
index 00000000..728fbe88
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/raw_message_stream_event.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .raw_message_stop_event import RawMessageStopEvent
+from .raw_message_delta_event import RawMessageDeltaEvent
+from .raw_message_start_event import RawMessageStartEvent
+from .raw_content_block_stop_event import RawContentBlockStopEvent
+from .raw_content_block_delta_event import RawContentBlockDeltaEvent
+from .raw_content_block_start_event import RawContentBlockStartEvent
+
+__all__ = ["RawMessageStreamEvent"]
+
+RawMessageStreamEvent: TypeAlias = Annotated[
+    Union[
+        RawMessageStartEvent,
+        RawMessageDeltaEvent,
+        RawMessageStopEvent,
+        RawContentBlockStartEvent,
+        RawContentBlockDeltaEvent,
+        RawContentBlockStopEvent,
+    ],
+    PropertyInfo(discriminator="type"),
+]
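All six raw event types above arrive interleaved on one server-sent-event stream. A sketch of consuming them with `stream=True`, accumulating text deltas and reading cumulative usage from `message_delta` events:

```python
from anthropic import Anthropic

client = Anthropic()

stream = client.messages.create(
    model="claude-3-5-sonnet-latest",
    max_tokens=256,
    messages=[{"role": "user", "content": "Hello, Claude"}],
    stream=True,
)

text_parts: list[str] = []
for event in stream:
    if event.type == "content_block_delta" and event.delta.type == "text_delta":
        text_parts.append(event.delta.text)
    elif event.type == "message_delta":
        # Cumulative output tokens; stop_reason also arrives here.
        print(event.usage.output_tokens, event.delta.stop_reason)

print("".join(text_parts))
```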
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/redacted_thinking_block.py b/.venv/lib/python3.12/site-packages/anthropic/types/redacted_thinking_block.py
new file mode 100644
index 00000000..4850b335
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/redacted_thinking_block.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["RedactedThinkingBlock"]
+
+
+class RedactedThinkingBlock(BaseModel):
+    data: str
+
+    type: Literal["redacted_thinking"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/redacted_thinking_block_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/redacted_thinking_block_param.py
new file mode 100644
index 00000000..0933188c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/redacted_thinking_block_param.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["RedactedThinkingBlockParam"]
+
+
+class RedactedThinkingBlockParam(TypedDict, total=False):
+    data: Required[str]
+
+    type: Required[Literal["redacted_thinking"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/shared/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/types/shared/__init__.py
new file mode 100644
index 00000000..178643b6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/shared/__init__.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .error_object import ErrorObject as ErrorObject
+from .billing_error import BillingError as BillingError
+from .error_response import ErrorResponse as ErrorResponse
+from .not_found_error import NotFoundError as NotFoundError
+from .api_error_object import APIErrorObject as APIErrorObject
+from .overloaded_error import OverloadedError as OverloadedError
+from .permission_error import PermissionError as PermissionError
+from .rate_limit_error import RateLimitError as RateLimitError
+from .authentication_error import AuthenticationError as AuthenticationError
+from .gateway_timeout_error import GatewayTimeoutError as GatewayTimeoutError
+from .invalid_request_error import InvalidRequestError as InvalidRequestError
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/shared/api_error_object.py b/.venv/lib/python3.12/site-packages/anthropic/types/shared/api_error_object.py
new file mode 100644
index 00000000..dd92bead
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/shared/api_error_object.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["APIErrorObject"]
+
+
+class APIErrorObject(BaseModel):
+    message: str
+
+    type: Literal["api_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/shared/authentication_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/shared/authentication_error.py
new file mode 100644
index 00000000..f777f5c8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/shared/authentication_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["AuthenticationError"]
+
+
+class AuthenticationError(BaseModel):
+    message: str
+
+    type: Literal["authentication_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/shared/billing_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/shared/billing_error.py
new file mode 100644
index 00000000..26be12bb
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/shared/billing_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BillingError"]
+
+
+class BillingError(BaseModel):
+    message: str
+
+    type: Literal["billing_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/shared/error_object.py b/.venv/lib/python3.12/site-packages/anthropic/types/shared/error_object.py
new file mode 100644
index 00000000..086db503
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/shared/error_object.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from .billing_error import BillingError
+from .not_found_error import NotFoundError
+from .api_error_object import APIErrorObject
+from .overloaded_error import OverloadedError
+from .permission_error import PermissionError
+from .rate_limit_error import RateLimitError
+from .authentication_error import AuthenticationError
+from .gateway_timeout_error import GatewayTimeoutError
+from .invalid_request_error import InvalidRequestError
+
+__all__ = ["ErrorObject"]
+
+ErrorObject: TypeAlias = Annotated[
+    Union[
+        InvalidRequestError,
+        AuthenticationError,
+        BillingError,
+        PermissionError,
+        NotFoundError,
+        RateLimitError,
+        GatewayTimeoutError,
+        APIErrorObject,
+        OverloadedError,
+    ],
+    PropertyInfo(discriminator="type"),
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/shared/error_response.py b/.venv/lib/python3.12/site-packages/anthropic/types/shared/error_response.py
new file mode 100644
index 00000000..97034923
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/shared/error_response.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .error_object import ErrorObject
+
+__all__ = ["ErrorResponse"]
+
+
+class ErrorResponse(BaseModel):
+    error: ErrorObject
+
+    type: Literal["error"]
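
A minimal usage sketch (editorial, not part of the diff) for the error models above: the PropertyInfo(discriminator="type") annotation on ErrorObject means validation narrows the union to the member whose "type" literal matches. This assumes the vendored models behave like standard pydantic v2 models, i.e. that model_validate is available on BaseModel subclasses.

from anthropic.types.shared import ErrorResponse, RateLimitError

payload = {
    "type": "error",
    "error": {"type": "rate_limit_error", "message": "Too many requests."},
}

resp = ErrorResponse.model_validate(payload)   # pydantic v2-style API (assumption)
assert isinstance(resp.error, RateLimitError)  # discriminator chose this member
print(resp.error.message)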
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/shared/gateway_timeout_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/shared/gateway_timeout_error.py
new file mode 100644
index 00000000..908aa12f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/shared/gateway_timeout_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["GatewayTimeoutError"]
+
+
+class GatewayTimeoutError(BaseModel):
+    message: str
+
+    type: Literal["timeout_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/shared/invalid_request_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/shared/invalid_request_error.py
new file mode 100644
index 00000000..ee5befc0
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/shared/invalid_request_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InvalidRequestError"]
+
+
+class InvalidRequestError(BaseModel):
+    message: str
+
+    type: Literal["invalid_request_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/shared/not_found_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/shared/not_found_error.py
new file mode 100644
index 00000000..43e826fb
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/shared/not_found_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["NotFoundError"]
+
+
+class NotFoundError(BaseModel):
+    message: str
+
+    type: Literal["not_found_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/shared/overloaded_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/shared/overloaded_error.py
new file mode 100644
index 00000000..74ee8373
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/shared/overloaded_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["OverloadedError"]
+
+
+class OverloadedError(BaseModel):
+    message: str
+
+    type: Literal["overloaded_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/shared/permission_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/shared/permission_error.py
new file mode 100644
index 00000000..48eb3546
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/shared/permission_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["PermissionError"]
+
+
+class PermissionError(BaseModel):
+    message: str
+
+    type: Literal["permission_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/shared/rate_limit_error.py b/.venv/lib/python3.12/site-packages/anthropic/types/shared/rate_limit_error.py
new file mode 100644
index 00000000..3fa065ac
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/shared/rate_limit_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RateLimitError"]
+
+
+class RateLimitError(BaseModel):
+    message: str
+
+    type: Literal["rate_limit_error"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/signature_delta.py b/.venv/lib/python3.12/site-packages/anthropic/types/signature_delta.py
new file mode 100644
index 00000000..55d15189
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/signature_delta.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["SignatureDelta"]
+
+
+class SignatureDelta(BaseModel):
+    signature: str
+
+    type: Literal["signature_delta"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/text_block.py b/.venv/lib/python3.12/site-packages/anthropic/types/text_block.py
new file mode 100644
index 00000000..ecdddb69
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/text_block.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+from .text_citation import TextCitation
+
+__all__ = ["TextBlock"]
+
+
+class TextBlock(BaseModel):
+    citations: Optional[List[TextCitation]] = None
+    """Citations supporting the text block.
+
+    The type of citation returned will depend on the type of document being cited.
+    Citing a PDF results in `page_location`, plain text results in `char_location`,
+    and content document results in `content_block_location`.
+    """
+
+    text: str
+
+    type: Literal["text"]
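
A short editorial sketch of consuming the optional citations: the TextCitation members are discriminated on "type", so plain isinstance checks narrow them once the block has been validated.

from anthropic.types import CitationPageLocation, TextBlock

def cited_pages(block: TextBlock) -> list[int]:
    # `citations` is None when the request did not enable citations.
    return [
        c.start_page_number
        for c in block.citations or []
        if isinstance(c, CitationPageLocation)
    ]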
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/text_block_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/text_block_param.py
new file mode 100644
index 00000000..92151733
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/text_block_param.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable, Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .text_citation_param import TextCitationParam
+from .cache_control_ephemeral_param import CacheControlEphemeralParam
+
+__all__ = ["TextBlockParam"]
+
+
+class TextBlockParam(TypedDict, total=False):
+    text: Required[str]
+
+    type: Required[Literal["text"]]
+
+    cache_control: Optional[CacheControlEphemeralParam]
+
+    citations: Optional[Iterable[TextCitationParam]]
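
Because the param types are TypedDicts, a block is an ordinary dict at runtime. An editorial sketch of a system-prompt block flagged for prompt caching via the cache_control field above:

from anthropic.types import TextBlockParam

system_block: TextBlockParam = {
    "type": "text",
    "text": "You are a meticulous code reviewer ...",  # long, reusable prefix
    "cache_control": {"type": "ephemeral"},            # CacheControlEphemeralParam
}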
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/text_citation.py b/.venv/lib/python3.12/site-packages/anthropic/types/text_citation.py
new file mode 100644
index 00000000..159771ae
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/text_citation.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .citation_char_location import CitationCharLocation
+from .citation_page_location import CitationPageLocation
+from .citation_content_block_location import CitationContentBlockLocation
+
+__all__ = ["TextCitation"]
+
+TextCitation: TypeAlias = Annotated[
+    Union[CitationCharLocation, CitationPageLocation, CitationContentBlockLocation], PropertyInfo(discriminator="type")
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/text_citation_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/text_citation_param.py
new file mode 100644
index 00000000..8e988141
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/text_citation_param.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .citation_char_location_param import CitationCharLocationParam
+from .citation_page_location_param import CitationPageLocationParam
+from .citation_content_block_location_param import CitationContentBlockLocationParam
+
+__all__ = ["TextCitationParam"]
+
+TextCitationParam: TypeAlias = Union[
+    CitationCharLocationParam, CitationPageLocationParam, CitationContentBlockLocationParam
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/text_delta.py b/.venv/lib/python3.12/site-packages/anthropic/types/text_delta.py
new file mode 100644
index 00000000..7ce96491
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/text_delta.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["TextDelta"]
+
+
+class TextDelta(BaseModel):
+    text: str
+
+    type: Literal["text_delta"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/thinking_block.py b/.venv/lib/python3.12/site-packages/anthropic/types/thinking_block.py
new file mode 100644
index 00000000..7f98b500
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/thinking_block.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["ThinkingBlock"]
+
+
+class ThinkingBlock(BaseModel):
+    signature: str
+
+    thinking: str
+
+    type: Literal["thinking"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/thinking_block_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/thinking_block_param.py
new file mode 100644
index 00000000..d310c7f6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/thinking_block_param.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ThinkingBlockParam"]
+
+
+class ThinkingBlockParam(TypedDict, total=False):
+    signature: Required[str]
+
+    thinking: Required[str]
+
+    type: Required[Literal["thinking"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/thinking_config_disabled_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/thinking_config_disabled_param.py
new file mode 100644
index 00000000..23b5fbad
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/thinking_config_disabled_param.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ThinkingConfigDisabledParam"]
+
+
+class ThinkingConfigDisabledParam(TypedDict, total=False):
+    type: Required[Literal["disabled"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/thinking_config_enabled_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/thinking_config_enabled_param.py
new file mode 100644
index 00000000..46b54892
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/thinking_config_enabled_param.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ThinkingConfigEnabledParam"]
+
+
+class ThinkingConfigEnabledParam(TypedDict, total=False):
+    budget_tokens: Required[int]
+    """Determines how many tokens Claude can use for its internal reasoning process.
+
+    Larger budgets can enable more thorough analysis for complex problems, improving
+    response quality.
+
+    Must be ≥1024 and less than `max_tokens`.
+
+    See
+    [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+    for details.
+    """
+
+    type: Required[Literal["enabled"]]
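
An editorial sketch of passing this config on a request; the model name is illustrative. Per the docstring, budget_tokens must be at least 1024 and strictly below max_tokens.

import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
message = client.messages.create(
    model="claude-3-7-sonnet-latest",  # illustrative model name
    max_tokens=4096,
    thinking={"type": "enabled", "budget_tokens": 2048},  # ThinkingConfigEnabledParam
    messages=[{"role": "user", "content": "How many primes are below 100?"}],
)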
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/thinking_config_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/thinking_config_param.py
new file mode 100644
index 00000000..0c1f9173
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/thinking_config_param.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .thinking_config_enabled_param import ThinkingConfigEnabledParam
+from .thinking_config_disabled_param import ThinkingConfigDisabledParam
+
+__all__ = ["ThinkingConfigParam"]
+
+ThinkingConfigParam: TypeAlias = Union[ThinkingConfigEnabledParam, ThinkingConfigDisabledParam]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/thinking_delta.py b/.venv/lib/python3.12/site-packages/anthropic/types/thinking_delta.py
new file mode 100644
index 00000000..fb79933c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/thinking_delta.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["ThinkingDelta"]
+
+
+class ThinkingDelta(BaseModel):
+    thinking: str
+
+    type: Literal["thinking_delta"]
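
The three delta models above arrive inside content_block_delta events when streaming. An editorial sketch of a raw-stream consumer that routes them by "type":

import anthropic

client = anthropic.Anthropic()
stream = client.messages.create(
    model="claude-3-7-sonnet-latest",  # illustrative model name
    max_tokens=2048,
    thinking={"type": "enabled", "budget_tokens": 1024},
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
    stream=True,
)
for event in stream:
    if event.type == "content_block_delta":
        if event.delta.type == "thinking_delta":
            print("[thinking]", event.delta.thinking)
        elif event.delta.type == "signature_delta":
            pass  # opaque signature; echoed back alongside the thinking block
        elif event.delta.type == "text_delta":
            print(event.delta.text, end="")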
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/tool_bash_20250124_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/tool_bash_20250124_param.py
new file mode 100644
index 00000000..6c8ff0fc
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/tool_bash_20250124_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .cache_control_ephemeral_param import CacheControlEphemeralParam
+
+__all__ = ["ToolBash20250124Param"]
+
+
+class ToolBash20250124Param(TypedDict, total=False):
+    name: Required[Literal["bash"]]
+    """Name of the tool.
+
+    This is how the tool will be called by the model and in tool_use blocks.
+    """
+
+    type: Required[Literal["bash_20250124"]]
+
+    cache_control: Optional[CacheControlEphemeralParam]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/tool_choice_any_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/tool_choice_any_param.py
new file mode 100644
index 00000000..a0a566ea
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/tool_choice_any_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ToolChoiceAnyParam"]
+
+
+class ToolChoiceAnyParam(TypedDict, total=False):
+    type: Required[Literal["any"]]
+
+    disable_parallel_tool_use: bool
+    """Whether to disable parallel tool use.
+
+    Defaults to `false`. If set to `true`, the model will output exactly one tool
+    use.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/tool_choice_auto_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/tool_choice_auto_param.py
new file mode 100644
index 00000000..456f675c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/tool_choice_auto_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ToolChoiceAutoParam"]
+
+
+class ToolChoiceAutoParam(TypedDict, total=False):
+    type: Required[Literal["auto"]]
+
+    disable_parallel_tool_use: bool
+    """Whether to disable parallel tool use.
+
+    Defaults to `false`. If set to `true`, the model will output at most one tool
+    use.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/tool_choice_none_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/tool_choice_none_param.py
new file mode 100644
index 00000000..1e2e68a7
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/tool_choice_none_param.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ToolChoiceNoneParam"]
+
+
+class ToolChoiceNoneParam(TypedDict, total=False):
+    type: Required[Literal["none"]]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/tool_choice_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/tool_choice_param.py
new file mode 100644
index 00000000..868277d4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/tool_choice_param.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .tool_choice_any_param import ToolChoiceAnyParam
+from .tool_choice_auto_param import ToolChoiceAutoParam
+from .tool_choice_none_param import ToolChoiceNoneParam
+from .tool_choice_tool_param import ToolChoiceToolParam
+
+__all__ = ["ToolChoiceParam"]
+
+ToolChoiceParam: TypeAlias = Union[ToolChoiceAutoParam, ToolChoiceAnyParam, ToolChoiceToolParam, ToolChoiceNoneParam]
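
An editorial sketch of the four shapes this union accepts, written as the plain dicts a caller passes for tool_choice (the tool name is hypothetical):

auto_choice = {"type": "auto"}                           # model decides (default)
any_choice = {"type": "any"}                             # must use some tool
forced_choice = {"type": "tool", "name": "get_weather"}  # must use this tool
no_tools = {"type": "none"}                              # no tool use at all
serial = {"type": "auto", "disable_parallel_tool_use": True}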
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/tool_choice_tool_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/tool_choice_tool_param.py
new file mode 100644
index 00000000..aeec9966
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/tool_choice_tool_param.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ToolChoiceToolParam"]
+
+
+class ToolChoiceToolParam(TypedDict, total=False):
+    name: Required[str]
+    """The name of the tool to use."""
+
+    type: Required[Literal["tool"]]
+
+    disable_parallel_tool_use: bool
+    """Whether to disable parallel tool use.
+
+    Defaults to `false`. If set to `true`, the model will output exactly one tool
+    use.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/tool_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/tool_param.py
new file mode 100644
index 00000000..a01a014e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/tool_param.py
@@ -0,0 +1,48 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .._models import set_pydantic_config
+from .cache_control_ephemeral_param import CacheControlEphemeralParam
+
+__all__ = ["ToolParam", "InputSchema"]
+
+
+class InputSchemaTyped(TypedDict, total=False):
+    type: Required[Literal["object"]]
+
+    properties: Optional[object]
+
+
+set_pydantic_config(InputSchemaTyped, {"extra": "allow"})
+
+InputSchema: TypeAlias = Union[InputSchemaTyped, Dict[str, object]]
+
+
+class ToolParam(TypedDict, total=False):
+    input_schema: Required[InputSchema]
+    """[JSON schema](https://json-schema.org/draft/2020-12) for this tool's input.
+
+    This defines the shape of the `input` that your tool accepts and that the model
+    will produce.
+    """
+
+    name: Required[str]
+    """Name of the tool.
+
+    This is how the tool will be called by the model and in tool_use blocks.
+    """
+
+    cache_control: Optional[CacheControlEphemeralParam]
+
+    description: str
+    """Description of what this tool does.
+
+    Tool descriptions should be as detailed as possible. The more information that
+    the model has about what the tool is and how to use it, the better it will
+    perform. You can use natural language descriptions to reinforce important
+    aspects of the tool input JSON schema.
+    """
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/tool_result_block_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/tool_result_block_param.py
new file mode 100644
index 00000000..b6ca8aa9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/tool_result_block_param.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .text_block_param import TextBlockParam
+from .image_block_param import ImageBlockParam
+from .cache_control_ephemeral_param import CacheControlEphemeralParam
+
+__all__ = ["ToolResultBlockParam", "Content"]
+
+Content: TypeAlias = Union[TextBlockParam, ImageBlockParam]
+
+
+class ToolResultBlockParam(TypedDict, total=False):
+    tool_use_id: Required[str]
+
+    type: Required[Literal["tool_result"]]
+
+    cache_control: Optional[CacheControlEphemeralParam]
+
+    content: Union[str, Iterable[Content]]
+
+    is_error: bool
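
An editorial sketch of the user turn that returns a tool's output: tool_use_id must echo the id of the tool_use block it answers, content is commonly a plain string, and is_error marks a failed call.

tool_result_turn = {
    "role": "user",
    "content": [
        {
            "type": "tool_result",
            "tool_use_id": "toolu_01A",  # hypothetical id from the model's response
            "content": '{"temp_c": 21, "condition": "clear"}',
        }
    ],
}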
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/tool_text_editor_20250124_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/tool_text_editor_20250124_param.py
new file mode 100644
index 00000000..94f63102
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/tool_text_editor_20250124_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .cache_control_ephemeral_param import CacheControlEphemeralParam
+
+__all__ = ["ToolTextEditor20250124Param"]
+
+
+class ToolTextEditor20250124Param(TypedDict, total=False):
+    name: Required[Literal["str_replace_editor"]]
+    """Name of the tool.
+
+    This is how the tool will be called by the model and in tool_use blocks.
+    """
+
+    type: Required[Literal["text_editor_20250124"]]
+
+    cache_control: Optional[CacheControlEphemeralParam]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/tool_union_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/tool_union_param.py
new file mode 100644
index 00000000..6c02090e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/tool_union_param.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .tool_param import ToolParam
+from .tool_bash_20250124_param import ToolBash20250124Param
+from .tool_text_editor_20250124_param import ToolTextEditor20250124Param
+
+__all__ = ["ToolUnionParam"]
+
+ToolUnionParam: TypeAlias = Union[ToolParam, ToolBash20250124Param, ToolTextEditor20250124Param]
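
An editorial sketch: the union lets one tools list mix a custom ToolParam with the dated built-in tools defined above (the custom tool is hypothetical):

tools = [
    {  # plain ToolParam
        "name": "get_weather",
        "input_schema": {"type": "object", "properties": {"city": {"type": "string"}}},
    },
    {"type": "bash_20250124", "name": "bash"},
    {"type": "text_editor_20250124", "name": "str_replace_editor"},
]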
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/tool_use_block.py b/.venv/lib/python3.12/site-packages/anthropic/types/tool_use_block.py
new file mode 100644
index 00000000..05514471
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/tool_use_block.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["ToolUseBlock"]
+
+
+class ToolUseBlock(BaseModel):
+    id: str
+
+    input: object
+
+    name: str
+
+    type: Literal["tool_use"]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/tool_use_block_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/tool_use_block_param.py
new file mode 100644
index 00000000..cc285079
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/tool_use_block_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .cache_control_ephemeral_param import CacheControlEphemeralParam
+
+__all__ = ["ToolUseBlockParam"]
+
+
+class ToolUseBlockParam(TypedDict, total=False):
+    id: Required[str]
+
+    input: Required[object]
+
+    name: Required[str]
+
+    type: Required[Literal["tool_use"]]
+
+    cache_control: Optional[CacheControlEphemeralParam]
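
An editorial sketch of the model/param pairing: the response side is the pydantic ToolUseBlock, while the dict echoed back into conversation history is the ToolUseBlockParam form above.

from anthropic.types import Message, ToolUseBlock, ToolUseBlockParam

def tool_use_params(message: Message) -> list[ToolUseBlockParam]:
    # Collect tool_use blocks from a response in their param (dict) form.
    return [
        {"type": "tool_use", "id": b.id, "input": b.input, "name": b.name}
        for b in message.content
        if isinstance(b, ToolUseBlock)
    ]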
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/url_image_source_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/url_image_source_param.py
new file mode 100644
index 00000000..852b8eee
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/url_image_source_param.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["URLImageSourceParam"]
+
+
+class URLImageSourceParam(TypedDict, total=False):
+    type: Required[Literal["url"]]
+
+    url: Required[str]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/url_pdf_source_param.py b/.venv/lib/python3.12/site-packages/anthropic/types/url_pdf_source_param.py
new file mode 100644
index 00000000..b5321d56
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/url_pdf_source_param.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["URLPDFSourceParam"]
+
+
+class URLPDFSourceParam(TypedDict, total=False):
+    type: Required[Literal["url"]]
+
+    url: Required[str]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/types/usage.py b/.venv/lib/python3.12/site-packages/anthropic/types/usage.py
new file mode 100644
index 00000000..b4f817bd
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/types/usage.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from .._models import BaseModel
+
+__all__ = ["Usage"]
+
+
+class Usage(BaseModel):
+    cache_creation_input_tokens: Optional[int] = None
+    """The number of input tokens used to create the cache entry."""
+
+    cache_read_input_tokens: Optional[int] = None
+    """The number of input tokens read from the cache."""
+
+    input_tokens: int
+    """The number of input tokens which were used."""
+
+    output_tokens: int
+    """The number of output tokens which were used."""