3 files changed, 3303 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/anthropic/resources/messages/__init__.py b/.venv/lib/python3.12/site-packages/anthropic/resources/messages/__init__.py
new file mode 100644
index 00000000..6e7cf9d9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/resources/messages/__init__.py
@@ -0,0 +1,35 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .batches import (
+    Batches,
+    AsyncBatches,
+    BatchesWithRawResponse,
+    AsyncBatchesWithRawResponse,
+    BatchesWithStreamingResponse,
+    AsyncBatchesWithStreamingResponse,
+)
+from .messages import (
+    DEPRECATED_MODELS,
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
+
+__all__ = [  # re-exports of the names imported above from `.batches` and `.messages`
+    "Batches",
+    "AsyncBatches",
+    "BatchesWithRawResponse",
+    "AsyncBatchesWithRawResponse",
+    "BatchesWithStreamingResponse",
+    "AsyncBatchesWithStreamingResponse",
+    "Messages",
+    "AsyncMessages",
+    "MessagesWithRawResponse",
+    "AsyncMessagesWithRawResponse",
+    "MessagesWithStreamingResponse",
+    "AsyncMessagesWithStreamingResponse",
+    "DEPRECATED_MODELS",
+]
diff --git a/.venv/lib/python3.12/site-packages/anthropic/resources/messages/batches.py b/.venv/lib/python3.12/site-packages/anthropic/resources/messages/batches.py
new file mode 100644
index 00000000..4ebd8fd4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/resources/messages/batches.py
@@ -0,0 +1,717 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...pagination import SyncPage, AsyncPage
+from ..._exceptions import AnthropicError
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.messages import batch_list_params, batch_create_params
+from ..._decoders.jsonl import JSONLDecoder, AsyncJSONLDecoder
+from ...types.messages.message_batch import MessageBatch
+from ...types.messages.deleted_message_batch import DeletedMessageBatch
+from ...types.messages.message_batch_individual_response import MessageBatchIndividualResponse
+
+__all__ = ["Batches", "AsyncBatches"]  # the `*With*Response` classes below are defined here but not listed
+
+
+class Batches(SyncAPIResource):  # synchronous resource for the `/v1/messages/batches` endpoints
+    @cached_property
+    def with_raw_response(self) -> BatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return BatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> BatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return BatchesWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        requests: Iterable[batch_create_params.Request],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageBatch:
+        """
+        Send a batch of Message creation requests.
+
+        The Message Batches API can be used to process multiple Messages API requests at
+        once. Once a Message Batch is created, it begins processing immediately. Batches
+        can take up to 24 hours to complete.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          requests: List of requests for prompt completion. Each is an individual request to create
+              a Message.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/v1/messages/batches",
+            body=maybe_transform({"requests": requests}, batch_create_params.BatchCreateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageBatch,
+        )
+
+    def retrieve(
+        self,
+        message_batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageBatch:
+        """Retrieve a Message Batch by ID. This endpoint is idempotent and can be used to
+        poll for Message Batch completion.
+
+        To access the results of a Message Batch, make a request to the
+        `results_url` field in the response (the `results` method of this class does so).
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch. An empty value raises `ValueError` before any request is sent.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        return self._get(
+            f"/v1/messages/batches/{message_batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageBatch,
+        )
+
+    def list(
+        self,
+        *,
+        after_id: str | NotGiven = NOT_GIVEN,
+        before_id: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncPage[MessageBatch]:
+        """List all Message Batches within a Workspace.
+
+        Most recently created batches are returned first. Results are paginated: use
+        `after_id` or `before_id` as a cursor and `limit` to set the page size.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          after_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately after this object.
+
+          before_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately before this object.
+
+          limit: Number of items to return per page.
+
+              Defaults to `20`. Ranges from `1` to `1000`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/v1/messages/batches",
+            page=SyncPage[MessageBatch],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after_id": after_id,
+                        "before_id": before_id,
+                        "limit": limit,
+                    },
+                    batch_list_params.BatchListParams,
+                ),
+            ),
+            model=MessageBatch,
+        )
+
+    def delete(
+        self,
+        message_batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> DeletedMessageBatch:
+        """
+        Delete a Message Batch.
+
+        Message Batches can only be deleted once they've finished processing. If you'd
+        like to delete an in-progress batch, you must first cancel it.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch. An empty value raises `ValueError` before any request is sent.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        return self._delete(
+            f"/v1/messages/batches/{message_batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=DeletedMessageBatch,
+        )
+
+    def cancel(
+        self,
+        message_batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageBatch:
+        """Cancel a Message Batch. Batches may be canceled any time before processing ends.
+
+        Once cancellation is initiated, the batch enters a `canceling` state, at
+        which time the system may complete any in-progress, non-interruptible
+        requests before finalizing cancellation. The request is a POST to the
+        batch's `/cancel` path and the response is parsed as a `MessageBatch`.
+
+        The number of canceled requests is specified in `request_counts`. To determine
+        which requests were canceled, check the individual results within the batch.
+        Note that cancellation may not result in any canceled requests if they were
+        non-interruptible.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch. An empty value raises `ValueError` before any request is sent.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        return self._post(
+            f"/v1/messages/batches/{message_batch_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageBatch,
+        )
+
+    def results(
+        self,
+        message_batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> JSONLDecoder[MessageBatchIndividualResponse]:
+        """
+        Streams the results of a Message Batch as a `.jsonl` file.
+
+        Each line in the file is a JSON object containing the result of a single request
+        in the Message Batch. Results are not guaranteed to be in the same order as
+        requests. Use the `custom_id` field to match results to requests.
+
+        The batch is retrieved first; if it has no `results_url`, `AnthropicError` is raised.
+        Learn more about the Message Batches API in our [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch. An empty value raises `ValueError` before any request is sent.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        # NOTE: this lookup is made without the per-call options; they are passed only to the download below.
+        batch = self.retrieve(message_batch_id=message_batch_id)
+        if not batch.results_url:
+            raise AnthropicError(
+                f"No `results_url` for the given batch; Has it finished processing? {batch.processing_status}"
+            )
+        # Caller-supplied headers are spread last, so an `Accept` passed in `extra_headers` overrides the default.
+        extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
+        return self._get(
+            batch.results_url,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=JSONLDecoder[MessageBatchIndividualResponse],
+            stream=True,
+        )
+
+class AsyncBatches(AsyncAPIResource):  # asynchronous resource for the `/v1/messages/batches` endpoints
+    @cached_property
+    def with_raw_response(self) -> AsyncBatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncBatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return AsyncBatchesWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        requests: Iterable[batch_create_params.Request],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageBatch:
+        """
+        Send a batch of Message creation requests.
+
+        The Message Batches API can be used to process multiple Messages API requests at
+        once. Once a Message Batch is created, it begins processing immediately. Batches
+        can take up to 24 hours to complete.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          requests: List of requests for prompt completion. Each is an individual request to create
+              a Message.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/v1/messages/batches",
+            body=await async_maybe_transform({"requests": requests}, batch_create_params.BatchCreateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageBatch,
+        )
+
+    async def retrieve(
+        self,
+        message_batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageBatch:
+        """Retrieve a Message Batch by ID. This endpoint is idempotent and can be used to
+        poll for Message Batch completion.
+
+        To access the results of a Message Batch, make a request to the
+        `results_url` field in the response (the `results` method of this class does so).
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch. An empty value raises `ValueError` before any request is sent.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        return await self._get(
+            f"/v1/messages/batches/{message_batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageBatch,
+        )
+
+    def list(
+        self,
+        *,
+        after_id: str | NotGiven = NOT_GIVEN,
+        before_id: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[MessageBatch, AsyncPage[MessageBatch]]:
+        """List all Message Batches within a Workspace.
+
+        Most recently created batches are returned first. Unlike the other methods of
+        this class, this is a plain `def`: it returns an `AsyncPaginator` without awaiting.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          after_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately after this object.
+
+          before_id: ID of the object to use as a cursor for pagination. When provided, returns the
+              page of results immediately before this object.
+
+          limit: Number of items to return per page.
+
+              Defaults to `20`. Ranges from `1` to `1000`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/v1/messages/batches",
+            page=AsyncPage[MessageBatch],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after_id": after_id,
+                        "before_id": before_id,
+                        "limit": limit,
+                    },
+                    batch_list_params.BatchListParams,
+                ),
+            ),
+            model=MessageBatch,
+        )
+
+    async def delete(
+        self,
+        message_batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> DeletedMessageBatch:
+        """
+        Delete a Message Batch.
+
+        Message Batches can only be deleted once they've finished processing. If you'd
+        like to delete an in-progress batch, you must first cancel it.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch. An empty value raises `ValueError` before any request is sent.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        return await self._delete(
+            f"/v1/messages/batches/{message_batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=DeletedMessageBatch,
+        )
+
+    async def cancel(
+        self,
+        message_batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageBatch:
+        """Cancel a Message Batch. Batches may be canceled any time before processing ends.
+
+        Once cancellation is initiated, the batch enters a `canceling` state, at
+        which time the system may complete any in-progress, non-interruptible
+        requests before finalizing cancellation. The request is a POST to the
+        batch's `/cancel` path and the response is parsed as a `MessageBatch`.
+
+        The number of canceled requests is specified in `request_counts`. To determine
+        which requests were canceled, check the individual results within the batch.
+        Note that cancellation may not result in any canceled requests if they were
+        non-interruptible.
+
+        Learn more about the Message Batches API in our
+        [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch. An empty value raises `ValueError` before any request is sent.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        return await self._post(
+            f"/v1/messages/batches/{message_batch_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageBatch,
+        )
+
+    async def results(
+        self,
+        message_batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncJSONLDecoder[MessageBatchIndividualResponse]:
+        """
+        Streams the results of a Message Batch as a `.jsonl` file.
+
+        Each line in the file is a JSON object containing the result of a single request
+        in the Message Batch. Results are not guaranteed to be in the same order as
+        requests. Use the `custom_id` field to match results to requests.
+
+        The batch is retrieved first; if it has no `results_url`, `AnthropicError` is raised.
+        Learn more about the Message Batches API in our [user guide](/en/docs/build-with-claude/batch-processing)
+
+        Args:
+          message_batch_id: ID of the Message Batch. An empty value raises `ValueError` before any request is sent.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not message_batch_id:
+            raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+        # NOTE: this lookup is made without the per-call options; they are passed only to the download below.
+        batch = await self.retrieve(message_batch_id=message_batch_id)
+        if not batch.results_url:
+            raise AnthropicError(
+                f"No `results_url` for the given batch; Has it finished processing? {batch.processing_status}"
+            )
+        # Caller-supplied headers are spread last, so an `Accept` passed in `extra_headers` overrides the default.
+        extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
+        return await self._get(
+            batch.results_url,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=AsyncJSONLDecoder[MessageBatchIndividualResponse],
+            stream=True,
+        )
+
+
+class BatchesWithRawResponse:  # object returned by `Batches.with_raw_response`
+    def __init__(self, batches: Batches) -> None:
+        self._batches = batches
+        # Each attribute is the matching `batches` method passed through `to_raw_response_wrapper`; `results` is not wrapped.
+        self.create = _legacy_response.to_raw_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            batches.list,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            batches.delete,
+        )
+        self.cancel = _legacy_response.to_raw_response_wrapper(
+            batches.cancel,
+        )
+
+
+class AsyncBatchesWithRawResponse:  # object returned by `AsyncBatches.with_raw_response`
+    def __init__(self, batches: AsyncBatches) -> None:
+        self._batches = batches
+        # Each attribute is the matching `batches` method passed through `async_to_raw_response_wrapper`; `results` is not wrapped.
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            batches.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            batches.delete,
+        )
+        self.cancel = _legacy_response.async_to_raw_response_wrapper(
+            batches.cancel,
+        )
+
+
+class BatchesWithStreamingResponse:  # object returned by `Batches.with_streaming_response`
+    def __init__(self, batches: Batches) -> None:
+        self._batches = batches
+        # Each attribute is the matching `batches` method passed through `to_streamed_response_wrapper`; `results` is not wrapped.
+        self.create = to_streamed_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            batches.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            batches.delete,
+        )
+        self.cancel = to_streamed_response_wrapper(
+            batches.cancel,
+        )
+
+
+class AsyncBatchesWithStreamingResponse:  # object returned by `AsyncBatches.with_streaming_response`
+    def __init__(self, batches: AsyncBatches) -> None:
+        self._batches = batches
+        # Each attribute is the matching `batches` method passed through `async_to_streamed_response_wrapper`; `results` is not wrapped.
+        self.create = async_to_streamed_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            batches.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            batches.delete,
+        )
+        self.cancel = async_to_streamed_response_wrapper(
+            batches.cancel,
+        )
diff --git a/.venv/lib/python3.12/site-packages/anthropic/resources/messages/messages.py b/.venv/lib/python3.12/site-packages/anthropic/resources/messages/messages.py
new file mode 100644
index 00000000..70bceb7f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/anthropic/resources/messages/messages.py
@@ -0,0 +1,2551 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import warnings
+from typing import List, Union, Iterable
+from functools import partial
+from typing_extensions import Literal, overload
+
+import httpx
+
+from ... import _legacy_response
+from ...types import (
+    ThinkingConfigParam,
+    message_create_params,
+    message_count_tokens_params,
+)
+from .batches import (
+    Batches,
+    AsyncBatches,
+    BatchesWithRawResponse,
+    AsyncBatchesWithRawResponse,
+    BatchesWithStreamingResponse,
+    AsyncBatchesWithStreamingResponse,
+)
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
+    is_given,
+    required_args,
+    maybe_transform,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ..._constants import DEFAULT_TIMEOUT
+from ..._streaming import Stream, AsyncStream
+from ..._base_client import make_request_options
+from ...lib.streaming import MessageStreamManager, AsyncMessageStreamManager
+from ...types.message import Message
+from ...types.model_param import ModelParam
+from ...types.message_param import MessageParam
+from ...types.metadata_param import MetadataParam
+from ...types.text_block_param import TextBlockParam
+from ...types.tool_union_param import ToolUnionParam
+from ...types.tool_choice_param import ToolChoiceParam
+from ...types.message_tokens_count import MessageTokensCount
+from ...types.thinking_config_param import ThinkingConfigParam
+from ...types.raw_message_stream_event import RawMessageStreamEvent
+from ...types.message_count_tokens_tool_param import MessageCountTokensToolParam
+
+__all__ = ["Messages", "AsyncMessages"]
+
+
+# Model identifiers mapped to a human-readable date string. Identifiers are
+# grouped by date; insertion order matches the order they are listed in.
+# NOTE(review): presumably the date on which each model is retired — confirm
+# against the code that reads this mapping.
+DEPRECATED_MODELS = {
+    **dict.fromkeys(
+        (
+            "claude-1.3",
+            "claude-1.3-100k",
+            "claude-instant-1.1",
+            "claude-instant-1.1-100k",
+            "claude-instant-1.2",
+        ),
+        "November 6th, 2024",
+    ),
+    **dict.fromkeys(
+        (
+            "claude-3-sonnet-20240229",
+            "claude-2.1",
+            "claude-2.0",
+        ),
+        "July 21st, 2025",
+    ),
+}
+
+
+class Messages(SyncAPIResource):
+    @cached_property
+    def batches(self) -> Batches:
+        """Build the `Batches` sub-resource from this resource's client."""
+        client = self._client
+        return Batches(client)
+
+    @cached_property
+    def with_raw_response(self) -> MessagesWithRawResponse:
+        """
+        Prefix any HTTP method call with this property to receive the raw
+        response object rather than the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        raw_view = MessagesWithRawResponse(self)
+        return raw_view
+
+    @cached_property
+    def with_streaming_response(self) -> MessagesWithStreamingResponse:
+        """
+        Like `.with_raw_response`, except that the response body is not read eagerly.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        streaming_view = MessagesWithStreamingResponse(self)
+        return streaming_view
+
+    @overload
+    def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        stream: Literal[False] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our
+        [user guide](https://docs.anthropic.com/en/docs/initial-setup).
+
+        This overload describes the non-streaming call: `stream` is omitted or `False`,
+        and the return annotation is `Message`.
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly-recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        stream: Literal[True],
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[RawMessageStreamEvent]:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our
+        [user guide](https://docs.anthropic.com/en/docs/initial-setup).
+
+        This overload describes the streaming call: `stream` is required and must be
+        `True`, and the return annotation is `Stream[RawMessageStreamEvent]`.
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly-recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        stream: bool,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message | Stream[RawMessageStreamEvent]:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our [user guide](/en/docs/initial-setup)
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.\n\nSee
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly-recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
+    def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message | Stream[RawMessageStreamEvent]:
+        if not stream and not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
+            timeout = self._client._calculate_nonstreaming_timeout(max_tokens)
+
+        if model in DEPRECATED_MODELS:
+            warnings.warn(
+                f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
+                DeprecationWarning,
+                stacklevel=3,
+            )
+
+        return self._post(
+            "/v1/messages",
+            body=maybe_transform(
+                {
+                    "max_tokens": max_tokens,
+                    "messages": messages,
+                    "model": model,
+                    "metadata": metadata,
+                    "stop_sequences": stop_sequences,
+                    "stream": stream,
+                    "system": system,
+                    "temperature": temperature,
+                    "thinking": thinking,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_k": top_k,
+                    "top_p": top_p,
+                },
+                message_create_params.MessageCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+            stream=stream or False,
+            stream_cls=Stream[RawMessageStreamEvent],
+        )
+
+    def stream(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageStreamManager:
+        """Create a Message stream"""
+        if model in DEPRECATED_MODELS:
+            warnings.warn(
+                f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
+                DeprecationWarning,
+                stacklevel=3,
+            )
+
+        extra_headers = {
+            "X-Stainless-Stream-Helper": "messages",
+            **(extra_headers or {}),
+        }
+        make_request = partial(
+            self._post,
+            "/v1/messages",
+            body=maybe_transform(
+                {
+                    "max_tokens": max_tokens,
+                    "messages": messages,
+                    "model": model,
+                    "metadata": metadata,
+                    "stop_sequences": stop_sequences,
+                    "system": system,
+                    "temperature": temperature,
+                    "top_k": top_k,
+                    "top_p": top_p,
+                    "tools": tools,
+                    "thinking": thinking,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                },
+                message_create_params.MessageCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+            stream=True,
+            stream_cls=Stream[RawMessageStreamEvent],
+        )
+        return MessageStreamManager(make_request)
+
+    def count_tokens(
+        self,
+        *,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[MessageCountTokensToolParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageTokensCount:
+        """
+        Count the number of tokens in a Message.
+
+        The Token Count API can be used to count the number of tokens in a Message,
+        including tools, images, and documents, without creating it.
+
+        Learn more about token counting in our
+        [user guide](/en/docs/build-with-claude/token-counting)
+
+        Args:
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.\n\nSee
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly-recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/v1/messages/count_tokens",
+            body=maybe_transform(
+                {
+                    "messages": messages,
+                    "model": model,
+                    "system": system,
+                    "thinking": thinking,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                },
+                message_count_tokens_params.MessageCountTokensParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageTokensCount,
+        )
+
+
+class AsyncMessages(AsyncAPIResource):
+    @cached_property
+    def batches(self) -> AsyncBatches:
+        return AsyncBatches(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncMessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncMessagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+        """
+        return AsyncMessagesWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        stream: Literal[False] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our [user guide](/en/docs/initial-setup)
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.\n\nSee
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly-recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        stream: Literal[True],
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[RawMessageStreamEvent]:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our [user guide](/en/docs/initial-setup).
+
+        This overload covers calls where `stream` is the literal `True`; it is annotated
+        to return an `AsyncStream[RawMessageStreamEvent]`.
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              In this overload the argument is required and typed as `Literal[True]`.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly-recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        stream: bool,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message | AsyncStream[RawMessageStreamEvent]:
+        """
+        Send a structured list of input messages with text and/or image content, and the
+        model will generate the next message in the conversation.
+
+        The Messages API can be used for either single queries or stateless multi-turn
+        conversations.
+
+        Learn more about the Messages API in our [user guide](/en/docs/initial-setup).
+
+        This overload covers calls where `stream` is passed as a plain `bool`; it is
+        annotated to return either a `Message` or an
+        `AsyncStream[RawMessageStreamEvent]`.
+
+        Args:
+          max_tokens: The maximum number of tokens to generate before stopping.
+
+              Note that our models may stop _before_ reaching this maximum. This parameter
+              only specifies the absolute maximum number of tokens to generate.
+
+              Different models have different maximum values for this parameter. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt.
+
+              See [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          stream: Whether to incrementally stream the response using server-sent events.
+
+              In this overload the argument is required and typed as `bool`.
+
+              See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+              details.
+
+          metadata: An object describing metadata about the request.
+
+          stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+              Our models will normally stop when they have naturally completed their turn,
+              which will result in a response `stop_reason` of `"end_turn"`.
+
+              If you want the model to stop generating when it encounters custom strings of
+              text, you can use the `stop_sequences` parameter. If the model encounters one of
+              the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+              and the response `stop_sequence` value will contain the matched stop sequence.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          temperature: Amount of randomness injected into the response.
+
+              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+              for analytical / multiple choice, and closer to `1.0` for creative and
+              generative tasks.
+
+              Note that even with `temperature` of `0.0`, the results will not be fully
+              deterministic.
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly-recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          top_k: Only sample from the top K options for each subsequent token.
+
+              Used to remove "long tail" low probability responses.
+              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          top_p: Use nucleus sampling.
+
+              In nucleus sampling, we compute the cumulative distribution over all the options
+              for each subsequent token in decreasing probability order and cut it off once it
+              reaches a particular probability specified by `top_p`. You should either alter
+              `temperature` or `top_p`, but not both.
+
+              Recommended for advanced use cases only. You usually only need to use
+              `temperature`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
+    async def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message | AsyncStream[RawMessageStreamEvent]:
+        if not stream and not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
+            timeout = self._client._calculate_nonstreaming_timeout(max_tokens)
+
+        if model in DEPRECATED_MODELS:
+            warnings.warn(
+                f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
+                DeprecationWarning,
+                stacklevel=3,
+            )
+
+        return await self._post(
+            "/v1/messages",
+            body=await async_maybe_transform(
+                {
+                    "max_tokens": max_tokens,
+                    "messages": messages,
+                    "model": model,
+                    "metadata": metadata,
+                    "stop_sequences": stop_sequences,
+                    "stream": stream,
+                    "system": system,
+                    "temperature": temperature,
+                    "thinking": thinking,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_k": top_k,
+                    "top_p": top_p,
+                },
+                message_create_params.MessageCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+            stream=stream or False,
+            stream_cls=AsyncStream[RawMessageStreamEvent],
+        )
+
+    def stream(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        metadata: MetadataParam | NotGiven = NOT_GIVEN,
+        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        top_k: int | NotGiven = NOT_GIVEN,
+        top_p: float | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolUnionParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncMessageStreamManager:
+        """Prepare a streaming Messages request and hand it to an `AsyncMessageStreamManager`.
+
+        The `self._post(...)` result is not awaited here; it is passed to the manager unchanged.
+        """
+        if model in DEPRECATED_MODELS:
+            notice = f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information."
+            warnings.warn(notice, DeprecationWarning, stacklevel=3)
+
+        # Caller headers are spread last, so they win over the helper marker on a key clash.
+        stream_headers = {"X-Stainless-Stream-Helper": "messages", **(extra_headers or {})}
+        payload = maybe_transform(
+            {
+                "max_tokens": max_tokens,
+                "messages": messages,
+                "model": model,
+                "metadata": metadata,
+                "stop_sequences": stop_sequences,
+                "system": system,
+                "temperature": temperature,
+                "top_k": top_k,
+                "top_p": top_p,
+                "tools": tools,
+                "thinking": thinking,
+                "tool_choice": tool_choice,
+                "stream": True,
+            },
+            message_create_params.MessageCreateParams,
+        )
+        request_options = make_request_options(
+            extra_headers=stream_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+        )
+        pending = self._post(
+            "/v1/messages",
+            body=payload,
+            options=request_options,
+            cast_to=Message,
+            stream=True,
+            stream_cls=AsyncStream[RawMessageStreamEvent],
+        )
+        return AsyncMessageStreamManager(pending)
+
+    async def count_tokens(
+        self,
+        *,
+        messages: Iterable[MessageParam],
+        model: ModelParam,
+        system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+        thinking: ThinkingConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[MessageCountTokensToolParam] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageTokensCount:
+        """
+        Count the number of tokens in a Message.
+
+        The Token Count API can be used to count the number of tokens in a Message,
+        including tools, images, and documents, without creating it.
+
+        Learn more about token counting in our
+        [user guide](https://docs.anthropic.com/en/docs/build-with-claude/token-counting).
+
+        Args:
+          messages: Input messages.
+
+              Our models are trained to operate on alternating `user` and `assistant`
+              conversational turns. When creating a new `Message`, you specify the prior
+              conversational turns with the `messages` parameter, and the model then generates
+              the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+              in your request will be combined into a single turn.
+
+              Each input message must be an object with a `role` and `content`. You can
+              specify a single `user`-role message, or you can include multiple `user` and
+              `assistant` messages.
+
+              If the final message uses the `assistant` role, the response content will
+              continue immediately from the content in that message. This can be used to
+              constrain part of the model's response.
+
+              Example with a single `user` message:
+
+              ```json
+              [{ "role": "user", "content": "Hello, Claude" }]
+              ```
+
+              Example with multiple conversational turns:
+
+              ```json
+              [
+                { "role": "user", "content": "Hello there." },
+                { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+                { "role": "user", "content": "Can you explain LLMs in plain English?" }
+              ]
+              ```
+
+              Example with a partially-filled response from Claude:
+
+              ```json
+              [
+                {
+                  "role": "user",
+                  "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+                },
+                { "role": "assistant", "content": "The best answer is (" }
+              ]
+              ```
+
+              Each input message `content` may be either a single `string` or an array of
+              content blocks, where each block has a specific `type`. Using a `string` for
+              `content` is shorthand for an array of one content block of type `"text"`. The
+              following input messages are equivalent:
+
+              ```json
+              { "role": "user", "content": "Hello, Claude" }
+              ```
+
+              ```json
+              { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+              ```
+
+              Starting with Claude 3 models, you can also send image content blocks:
+
+              ```json
+              {
+                "role": "user",
+                "content": [
+                  {
+                    "type": "image",
+                    "source": {
+                      "type": "base64",
+                      "media_type": "image/jpeg",
+                      "data": "/9j/4AAQSkZJRg..."
+                    }
+                  },
+                  { "type": "text", "text": "What is in this image?" }
+                ]
+              }
+              ```
+
+              We currently support the `base64` source type for images, and the `image/jpeg`,
+              `image/png`, `image/gif`, and `image/webp` media types.
+
+              See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+              more input examples.
+
+              Note that if you want to include a
+              [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+              the top-level `system` parameter — there is no `"system"` role for input
+              messages in the Messages API.
+
+          model: The model that will complete your prompt. See
+              [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+              details and options.
+
+          system: System prompt.
+
+              A system prompt is a way of providing context and instructions to Claude, such
+              as specifying a particular goal or role. See our
+              [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+          thinking: Configuration for enabling Claude's extended thinking.
+
+              When enabled, responses include `thinking` content blocks showing Claude's
+              thinking process before the final answer. Requires a minimum budget of 1,024
+              tokens and counts towards your `max_tokens` limit.
+
+              See
+              [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+              for details.
+
+          tool_choice: How the model should use the provided tools. The model can use a specific tool,
+              any available tool, decide by itself, or not use tools at all.
+
+          tools: Definitions of tools that the model may use.
+
+              If you include `tools` in your API request, the model may return `tool_use`
+              content blocks that represent the model's use of those tools. You can then run
+              those tools using the tool input generated by the model and then optionally
+              return results back to the model using `tool_result` content blocks.
+
+              Each tool definition includes:
+
+              - `name`: Name of the tool.
+              - `description`: Optional, but strongly-recommended description of the tool.
+              - `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
+                tool `input` shape that the model will produce in `tool_use` output content
+                blocks.
+
+              For example, if you defined `tools` as:
+
+              ```json
+              [
+                {
+                  "name": "get_stock_price",
+                  "description": "Get the current stock price for a given ticker symbol.",
+                  "input_schema": {
+                    "type": "object",
+                    "properties": {
+                      "ticker": {
+                        "type": "string",
+                        "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+                      }
+                    },
+                    "required": ["ticker"]
+                  }
+                }
+              ]
+              ```
+
+              And then asked the model "What's the S&P 500 at today?", the model might produce
+              `tool_use` content blocks in the response like this:
+
+              ```json
+              [
+                {
+                  "type": "tool_use",
+                  "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "name": "get_stock_price",
+                  "input": { "ticker": "^GSPC" }
+                }
+              ]
+              ```
+
+              You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+              input, and return the following back to the model in a subsequent `user`
+              message:
+
+              ```json
+              [
+                {
+                  "type": "tool_result",
+                  "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+                  "content": "259.75 USD"
+                }
+              ]
+              ```
+
+              Tools can be used for workflows that include running client-side tools and
+              functions, or more generally whenever you want the model to produce a particular
+              JSON structure of output.
+
+              See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        # Transform the body before the call so the request itself stays a single flat expression.
+        payload = await async_maybe_transform(
+            {
+                "messages": messages,
+                "model": model,
+                "system": system,
+                "thinking": thinking,
+                "tool_choice": tool_choice,
+                "tools": tools,
+            },
+            message_count_tokens_params.MessageCountTokensParams,
+        )
+        request_options = make_request_options(
+            extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+        )
+        return await self._post(
+            "/v1/messages/count_tokens", body=payload, options=request_options, cast_to=MessageTokensCount
+        )
+
+
+class MessagesWithRawResponse:
+    """Companion to `Messages` exposing `create`/`count_tokens` via `to_raw_response_wrapper`."""
+
+    def __init__(self, messages: Messages) -> None:
+        self._messages = messages
+        wrap = _legacy_response.to_raw_response_wrapper
+        self.create = wrap(messages.create)
+        self.count_tokens = wrap(messages.count_tokens)
+
+    @cached_property
+    def batches(self) -> BatchesWithRawResponse:
+        """Wrap the underlying resource's `batches` in `BatchesWithRawResponse`."""
+        inner = self._messages.batches
+        return BatchesWithRawResponse(inner)
+
+
+class AsyncMessagesWithRawResponse:
+    """Companion to `AsyncMessages` exposing `create`/`count_tokens` via `async_to_raw_response_wrapper`."""
+
+    def __init__(self, messages: AsyncMessages) -> None:
+        self._messages = messages
+        wrap = _legacy_response.async_to_raw_response_wrapper
+        self.create = wrap(messages.create)
+        self.count_tokens = wrap(messages.count_tokens)
+
+    @cached_property
+    def batches(self) -> AsyncBatchesWithRawResponse:
+        """Wrap the underlying resource's `batches` in `AsyncBatchesWithRawResponse`."""
+        inner = self._messages.batches
+        return AsyncBatchesWithRawResponse(inner)
+
+
+class MessagesWithStreamingResponse:
+    """Companion to `Messages` exposing `create`/`count_tokens` via `to_streamed_response_wrapper`."""
+
+    def __init__(self, messages: Messages) -> None:
+        self._messages = messages
+        wrap = to_streamed_response_wrapper
+        self.create = wrap(messages.create)
+        self.count_tokens = wrap(messages.count_tokens)
+
+    @cached_property
+    def batches(self) -> BatchesWithStreamingResponse:
+        """Wrap the underlying resource's `batches` in `BatchesWithStreamingResponse`."""
+        inner = self._messages.batches
+        return BatchesWithStreamingResponse(inner)
+
+
+class AsyncMessagesWithStreamingResponse:
+    """Companion to `AsyncMessages` exposing `create`/`count_tokens` via `async_to_streamed_response_wrapper`."""
+
+    def __init__(self, messages: AsyncMessages) -> None:
+        self._messages = messages
+        wrap = async_to_streamed_response_wrapper
+        self.create = wrap(messages.create)
+        self.count_tokens = wrap(messages.count_tokens)
+
+    @cached_property
+    def batches(self) -> AsyncBatchesWithStreamingResponse:
+        """Wrap the underlying resource's `batches` in `AsyncBatchesWithStreamingResponse`."""
+        inner = self._messages.batches
+        return AsyncBatchesWithStreamingResponse(inner)