Diffstat (limited to '.venv/lib/python3.12/site-packages/openai/resources')
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/__init__.py  201
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/audio/__init__.py  61
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/audio/audio.py  166
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/audio/speech.py  244
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/audio/transcriptions.py  682
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/audio/translations.py  372
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/batches.py  517
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/__init__.py  47
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/assistants.py  1004
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/beta.py  175
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/chat/__init__.py  11
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/chat/chat.py  21
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/chat/completions.py  634
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/realtime/__init__.py  47
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/realtime/realtime.py  1066
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py  383
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/realtime/transcription_sessions.py  277
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/threads/__init__.py  47
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/threads/messages.py  670
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/__init__.py  33
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/runs.py  2989
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/steps.py  381
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/threads/threads.py  1875
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/chat/__init__.py  33
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/chat/chat.py  102
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/chat/completions/__init__.py  33
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/chat/completions/completions.py  2331
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/chat/completions/messages.py  212
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/completions.py  1148
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/embeddings.py  290
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/files.py  767
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/fine_tuning/__init__.py  33
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/fine_tuning/fine_tuning.py  102
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/__init__.py  33
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/checkpoints.py  199
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/jobs.py  761
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/images.py  600
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/models.py  306
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/moderations.py  200
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/responses/__init__.py  33
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/responses/input_items.py  223
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/responses/responses.py  1791
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/responses/responses.py.orig  1796
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/uploads/__init__.py  33
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/uploads/parts.py  210
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/uploads/uploads.py  714
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/vector_stores/__init__.py  47
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/vector_stores/file_batches.py  801
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/vector_stores/files.py  933
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/vector_stores/vector_stores.py  868
50 files changed, 26502 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/__init__.py
new file mode 100644
index 00000000..d3457cf3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/__init__.py
@@ -0,0 +1,201 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .beta import (
+    Beta,
+    AsyncBeta,
+    BetaWithRawResponse,
+    AsyncBetaWithRawResponse,
+    BetaWithStreamingResponse,
+    AsyncBetaWithStreamingResponse,
+)
+from .chat import (
+    Chat,
+    AsyncChat,
+    ChatWithRawResponse,
+    AsyncChatWithRawResponse,
+    ChatWithStreamingResponse,
+    AsyncChatWithStreamingResponse,
+)
+from .audio import (
+    Audio,
+    AsyncAudio,
+    AudioWithRawResponse,
+    AsyncAudioWithRawResponse,
+    AudioWithStreamingResponse,
+    AsyncAudioWithStreamingResponse,
+)
+from .files import (
+    Files,
+    AsyncFiles,
+    FilesWithRawResponse,
+    AsyncFilesWithRawResponse,
+    FilesWithStreamingResponse,
+    AsyncFilesWithStreamingResponse,
+)
+from .images import (
+    Images,
+    AsyncImages,
+    ImagesWithRawResponse,
+    AsyncImagesWithRawResponse,
+    ImagesWithStreamingResponse,
+    AsyncImagesWithStreamingResponse,
+)
+from .models import (
+    Models,
+    AsyncModels,
+    ModelsWithRawResponse,
+    AsyncModelsWithRawResponse,
+    ModelsWithStreamingResponse,
+    AsyncModelsWithStreamingResponse,
+)
+from .batches import (
+    Batches,
+    AsyncBatches,
+    BatchesWithRawResponse,
+    AsyncBatchesWithRawResponse,
+    BatchesWithStreamingResponse,
+    AsyncBatchesWithStreamingResponse,
+)
+from .uploads import (
+    Uploads,
+    AsyncUploads,
+    UploadsWithRawResponse,
+    AsyncUploadsWithRawResponse,
+    UploadsWithStreamingResponse,
+    AsyncUploadsWithStreamingResponse,
+)
+from .responses import (
+    Responses,
+    AsyncResponses,
+    ResponsesWithRawResponse,
+    AsyncResponsesWithRawResponse,
+    ResponsesWithStreamingResponse,
+    AsyncResponsesWithStreamingResponse,
+)
+from .embeddings import (
+    Embeddings,
+    AsyncEmbeddings,
+    EmbeddingsWithRawResponse,
+    AsyncEmbeddingsWithRawResponse,
+    EmbeddingsWithStreamingResponse,
+    AsyncEmbeddingsWithStreamingResponse,
+)
+from .completions import (
+    Completions,
+    AsyncCompletions,
+    CompletionsWithRawResponse,
+    AsyncCompletionsWithRawResponse,
+    CompletionsWithStreamingResponse,
+    AsyncCompletionsWithStreamingResponse,
+)
+from .fine_tuning import (
+    FineTuning,
+    AsyncFineTuning,
+    FineTuningWithRawResponse,
+    AsyncFineTuningWithRawResponse,
+    FineTuningWithStreamingResponse,
+    AsyncFineTuningWithStreamingResponse,
+)
+from .moderations import (
+    Moderations,
+    AsyncModerations,
+    ModerationsWithRawResponse,
+    AsyncModerationsWithRawResponse,
+    ModerationsWithStreamingResponse,
+    AsyncModerationsWithStreamingResponse,
+)
+from .vector_stores import (
+    VectorStores,
+    AsyncVectorStores,
+    VectorStoresWithRawResponse,
+    AsyncVectorStoresWithRawResponse,
+    VectorStoresWithStreamingResponse,
+    AsyncVectorStoresWithStreamingResponse,
+)
+
+__all__ = [
+    "Completions",
+    "AsyncCompletions",
+    "CompletionsWithRawResponse",
+    "AsyncCompletionsWithRawResponse",
+    "CompletionsWithStreamingResponse",
+    "AsyncCompletionsWithStreamingResponse",
+    "Chat",
+    "AsyncChat",
+    "ChatWithRawResponse",
+    "AsyncChatWithRawResponse",
+    "ChatWithStreamingResponse",
+    "AsyncChatWithStreamingResponse",
+    "Embeddings",
+    "AsyncEmbeddings",
+    "EmbeddingsWithRawResponse",
+    "AsyncEmbeddingsWithRawResponse",
+    "EmbeddingsWithStreamingResponse",
+    "AsyncEmbeddingsWithStreamingResponse",
+    "Files",
+    "AsyncFiles",
+    "FilesWithRawResponse",
+    "AsyncFilesWithRawResponse",
+    "FilesWithStreamingResponse",
+    "AsyncFilesWithStreamingResponse",
+    "Images",
+    "AsyncImages",
+    "ImagesWithRawResponse",
+    "AsyncImagesWithRawResponse",
+    "ImagesWithStreamingResponse",
+    "AsyncImagesWithStreamingResponse",
+    "Audio",
+    "AsyncAudio",
+    "AudioWithRawResponse",
+    "AsyncAudioWithRawResponse",
+    "AudioWithStreamingResponse",
+    "AsyncAudioWithStreamingResponse",
+    "Moderations",
+    "AsyncModerations",
+    "ModerationsWithRawResponse",
+    "AsyncModerationsWithRawResponse",
+    "ModerationsWithStreamingResponse",
+    "AsyncModerationsWithStreamingResponse",
+    "Models",
+    "AsyncModels",
+    "ModelsWithRawResponse",
+    "AsyncModelsWithRawResponse",
+    "ModelsWithStreamingResponse",
+    "AsyncModelsWithStreamingResponse",
+    "FineTuning",
+    "AsyncFineTuning",
+    "FineTuningWithRawResponse",
+    "AsyncFineTuningWithRawResponse",
+    "FineTuningWithStreamingResponse",
+    "AsyncFineTuningWithStreamingResponse",
+    "VectorStores",
+    "AsyncVectorStores",
+    "VectorStoresWithRawResponse",
+    "AsyncVectorStoresWithRawResponse",
+    "VectorStoresWithStreamingResponse",
+    "AsyncVectorStoresWithStreamingResponse",
+    "Beta",
+    "AsyncBeta",
+    "BetaWithRawResponse",
+    "AsyncBetaWithRawResponse",
+    "BetaWithStreamingResponse",
+    "AsyncBetaWithStreamingResponse",
+    "Batches",
+    "AsyncBatches",
+    "BatchesWithRawResponse",
+    "AsyncBatchesWithRawResponse",
+    "BatchesWithStreamingResponse",
+    "AsyncBatchesWithStreamingResponse",
+    "Uploads",
+    "AsyncUploads",
+    "UploadsWithRawResponse",
+    "AsyncUploadsWithRawResponse",
+    "UploadsWithStreamingResponse",
+    "AsyncUploadsWithStreamingResponse",
+    "Responses",
+    "AsyncResponses",
+    "ResponsesWithRawResponse",
+    "AsyncResponsesWithRawResponse",
+    "ResponsesWithStreamingResponse",
+    "AsyncResponsesWithStreamingResponse",
+]
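
The re-exports above exist so every resource class can be imported from `openai.resources` directly; in normal use the same classes are reached through attributes on the client rather than constructed by hand. A minimal hedged sketch of that relationship (assumes `OPENAI_API_KEY` is set in the environment; the direct import is mainly useful for type annotations):

from openai import OpenAI
from openai.resources import Chat  # the same class re-exported above

client = OpenAI()  # reads OPENAI_API_KEY from the environment
chat: Chat = client.chat  # client attributes hand back these resource classes
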
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/audio/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/audio/__init__.py
new file mode 100644
index 00000000..7da1d2db
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/audio/__init__.py
@@ -0,0 +1,61 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .audio import (
+    Audio,
+    AsyncAudio,
+    AudioWithRawResponse,
+    AsyncAudioWithRawResponse,
+    AudioWithStreamingResponse,
+    AsyncAudioWithStreamingResponse,
+)
+from .speech import (
+    Speech,
+    AsyncSpeech,
+    SpeechWithRawResponse,
+    AsyncSpeechWithRawResponse,
+    SpeechWithStreamingResponse,
+    AsyncSpeechWithStreamingResponse,
+)
+from .translations import (
+    Translations,
+    AsyncTranslations,
+    TranslationsWithRawResponse,
+    AsyncTranslationsWithRawResponse,
+    TranslationsWithStreamingResponse,
+    AsyncTranslationsWithStreamingResponse,
+)
+from .transcriptions import (
+    Transcriptions,
+    AsyncTranscriptions,
+    TranscriptionsWithRawResponse,
+    AsyncTranscriptionsWithRawResponse,
+    TranscriptionsWithStreamingResponse,
+    AsyncTranscriptionsWithStreamingResponse,
+)
+
+__all__ = [
+    "Transcriptions",
+    "AsyncTranscriptions",
+    "TranscriptionsWithRawResponse",
+    "AsyncTranscriptionsWithRawResponse",
+    "TranscriptionsWithStreamingResponse",
+    "AsyncTranscriptionsWithStreamingResponse",
+    "Translations",
+    "AsyncTranslations",
+    "TranslationsWithRawResponse",
+    "AsyncTranslationsWithRawResponse",
+    "TranslationsWithStreamingResponse",
+    "AsyncTranslationsWithStreamingResponse",
+    "Speech",
+    "AsyncSpeech",
+    "SpeechWithRawResponse",
+    "AsyncSpeechWithRawResponse",
+    "SpeechWithStreamingResponse",
+    "AsyncSpeechWithStreamingResponse",
+    "Audio",
+    "AsyncAudio",
+    "AudioWithRawResponse",
+    "AsyncAudioWithRawResponse",
+    "AudioWithStreamingResponse",
+    "AsyncAudioWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/audio/audio.py b/.venv/lib/python3.12/site-packages/openai/resources/audio/audio.py
new file mode 100644
index 00000000..383b7073
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/audio/audio.py
@@ -0,0 +1,166 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .speech import (
+    Speech,
+    AsyncSpeech,
+    SpeechWithRawResponse,
+    AsyncSpeechWithRawResponse,
+    SpeechWithStreamingResponse,
+    AsyncSpeechWithStreamingResponse,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from .translations import (
+    Translations,
+    AsyncTranslations,
+    TranslationsWithRawResponse,
+    AsyncTranslationsWithRawResponse,
+    TranslationsWithStreamingResponse,
+    AsyncTranslationsWithStreamingResponse,
+)
+from .transcriptions import (
+    Transcriptions,
+    AsyncTranscriptions,
+    TranscriptionsWithRawResponse,
+    AsyncTranscriptionsWithRawResponse,
+    TranscriptionsWithStreamingResponse,
+    AsyncTranscriptionsWithStreamingResponse,
+)
+
+__all__ = ["Audio", "AsyncAudio"]
+
+
+class Audio(SyncAPIResource):
+    @cached_property
+    def transcriptions(self) -> Transcriptions:
+        return Transcriptions(self._client)
+
+    @cached_property
+    def translations(self) -> Translations:
+        return Translations(self._client)
+
+    @cached_property
+    def speech(self) -> Speech:
+        return Speech(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AudioWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AudioWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AudioWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AudioWithStreamingResponse(self)
+
+
+class AsyncAudio(AsyncAPIResource):
+    @cached_property
+    def transcriptions(self) -> AsyncTranscriptions:
+        return AsyncTranscriptions(self._client)
+
+    @cached_property
+    def translations(self) -> AsyncTranslations:
+        return AsyncTranslations(self._client)
+
+    @cached_property
+    def speech(self) -> AsyncSpeech:
+        return AsyncSpeech(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncAudioWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncAudioWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncAudioWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncAudioWithStreamingResponse(self)
+
+
+class AudioWithRawResponse:
+    def __init__(self, audio: Audio) -> None:
+        self._audio = audio
+
+    @cached_property
+    def transcriptions(self) -> TranscriptionsWithRawResponse:
+        return TranscriptionsWithRawResponse(self._audio.transcriptions)
+
+    @cached_property
+    def translations(self) -> TranslationsWithRawResponse:
+        return TranslationsWithRawResponse(self._audio.translations)
+
+    @cached_property
+    def speech(self) -> SpeechWithRawResponse:
+        return SpeechWithRawResponse(self._audio.speech)
+
+
+class AsyncAudioWithRawResponse:
+    def __init__(self, audio: AsyncAudio) -> None:
+        self._audio = audio
+
+    @cached_property
+    def transcriptions(self) -> AsyncTranscriptionsWithRawResponse:
+        return AsyncTranscriptionsWithRawResponse(self._audio.transcriptions)
+
+    @cached_property
+    def translations(self) -> AsyncTranslationsWithRawResponse:
+        return AsyncTranslationsWithRawResponse(self._audio.translations)
+
+    @cached_property
+    def speech(self) -> AsyncSpeechWithRawResponse:
+        return AsyncSpeechWithRawResponse(self._audio.speech)
+
+
+class AudioWithStreamingResponse:
+    def __init__(self, audio: Audio) -> None:
+        self._audio = audio
+
+    @cached_property
+    def transcriptions(self) -> TranscriptionsWithStreamingResponse:
+        return TranscriptionsWithStreamingResponse(self._audio.transcriptions)
+
+    @cached_property
+    def translations(self) -> TranslationsWithStreamingResponse:
+        return TranslationsWithStreamingResponse(self._audio.translations)
+
+    @cached_property
+    def speech(self) -> SpeechWithStreamingResponse:
+        return SpeechWithStreamingResponse(self._audio.speech)
+
+
+class AsyncAudioWithStreamingResponse:
+    def __init__(self, audio: AsyncAudio) -> None:
+        self._audio = audio
+
+    @cached_property
+    def transcriptions(self) -> AsyncTranscriptionsWithStreamingResponse:
+        return AsyncTranscriptionsWithStreamingResponse(self._audio.transcriptions)
+
+    @cached_property
+    def translations(self) -> AsyncTranslationsWithStreamingResponse:
+        return AsyncTranslationsWithStreamingResponse(self._audio.translations)
+
+    @cached_property
+    def speech(self) -> AsyncSpeechWithStreamingResponse:
+        return AsyncSpeechWithStreamingResponse(self._audio.speech)
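
`Audio` makes no HTTP calls of its own: each sub-resource is built lazily via `cached_property`, and the `*WithRawResponse` / `*WithStreamingResponse` classes mirror the same tree one level down. A hedged sketch of navigating the namespace, assuming a configured client:

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set

# cached_property means each sub-resource is constructed once per client
# and reused on later accesses.
assert client.audio.transcriptions is client.audio.transcriptions

# The wrappers mirror the namespace: the same endpoints, but returning
# raw response objects instead of parsed content.
raw_transcriptions = client.audio.with_raw_response.transcriptions
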
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/audio/speech.py b/.venv/lib/python3.12/site-packages/openai/resources/audio/speech.py
new file mode 100644
index 00000000..529e3a47
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/audio/speech.py
@@ -0,0 +1,244 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+    StreamedBinaryAPIResponse,
+    AsyncStreamedBinaryAPIResponse,
+    to_custom_streamed_response_wrapper,
+    async_to_custom_streamed_response_wrapper,
+)
+from ...types.audio import speech_create_params
+from ..._base_client import make_request_options
+from ...types.audio.speech_model import SpeechModel
+
+__all__ = ["Speech", "AsyncSpeech"]
+
+
+class Speech(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> SpeechWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return SpeechWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> SpeechWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return SpeechWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        input: str,
+        model: Union[str, SpeechModel],
+        voice: Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"],
+        instructions: str | NotGiven = NOT_GIVEN,
+        response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN,
+        speed: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> _legacy_response.HttpxBinaryResponseContent:
+        """
+        Generates audio from the input text.
+
+        Args:
+          input: The text to generate audio for. The maximum length is 4096 characters.
+
+          model:
+              One of the available [TTS models](https://platform.openai.com/docs/models#tts):
+              `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`.
+
+          voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`,
+              `coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the
+              voices are available in the
+              [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).
+
+          instructions: Control the voice of your generated audio with additional instructions. Does not
+              work with `tts-1` or `tts-1-hd`.
+
+          response_format: The format to return audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`,
+              `wav`, and `pcm`.
+
+          speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is
+              the default.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
+        return self._post(
+            "/audio/speech",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "voice": voice,
+                    "instructions": instructions,
+                    "response_format": response_format,
+                    "speed": speed,
+                },
+                speech_create_params.SpeechCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=_legacy_response.HttpxBinaryResponseContent,
+        )
+
+
+class AsyncSpeech(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncSpeechWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncSpeechWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncSpeechWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncSpeechWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        input: str,
+        model: Union[str, SpeechModel],
+        voice: Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"],
+        instructions: str | NotGiven = NOT_GIVEN,
+        response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN,
+        speed: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> _legacy_response.HttpxBinaryResponseContent:
+        """
+        Generates audio from the input text.
+
+        Args:
+          input: The text to generate audio for. The maximum length is 4096 characters.
+
+          model:
+              One of the available [TTS models](https://platform.openai.com/docs/models#tts):
+              `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`.
+
+          voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`,
+              `coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the
+              voices are available in the
+              [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).
+
+          instructions: Control the voice of your generated audio with additional instructions. Does not
+              work with `tts-1` or `tts-1-hd`.
+
+          response_format: The format to return audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`,
+              `wav`, and `pcm`.
+
+          speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is
+              the default.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
+        return await self._post(
+            "/audio/speech",
+            body=await async_maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "voice": voice,
+                    "instructions": instructions,
+                    "response_format": response_format,
+                    "speed": speed,
+                },
+                speech_create_params.SpeechCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=_legacy_response.HttpxBinaryResponseContent,
+        )
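
The async variant mirrors the sync signature; only the body transform and the `_post` call are awaited. A hedged sketch of driving it from `asyncio` (the output path is hypothetical):

import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()  # assumes OPENAI_API_KEY is set
    audio = await client.audio.speech.create(
        model="tts-1",
        voice="nova",
        input="Hello from the async client.",
    )
    audio.write_to_file("speech.mp3")  # hypothetical output path


asyncio.run(main())
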
+
+
+class SpeechWithRawResponse:
+    def __init__(self, speech: Speech) -> None:
+        self._speech = speech
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            speech.create,
+        )
+
+
+class AsyncSpeechWithRawResponse:
+    def __init__(self, speech: AsyncSpeech) -> None:
+        self._speech = speech
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            speech.create,
+        )
+
+
+class SpeechWithStreamingResponse:
+    def __init__(self, speech: Speech) -> None:
+        self._speech = speech
+
+        self.create = to_custom_streamed_response_wrapper(
+            speech.create,
+            StreamedBinaryAPIResponse,
+        )
+
+
+class AsyncSpeechWithStreamingResponse:
+    def __init__(self, speech: AsyncSpeech) -> None:
+        self._speech = speech
+
+        self.create = async_to_custom_streamed_response_wrapper(
+            speech.create,
+            AsyncStreamedBinaryAPIResponse,
+        )
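
Putting speech.py together: `create` returns a buffered binary response wrapper, while `SpeechWithStreamingResponse` (defined just above) wraps the same method so the body is not read eagerly. A hedged sketch of both call styles (file names are hypothetical):

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set

# Buffered: create() returns _legacy_response.HttpxBinaryResponseContent.
audio = client.audio.speech.create(
    model="tts-1",
    voice="alloy",
    input="Hello, world.",
    response_format="mp3",
)
audio.write_to_file("speech.mp3")  # hypothetical output path

# Streaming: the wrapper yields a StreamedBinaryAPIResponse, so the body
# can be written to disk chunk by chunk instead of buffered in memory.
with client.audio.speech.with_streaming_response.create(
    model="tts-1",
    voice="alloy",
    input="Hello, world.",
) as response:
    response.stream_to_file("speech.mp3")
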
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/audio/transcriptions.py b/.venv/lib/python3.12/site-packages/openai/resources/audio/transcriptions.py
new file mode 100644
index 00000000..2a77f91d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/audio/transcriptions.py
@@ -0,0 +1,682 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, List, Union, Mapping, Optional, cast
+from typing_extensions import Literal, overload, assert_never
+
+import httpx
+
+from ... import _legacy_response
+from ...types import AudioResponseFormat
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
+from ..._utils import (
+    extract_files,
+    required_args,
+    maybe_transform,
+    deepcopy_minimal,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ..._streaming import Stream, AsyncStream
+from ...types.audio import transcription_create_params
+from ..._base_client import make_request_options
+from ...types.audio_model import AudioModel
+from ...types.audio.transcription import Transcription
+from ...types.audio_response_format import AudioResponseFormat
+from ...types.audio.transcription_include import TranscriptionInclude
+from ...types.audio.transcription_verbose import TranscriptionVerbose
+from ...types.audio.transcription_stream_event import TranscriptionStreamEvent
+from ...types.audio.transcription_create_response import TranscriptionCreateResponse
+
+__all__ = ["Transcriptions", "AsyncTranscriptions"]
+
+log: logging.Logger = logging.getLogger("openai.audio.transcriptions")
+
+
+class Transcriptions(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> TranscriptionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return TranscriptionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> TranscriptionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return TranscriptionsWithStreamingResponse(self)
+
+    @overload
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
+        response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Transcription: ...
+
+    @overload
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
+        response_format: Literal["verbose_json"],
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> TranscriptionVerbose: ...
+
+    @overload
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Literal["text", "srt", "vtt"],
+        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> str: ...
+
+    @overload
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        stream: Literal[True],
+        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[TranscriptionStreamEvent]:
+        """
+        Transcribes audio into the input language.
+
+        Args:
+          file:
+              The audio file object (not file name) to transcribe, in one of these formats:
+              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+
+          model: ID of the model to use. The options are `gpt-4o-transcribe`,
+              `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
+              Whisper V2 model).
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
+              for more information.
+
+              Note: Streaming is not supported for the `whisper-1` model and will be ignored.
+
+          include: Additional information to include in the transcription response. `logprobs` will
+              return the log probabilities of the tokens in the response to understand the
+              model's confidence in the transcription. `logprobs` only works with
+              response_format set to `json` and only with the models `gpt-4o-transcribe` and
+              `gpt-4o-mini-transcribe`.
+
+          language: The language of the input audio. Supplying the input language in
+              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+              format will improve accuracy and latency.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The
+              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+              should match the audio language.
+
+          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
+              `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
+              the only supported format is `json`.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          timestamp_granularities: The timestamp granularities to populate for this transcription.
+              `response_format` must be set to `verbose_json` to use timestamp granularities.
+              Either or both of these options are supported: `word` or `segment`. Note: There
+              is no additional latency for segment timestamps, but generating word timestamps
+              incurs additional latency.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        stream: bool,
+        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> TranscriptionCreateResponse | Stream[TranscriptionStreamEvent]:
+        """
+        Transcribes audio into the input language.
+
+        Args:
+          file:
+              The audio file object (not file name) to transcribe, in one of these formats:
+              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+
+          model: ID of the model to use. The options are `gpt-4o-transcribe`,
+              `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
+              Whisper V2 model).
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
+              for more information.
+
+              Note: Streaming is not supported for the `whisper-1` model and will be ignored.
+
+          include: Additional information to include in the transcription response. `logprobs` will
+              return the log probabilities of the tokens in the response to understand the
+              model's confidence in the transcription. `logprobs` only works with
+              response_format set to `json` and only with the models `gpt-4o-transcribe` and
+              `gpt-4o-mini-transcribe`.
+
+          language: The language of the input audio. Supplying the input language in
+              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+              format will improve accuracy and latency.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The
+              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+              should match the audio language.
+
+          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
+              `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
+              the only supported format is `json`.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          timestamp_granularities: The timestamp granularities to populate for this transcription.
+              `response_format` must be set to `verbose_json` to use timestamp granularities.
+              Either or both of these options are supported: `word` or `segment`. Note: There
+              is no additional latency for segment timestamps, but generating word timestamps
+              incurs additional latency.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["file", "model"], ["file", "model", "stream"])
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> str | Transcription | TranscriptionVerbose | Stream[TranscriptionStreamEvent]:
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "model": model,
+                "include": include,
+                "language": language,
+                "prompt": prompt,
+                "response_format": response_format,
+                "stream": stream,
+                "temperature": temperature,
+                "timestamp_granularities": timestamp_granularities,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # Note: the actual Content-Type header sent to the server will
+        # contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return self._post(  # type: ignore[return-value]
+            "/audio/transcriptions",
+            body=maybe_transform(body, transcription_create_params.TranscriptionCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=_get_response_format_type(response_format),
+            stream=stream or False,
+            stream_cls=Stream[TranscriptionStreamEvent],
+        )
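
The non-overloaded implementation above fans out to four return types depending on `response_format` and `stream` (the `_get_response_format_type` helper it passes to `cast_to` appears later in this file, outside the hunk shown). A hedged sketch of the two common call shapes (input paths are hypothetical):

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set

# Non-streaming: with the default "json" response format the parsed
# result is a Transcription model.
with open("audio.mp3", "rb") as f:  # hypothetical input file
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=f,
    )
print(transcription.text)

# Streaming (not supported by whisper-1): stream=True returns a
# Stream[TranscriptionStreamEvent] to iterate over.
with open("audio.mp3", "rb") as f:
    events = client.audio.transcriptions.create(
        model="gpt-4o-mini-transcribe",
        file=f,
        stream=True,
    )
    for event in events:
        print(event.type)
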
+
+
+class AsyncTranscriptions(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncTranscriptionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncTranscriptionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncTranscriptionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncTranscriptionsWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Transcription: ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
+        response_format: Literal["verbose_json"],
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> TranscriptionVerbose: ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
+        response_format: Literal["text", "srt", "vtt"],
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> str: ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        stream: Literal[True],
+        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[TranscriptionStreamEvent]:
+        """
+        Transcribes audio into the input language.
+
+        Args:
+          file:
+              The audio file object (not file name) to transcribe, in one of these formats:
+              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+
+          model: ID of the model to use. The options are `gpt-4o-transcribe`,
+              `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
+              Whisper V2 model).
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
+              for more information.
+
+              Note: Streaming is not supported for the `whisper-1` model and will be ignored.
+
+          include: Additional information to include in the transcription response. `logprobs` will
+              return the log probabilities of the tokens in the response to understand the
+              model's confidence in the transcription. `logprobs` only works with
+              response_format set to `json` and only with the models `gpt-4o-transcribe` and
+              `gpt-4o-mini-transcribe`.
+
+          language: The language of the input audio. Supplying the input language in
+              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+              format will improve accuracy and latency.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The
+              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+              should match the audio language.
+
+          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
+              `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
+              the only supported format is `json`.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          timestamp_granularities: The timestamp granularities to populate for this transcription.
+              `response_format` must be set to `verbose_json` to use timestamp granularities.
+              Either or both of these options are supported: `word` or `segment`. Note: There
+              is no additional latency for segment timestamps, but generating word timestamps
+              incurs additional latency.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        stream: bool,
+        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> TranscriptionCreateResponse | AsyncStream[TranscriptionStreamEvent]:
+        """
+        Transcribes audio into the input language.
+
+        Args:
+          file:
+              The audio file object (not file name) to transcribe, in one of these formats:
+              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+
+          model: ID of the model to use. The options are `gpt-4o-transcribe`,
+              `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
+              Whisper V2 model).
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
+              for more information.
+
+              Note: Streaming is not supported for the `whisper-1` model and will be ignored.
+
+          include: Additional information to include in the transcription response. `logprobs` will
+              return the log probabilities of the tokens in the response to understand the
+              model's confidence in the transcription. `logprobs` only works with
+              response_format set to `json` and only with the models `gpt-4o-transcribe` and
+              `gpt-4o-mini-transcribe`.
+
+          language: The language of the input audio. Supplying the input language in
+              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+              format will improve accuracy and latency.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The
+              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+              should match the audio language.
+
+          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
+              `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
+              the only supported format is `json`.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          timestamp_granularities: The timestamp granularities to populate for this transcription.
+              `response_format` must be set to `verbose_json` to use timestamp granularities.
+              Either or both of these options are supported: `word` or `segment`. Note: There
+              is no additional latency for segment timestamps, but generating word timestamps
+              incurs additional latency.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["file", "model"], ["file", "model", "stream"])
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Transcription | TranscriptionVerbose | str | AsyncStream[TranscriptionStreamEvent]:
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "model": model,
+                "include": include,
+                "language": language,
+                "prompt": prompt,
+                "response_format": response_format,
+                "stream": stream,
+                "temperature": temperature,
+                "timestamp_granularities": timestamp_granularities,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return await self._post(
+            "/audio/transcriptions",
+            body=await async_maybe_transform(body, transcription_create_params.TranscriptionCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=_get_response_format_type(response_format),
+            stream=stream or False,
+            stream_cls=AsyncStream[TranscriptionStreamEvent],
+        )
+
+
+class TranscriptionsWithRawResponse:
+    def __init__(self, transcriptions: Transcriptions) -> None:
+        self._transcriptions = transcriptions
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            transcriptions.create,
+        )
+
+
+class AsyncTranscriptionsWithRawResponse:
+    def __init__(self, transcriptions: AsyncTranscriptions) -> None:
+        self._transcriptions = transcriptions
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            transcriptions.create,
+        )
+
+
+class TranscriptionsWithStreamingResponse:
+    def __init__(self, transcriptions: Transcriptions) -> None:
+        self._transcriptions = transcriptions
+
+        self.create = to_streamed_response_wrapper(
+            transcriptions.create,
+        )
+
+
+class AsyncTranscriptionsWithStreamingResponse:
+    def __init__(self, transcriptions: AsyncTranscriptions) -> None:
+        self._transcriptions = transcriptions
+
+        self.create = async_to_streamed_response_wrapper(
+            transcriptions.create,
+        )
+
+
+def _get_response_format_type(
+    response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven,
+) -> type[Transcription | TranscriptionVerbose | str]:
+    if isinstance(response_format, NotGiven) or response_format is None:  # pyright: ignore[reportUnnecessaryComparison]
+        return Transcription
+
+    if response_format == "json":
+        return Transcription
+    elif response_format == "verbose_json":
+        return TranscriptionVerbose
+    elif response_format == "srt" or response_format == "text" or response_format == "vtt":
+        return str
+    elif TYPE_CHECKING:  # type: ignore[unreachable]
+        assert_never(response_format)
+    else:
+        log.warning("Unexpected audio response format: %s", response_format)
+        return Transcription
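
The overloads above resolve the return type of `create` from `response_format` and
`stream`: `json` parses to `Transcription`, `verbose_json` to `TranscriptionVerbose`,
and `text`/`srt`/`vtt` come back as a plain `str`. A minimal usage sketch, assuming an
`OPENAI_API_KEY` in the environment; the file name and model choice are illustrative:

    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment

    with open("audio.mp3", "rb") as audio_file:
        # verbose_json is required for timestamp_granularities; word-level
        # timestamps add latency, segment-level ones do not.
        transcript = client.audio.transcriptions.create(
            file=audio_file,
            model="whisper-1",
            response_format="verbose_json",
            timestamp_granularities=["segment"],
        )

    print(transcript.text)  # TranscriptionVerbose also exposes .duration and .segments
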
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/audio/translations.py b/.venv/lib/python3.12/site-packages/openai/resources/audio/translations.py
new file mode 100644
index 00000000..f55dbd0e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/audio/translations.py
@@ -0,0 +1,372 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Union, Mapping, cast
+from typing_extensions import Literal, overload, assert_never
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
+from ..._utils import (
+    extract_files,
+    maybe_transform,
+    deepcopy_minimal,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...types.audio import translation_create_params
+from ..._base_client import make_request_options
+from ...types.audio_model import AudioModel
+from ...types.audio.translation import Translation
+from ...types.audio_response_format import AudioResponseFormat
+from ...types.audio.translation_verbose import TranslationVerbose
+
+__all__ = ["Translations", "AsyncTranslations"]
+
+log: logging.Logger = logging.getLogger("openai.audio.translations")
+
+
+class Translations(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> TranslationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return TranslationsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> TranslationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return TranslationsWithStreamingResponse(self)
+
+    @overload
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Translation: ...
+
+    @overload
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Literal["verbose_json"],
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> TranslationVerbose: ...
+
+    @overload
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Literal["text", "srt", "vtt"],
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> str: ...
+
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Union[Literal["json", "text", "srt", "verbose_json", "vtt"], NotGiven] = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Translation | TranslationVerbose | str:
+        """
+        Translates audio into English.
+
+        Args:
+          file: The audio file object (not file name) to translate, in one of these formats: flac,
+              mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+
+          model: ID of the model to use. Only `whisper-1` (which is powered by our open source
+              Whisper V2 model) is currently available.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The
+              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+              should be in English.
+
+          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
+              `verbose_json`, or `vtt`.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "model": model,
+                "prompt": prompt,
+                "response_format": response_format,
+                "temperature": temperature,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return self._post(  # type: ignore[return-value]
+            "/audio/translations",
+            body=maybe_transform(body, translation_create_params.TranslationCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=_get_response_format_type(response_format),
+        )
+
+
+class AsyncTranslations(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncTranslationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncTranslationsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncTranslationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncTranslationsWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Translation: ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Literal["verbose_json"],
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> TranslationVerbose: ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Literal["text", "srt", "vtt"],
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> str: ...
+
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Translation | TranslationVerbose | str:
+        """
+        Translates audio into English.
+
+        Args:
+          file: The audio file object (not file name) to translate, in one of these formats: flac,
+              mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+
+          model: ID of the model to use. Only `whisper-1` (which is powered by our open source
+              Whisper V2 model) is currently available.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The
+              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+              should be in English.
+
+          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
+              `verbose_json`, or `vtt`.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "model": model,
+                "prompt": prompt,
+                "response_format": response_format,
+                "temperature": temperature,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return await self._post(
+            "/audio/translations",
+            body=await async_maybe_transform(body, translation_create_params.TranslationCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=_get_response_format_type(response_format),
+        )
+
+
+class TranslationsWithRawResponse:
+    def __init__(self, translations: Translations) -> None:
+        self._translations = translations
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            translations.create,
+        )
+
+
+class AsyncTranslationsWithRawResponse:
+    def __init__(self, translations: AsyncTranslations) -> None:
+        self._translations = translations
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            translations.create,
+        )
+
+
+class TranslationsWithStreamingResponse:
+    def __init__(self, translations: Translations) -> None:
+        self._translations = translations
+
+        self.create = to_streamed_response_wrapper(
+            translations.create,
+        )
+
+
+class AsyncTranslationsWithStreamingResponse:
+    def __init__(self, translations: AsyncTranslations) -> None:
+        self._translations = translations
+
+        self.create = async_to_streamed_response_wrapper(
+            translations.create,
+        )
+
+
+def _get_response_format_type(
+    response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven,
+) -> type[Translation | TranslationVerbose | str]:
+    if isinstance(response_format, NotGiven) or response_format is None:  # pyright: ignore[reportUnnecessaryComparison]
+        return Translation
+
+    if response_format == "json":
+        return Translation
+    elif response_format == "verbose_json":
+        return TranslationVerbose
+    elif response_format == "srt" or response_format == "text" or response_format == "vtt":
+        return str
+    elif TYPE_CHECKING:  # type: ignore[unreachable]
+        assert_never(response_format)
+    else:
+        log.warning("Unexpected audio response format: %s", response_format)
+        return Translation
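
Usage mirrors transcriptions, minus the streaming, `include`, and timestamp options,
and the output is always English. A minimal sketch under the same assumptions (the
input file name is illustrative; `whisper-1` is currently the only supported model):

    from openai import OpenAI

    client = OpenAI()

    with open("interview_es.mp3", "rb") as audio_file:
        # The "text" overload types the return value as a plain str.
        english_text = client.audio.translations.create(
            file=audio_file,
            model="whisper-1",
            response_format="text",
        )

    print(english_text)
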
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/batches.py b/.venv/lib/python3.12/site-packages/openai/resources/batches.py
new file mode 100644
index 00000000..b7a299be
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/batches.py
@@ -0,0 +1,517 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal
+
+import httpx
+
+from .. import _legacy_response
+from ..types import batch_list_params, batch_create_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ..pagination import SyncCursorPage, AsyncCursorPage
+from ..types.batch import Batch
+from .._base_client import AsyncPaginator, make_request_options
+from ..types.shared_params.metadata import Metadata
+
+__all__ = ["Batches", "AsyncBatches"]
+
+
+class Batches(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> BatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return BatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> BatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return BatchesWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        completion_window: Literal["24h"],
+        endpoint: Literal["/v1/responses", "/v1/chat/completions", "/v1/embeddings", "/v1/completions"],
+        input_file_id: str,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Batch:
+        """
+        Creates and executes a batch from an uploaded file of requests
+
+        Args:
+          completion_window: The time frame within which the batch should be processed. Currently only `24h`
+              is supported.
+
+          endpoint: The endpoint to be used for all requests in the batch. Currently
+              `/v1/responses`, `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions`
+              are supported. Note that `/v1/embeddings` batches are also restricted to a
+              maximum of 50,000 embedding inputs across all requests in the batch.
+
+          input_file_id: The ID of an uploaded file that contains requests for the new batch.
+
+              See [upload file](https://platform.openai.com/docs/api-reference/files/create)
+              for how to upload a file.
+
+              Your input file must be formatted as a
+              [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input),
+              and must be uploaded with the purpose `batch`. The file can contain up to 50,000
+              requests, and can be up to 200 MB in size.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/batches",
+            body=maybe_transform(
+                {
+                    "completion_window": completion_window,
+                    "endpoint": endpoint,
+                    "input_file_id": input_file_id,
+                    "metadata": metadata,
+                },
+                batch_create_params.BatchCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Batch,
+        )
+
+    def retrieve(
+        self,
+        batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Batch:
+        """
+        Retrieves a batch.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        return self._get(
+            f"/batches/{batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Batch,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[Batch]:
+        """List your organization's batches.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines
+              your place in the list. For instance, if you make a list request and
+              receive 100 objects, ending with obj_foo, your subsequent call can
+              include after=obj_foo in order to fetch the next page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/batches",
+            page=SyncCursorPage[Batch],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                    },
+                    batch_list_params.BatchListParams,
+                ),
+            ),
+            model=Batch,
+        )
+
+    def cancel(
+        self,
+        batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Batch:
+        """Cancels an in-progress batch.
+
+        The batch will be in status `cancelling` for up to
+        10 minutes, before changing to `cancelled`, where it will have partial results
+        (if any) available in the output file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        return self._post(
+            f"/batches/{batch_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Batch,
+        )
+
+
+class AsyncBatches(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncBatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncBatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncBatchesWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        completion_window: Literal["24h"],
+        endpoint: Literal["/v1/responses", "/v1/chat/completions", "/v1/embeddings", "/v1/completions"],
+        input_file_id: str,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Batch:
+        """
+        Creates and executes a batch from an uploaded file of requests
+
+        Args:
+          completion_window: The time frame within which the batch should be processed. Currently only `24h`
+              is supported.
+
+          endpoint: The endpoint to be used for all requests in the batch. Currently
+              `/v1/responses`, `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions`
+              are supported. Note that `/v1/embeddings` batches are also restricted to a
+              maximum of 50,000 embedding inputs across all requests in the batch.
+
+          input_file_id: The ID of an uploaded file that contains requests for the new batch.
+
+              See [upload file](https://platform.openai.com/docs/api-reference/files/create)
+              for how to upload a file.
+
+              Your input file must be formatted as a
+              [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input),
+              and must be uploaded with the purpose `batch`. The file can contain up to 50,000
+              requests, and can be up to 200 MB in size.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/batches",
+            body=await async_maybe_transform(
+                {
+                    "completion_window": completion_window,
+                    "endpoint": endpoint,
+                    "input_file_id": input_file_id,
+                    "metadata": metadata,
+                },
+                batch_create_params.BatchCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Batch,
+        )
+
+    async def retrieve(
+        self,
+        batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Batch:
+        """
+        Retrieves a batch.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        return await self._get(
+            f"/batches/{batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Batch,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[Batch, AsyncCursorPage[Batch]]:
+        """List your organization's batches.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines
+              your place in the list. For instance, if you make a list request and
+              receive 100 objects, ending with obj_foo, your subsequent call can
+              include after=obj_foo in order to fetch the next page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/batches",
+            page=AsyncCursorPage[Batch],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                    },
+                    batch_list_params.BatchListParams,
+                ),
+            ),
+            model=Batch,
+        )
+
+    async def cancel(
+        self,
+        batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Batch:
+        """Cancels an in-progress batch.
+
+        The batch will be in status `cancelling` for up to
+        10 minutes, before changing to `cancelled`, where it will have partial results
+        (if any) available in the output file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        return await self._post(
+            f"/batches/{batch_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Batch,
+        )
+
+
+class BatchesWithRawResponse:
+    def __init__(self, batches: Batches) -> None:
+        self._batches = batches
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            batches.list,
+        )
+        self.cancel = _legacy_response.to_raw_response_wrapper(
+            batches.cancel,
+        )
+
+
+class AsyncBatchesWithRawResponse:
+    def __init__(self, batches: AsyncBatches) -> None:
+        self._batches = batches
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            batches.list,
+        )
+        self.cancel = _legacy_response.async_to_raw_response_wrapper(
+            batches.cancel,
+        )
+
+
+class BatchesWithStreamingResponse:
+    def __init__(self, batches: Batches) -> None:
+        self._batches = batches
+
+        self.create = to_streamed_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            batches.list,
+        )
+        self.cancel = to_streamed_response_wrapper(
+            batches.cancel,
+        )
+
+
+class AsyncBatchesWithStreamingResponse:
+    def __init__(self, batches: AsyncBatches) -> None:
+        self._batches = batches
+
+        self.create = async_to_streamed_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            batches.list,
+        )
+        self.cancel = async_to_streamed_response_wrapper(
+            batches.cancel,
+        )
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/__init__.py
new file mode 100644
index 00000000..87fea252
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/__init__.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .beta import (
+    Beta,
+    AsyncBeta,
+    BetaWithRawResponse,
+    AsyncBetaWithRawResponse,
+    BetaWithStreamingResponse,
+    AsyncBetaWithStreamingResponse,
+)
+from .threads import (
+    Threads,
+    AsyncThreads,
+    ThreadsWithRawResponse,
+    AsyncThreadsWithRawResponse,
+    ThreadsWithStreamingResponse,
+    AsyncThreadsWithStreamingResponse,
+)
+from .assistants import (
+    Assistants,
+    AsyncAssistants,
+    AssistantsWithRawResponse,
+    AsyncAssistantsWithRawResponse,
+    AssistantsWithStreamingResponse,
+    AsyncAssistantsWithStreamingResponse,
+)
+
+__all__ = [
+    "Assistants",
+    "AsyncAssistants",
+    "AssistantsWithRawResponse",
+    "AsyncAssistantsWithRawResponse",
+    "AssistantsWithStreamingResponse",
+    "AsyncAssistantsWithStreamingResponse",
+    "Threads",
+    "AsyncThreads",
+    "ThreadsWithRawResponse",
+    "AsyncThreadsWithRawResponse",
+    "ThreadsWithStreamingResponse",
+    "AsyncThreadsWithStreamingResponse",
+    "Beta",
+    "AsyncBeta",
+    "BetaWithRawResponse",
+    "AsyncBetaWithRawResponse",
+    "BetaWithStreamingResponse",
+    "AsyncBetaWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/assistants.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/assistants.py
new file mode 100644
index 00000000..1c7cbf37
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/assistants.py
@@ -0,0 +1,1004 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...pagination import SyncCursorPage, AsyncCursorPage
+from ...types.beta import (
+    assistant_list_params,
+    assistant_create_params,
+    assistant_update_params,
+)
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.beta.assistant import Assistant
+from ...types.shared.chat_model import ChatModel
+from ...types.beta.assistant_deleted import AssistantDeleted
+from ...types.shared_params.metadata import Metadata
+from ...types.shared.reasoning_effort import ReasoningEffort
+from ...types.beta.assistant_tool_param import AssistantToolParam
+from ...types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
+
+__all__ = ["Assistants", "AsyncAssistants"]
+
+
+class Assistants(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AssistantsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AssistantsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AssistantsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AssistantsWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        model: Union[str, ChatModel],
+        description: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        name: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Assistant:
+        """
+        Create an assistant with a model and instructions.
+
+        Args:
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          description: The description of the assistant. The maximum length is 512 characters.
+
+          instructions: The system instructions that the assistant uses. The maximum length is 256,000
+              characters.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          name: The name of the assistant. The maximum length is 256 characters.
+
+          reasoning_effort: **o-series models only**
+
+              Constrains effort on reasoning for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+              result in faster responses and fewer tokens used on reasoning in a response.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: A list of tools enabled on the assistant. There can be a maximum of 128 tools per
+              assistant. Tools can be of types `code_interpreter`, `file_search`, or
+              `function`.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            "/assistants",
+            body=maybe_transform(
+                {
+                    "model": model,
+                    "description": description,
+                    "instructions": instructions,
+                    "metadata": metadata,
+                    "name": name,
+                    "reasoning_effort": reasoning_effort,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_resources": tool_resources,
+                    "tools": tools,
+                    "top_p": top_p,
+                },
+                assistant_create_params.AssistantCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Assistant,
+        )
+
+    def retrieve(
+        self,
+        assistant_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Assistant:
+        """
+        Retrieves an assistant.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not assistant_id:
+            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get(
+            f"/assistants/{assistant_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Assistant,
+        )
+
+    def update(
+        self,
+        assistant_id: str,
+        *,
+        description: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[
+            str,
+            Literal[
+                "o3-mini",
+                "o3-mini-2025-01-31",
+                "o1",
+                "o1-2024-12-17",
+                "gpt-4o",
+                "gpt-4o-2024-11-20",
+                "gpt-4o-2024-08-06",
+                "gpt-4o-2024-05-13",
+                "gpt-4o-mini",
+                "gpt-4o-mini-2024-07-18",
+                "gpt-4.5-preview",
+                "gpt-4.5-preview-2025-02-27",
+                "gpt-4-turbo",
+                "gpt-4-turbo-2024-04-09",
+                "gpt-4-0125-preview",
+                "gpt-4-turbo-preview",
+                "gpt-4-1106-preview",
+                "gpt-4-vision-preview",
+                "gpt-4",
+                "gpt-4-0314",
+                "gpt-4-0613",
+                "gpt-4-32k",
+                "gpt-4-32k-0314",
+                "gpt-4-32k-0613",
+                "gpt-3.5-turbo",
+                "gpt-3.5-turbo-16k",
+                "gpt-3.5-turbo-0613",
+                "gpt-3.5-turbo-1106",
+                "gpt-3.5-turbo-0125",
+                "gpt-3.5-turbo-16k-0613",
+            ],
+        ]
+        | NotGiven = NOT_GIVEN,
+        name: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Assistant:
+        """Modifies an assistant.
+
+        Args:
+          description: The description of the assistant. The maximum length is 512
+              characters.
+
+          instructions: The system instructions that the assistant uses. The maximum length is 256,000
+              characters.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          name: The name of the assistant. The maximum length is 256 characters.
+
+          reasoning_effort: **o-series models only**
+
+              Constrains effort on reasoning for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+              result in faster responses and fewer tokens used on reasoning in a response.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: A list of tools enabled on the assistant. There can be a maximum of 128 tools per
+              assistant. Tools can be of types `code_interpreter`, `file_search`, or
+              `function`.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
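+
+        Example (a minimal sketch; the assistant ID `asst_abc123` is a placeholder,
+        and the name/instructions are illustrative):
+
+        ```py
+        from openai import OpenAI
+
+        client = OpenAI()
+        assistant = client.beta.assistants.update(
+            "asst_abc123",
+            name="Math Tutor",
+            instructions="You are a concise math tutor. Answer step by step.",
+            temperature=0.2,
+        )
+        print(assistant.name)
+        ```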
+        """
+        if not assistant_id:
+            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/assistants/{assistant_id}",
+            body=maybe_transform(
+                {
+                    "description": description,
+                    "instructions": instructions,
+                    "metadata": metadata,
+                    "model": model,
+                    "name": name,
+                    "reasoning_effort": reasoning_effort,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_resources": tool_resources,
+                    "tools": tools,
+                    "top_p": top_p,
+                },
+                assistant_update_params.AssistantUpdateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Assistant,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[Assistant]:
+        """Returns a list of assistants.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
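+
+        Example (a minimal sketch; the returned `SyncCursorPage` is iterable, so
+        iterating it pages through results automatically):
+
+        ```py
+        from openai import OpenAI
+
+        client = OpenAI()
+        for assistant in client.beta.assistants.list(order="desc", limit=20):
+            print(assistant.id, assistant.name)
+        ```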
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            "/assistants",
+            page=SyncCursorPage[Assistant],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    assistant_list_params.AssistantListParams,
+                ),
+            ),
+            model=Assistant,
+        )
+
+    def delete(
+        self,
+        assistant_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantDeleted:
+        """
+        Delete an assistant.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not assistant_id:
+            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._delete(
+            f"/assistants/{assistant_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=AssistantDeleted,
+        )
+
+
+class AsyncAssistants(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncAssistantsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
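+
+        Example (a minimal sketch; the assistant ID and header name are
+        placeholders, and the raw response exposes `.headers` plus a `.parse()`
+        method for the typed object):
+
+        ```py
+        import asyncio
+
+        from openai import AsyncOpenAI
+
+        client = AsyncOpenAI()
+
+
+        async def main() -> None:
+            response = await client.beta.assistants.with_raw_response.retrieve("asst_abc123")
+            print(response.headers.get("x-request-id"))
+            assistant = response.parse()
+            print(assistant.id)
+
+
+        asyncio.run(main())
+        ```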
+        """
+        return AsyncAssistantsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncAssistantsWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        model: Union[str, ChatModel],
+        description: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        name: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Assistant:
+        """
+        Create an assistant with a model and instructions.
+
+        Args:
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          description: The description of the assistant. The maximum length is 512 characters.
+
+          instructions: The system instructions that the assistant uses. The maximum length is 256,000
+              characters.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          name: The name of the assistant. The maximum length is 256 characters.
+
+          reasoning_effort: **o-series models only**
+
+              Constrains effort on reasoning for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+              result in faster responses and fewer tokens used on reasoning in a response.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: A list of tools enabled on the assistant. There can be a maximum of 128 tools per
+              assistant. Tools can be of types `code_interpreter`, `file_search`, or
+              `function`.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
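+
+        Example (a minimal sketch; the model choice, name, and instructions are
+        illustrative):
+
+        ```py
+        import asyncio
+
+        from openai import AsyncOpenAI
+
+        client = AsyncOpenAI()
+
+
+        async def main() -> None:
+            assistant = await client.beta.assistants.create(
+                model="gpt-4o",
+                name="Math Tutor",
+                instructions="You are a personal math tutor. Answer step by step.",
+                tools=[{"type": "code_interpreter"}],
+            )
+            print(assistant.id)
+
+
+        asyncio.run(main())
+        ```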
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            "/assistants",
+            body=await async_maybe_transform(
+                {
+                    "model": model,
+                    "description": description,
+                    "instructions": instructions,
+                    "metadata": metadata,
+                    "name": name,
+                    "reasoning_effort": reasoning_effort,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_resources": tool_resources,
+                    "tools": tools,
+                    "top_p": top_p,
+                },
+                assistant_create_params.AssistantCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Assistant,
+        )
+
+    async def retrieve(
+        self,
+        assistant_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Assistant:
+        """
+        Retrieves an assistant.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not assistant_id:
+            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._get(
+            f"/assistants/{assistant_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Assistant,
+        )
+
+    async def update(
+        self,
+        assistant_id: str,
+        *,
+        description: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[
+            str,
+            Literal[
+                "o3-mini",
+                "o3-mini-2025-01-31",
+                "o1",
+                "o1-2024-12-17",
+                "gpt-4o",
+                "gpt-4o-2024-11-20",
+                "gpt-4o-2024-08-06",
+                "gpt-4o-2024-05-13",
+                "gpt-4o-mini",
+                "gpt-4o-mini-2024-07-18",
+                "gpt-4.5-preview",
+                "gpt-4.5-preview-2025-02-27",
+                "gpt-4-turbo",
+                "gpt-4-turbo-2024-04-09",
+                "gpt-4-0125-preview",
+                "gpt-4-turbo-preview",
+                "gpt-4-1106-preview",
+                "gpt-4-vision-preview",
+                "gpt-4",
+                "gpt-4-0314",
+                "gpt-4-0613",
+                "gpt-4-32k",
+                "gpt-4-32k-0314",
+                "gpt-4-32k-0613",
+                "gpt-3.5-turbo",
+                "gpt-3.5-turbo-16k",
+                "gpt-3.5-turbo-0613",
+                "gpt-3.5-turbo-1106",
+                "gpt-3.5-turbo-0125",
+                "gpt-3.5-turbo-16k-0613",
+            ],
+        ]
+        | NotGiven = NOT_GIVEN,
+        name: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Assistant:
+        """Modifies an assistant.
+
+        Args:
+          description: The description of the assistant. The maximum length is 512
+              characters.
+
+          instructions: The system instructions that the assistant uses. The maximum length is 256,000
+              characters.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          name: The name of the assistant. The maximum length is 256 characters.
+
+          reasoning_effort: **o-series models only**
+
+              Constrains effort on reasoning for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+              result in faster responses and fewer tokens used on reasoning in a response.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: A list of tools enabled on the assistant. There can be a maximum of 128 tools per
+              assistant. Tools can be of types `code_interpreter`, `file_search`, or
+              `function`.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
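+
+        Example (a minimal sketch of enabling JSON mode; the assistant ID is a
+        placeholder, and per the note above the instructions must also ask the
+        model to produce JSON):
+
+        ```py
+        import asyncio
+
+        from openai import AsyncOpenAI
+
+        client = AsyncOpenAI()
+
+
+        async def main() -> None:
+            assistant = await client.beta.assistants.update(
+                "asst_abc123",
+                instructions="Reply only with a JSON object.",
+                response_format={"type": "json_object"},
+            )
+            print(assistant.response_format)
+
+
+        asyncio.run(main())
+        ```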
+        """
+        if not assistant_id:
+            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/assistants/{assistant_id}",
+            body=await async_maybe_transform(
+                {
+                    "description": description,
+                    "instructions": instructions,
+                    "metadata": metadata,
+                    "model": model,
+                    "name": name,
+                    "reasoning_effort": reasoning_effort,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_resources": tool_resources,
+                    "tools": tools,
+                    "top_p": top_p,
+                },
+                assistant_update_params.AssistantUpdateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Assistant,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[Assistant, AsyncCursorPage[Assistant]]:
+        """Returns a list of assistants.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
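+
+        Example (a minimal sketch; the returned paginator supports `async for`,
+        which pages through results automatically):
+
+        ```py
+        import asyncio
+
+        from openai import AsyncOpenAI
+
+        client = AsyncOpenAI()
+
+
+        async def main() -> None:
+            async for assistant in client.beta.assistants.list(limit=20):
+                print(assistant.id)
+
+
+        asyncio.run(main())
+        ```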
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            "/assistants",
+            page=AsyncCursorPage[Assistant],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    assistant_list_params.AssistantListParams,
+                ),
+            ),
+            model=Assistant,
+        )
+
+    async def delete(
+        self,
+        assistant_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantDeleted:
+        """
+        Delete an assistant.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not assistant_id:
+            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._delete(
+            f"/assistants/{assistant_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=AssistantDeleted,
+        )
+
+
+class AssistantsWithRawResponse:
+    def __init__(self, assistants: Assistants) -> None:
+        self._assistants = assistants
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            assistants.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            assistants.retrieve,
+        )
+        self.update = _legacy_response.to_raw_response_wrapper(
+            assistants.update,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            assistants.list,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            assistants.delete,
+        )
+
+
+class AsyncAssistantsWithRawResponse:
+    def __init__(self, assistants: AsyncAssistants) -> None:
+        self._assistants = assistants
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            assistants.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            assistants.retrieve,
+        )
+        self.update = _legacy_response.async_to_raw_response_wrapper(
+            assistants.update,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            assistants.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            assistants.delete,
+        )
+
+
+class AssistantsWithStreamingResponse:
+    def __init__(self, assistants: Assistants) -> None:
+        self._assistants = assistants
+
+        self.create = to_streamed_response_wrapper(
+            assistants.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            assistants.retrieve,
+        )
+        self.update = to_streamed_response_wrapper(
+            assistants.update,
+        )
+        self.list = to_streamed_response_wrapper(
+            assistants.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            assistants.delete,
+        )
+
+
+class AsyncAssistantsWithStreamingResponse:
+    def __init__(self, assistants: AsyncAssistants) -> None:
+        self._assistants = assistants
+
+        self.create = async_to_streamed_response_wrapper(
+            assistants.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            assistants.retrieve,
+        )
+        self.update = async_to_streamed_response_wrapper(
+            assistants.update,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            assistants.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            assistants.delete,
+        )
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/beta.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/beta.py
new file mode 100644
index 00000000..62fc8258
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/beta.py
@@ -0,0 +1,175 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ..._compat import cached_property
+from .chat.chat import Chat, AsyncChat
+from .assistants import (
+    Assistants,
+    AsyncAssistants,
+    AssistantsWithRawResponse,
+    AsyncAssistantsWithRawResponse,
+    AssistantsWithStreamingResponse,
+    AsyncAssistantsWithStreamingResponse,
+)
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from .threads.threads import (
+    Threads,
+    AsyncThreads,
+    ThreadsWithRawResponse,
+    AsyncThreadsWithRawResponse,
+    ThreadsWithStreamingResponse,
+    AsyncThreadsWithStreamingResponse,
+)
+from .realtime.realtime import (
+    Realtime,
+    AsyncRealtime,
+    RealtimeWithRawResponse,
+    AsyncRealtimeWithRawResponse,
+    RealtimeWithStreamingResponse,
+    AsyncRealtimeWithStreamingResponse,
+)
+
+__all__ = ["Beta", "AsyncBeta"]
+
+
+class Beta(SyncAPIResource):
+    @cached_property
+    def chat(self) -> Chat:
+        return Chat(self._client)
+
+    @cached_property
+    def realtime(self) -> Realtime:
+        return Realtime(self._client)
+
+    @cached_property
+    def assistants(self) -> Assistants:
+        return Assistants(self._client)
+
+    @cached_property
+    def threads(self) -> Threads:
+        return Threads(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> BetaWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return BetaWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> BetaWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return BetaWithStreamingResponse(self)
+
+
+class AsyncBeta(AsyncAPIResource):
+    @cached_property
+    def chat(self) -> AsyncChat:
+        return AsyncChat(self._client)
+
+    @cached_property
+    def realtime(self) -> AsyncRealtime:
+        return AsyncRealtime(self._client)
+
+    @cached_property
+    def assistants(self) -> AsyncAssistants:
+        return AsyncAssistants(self._client)
+
+    @cached_property
+    def threads(self) -> AsyncThreads:
+        return AsyncThreads(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncBetaWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncBetaWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncBetaWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncBetaWithStreamingResponse(self)
+
+
+class BetaWithRawResponse:
+    def __init__(self, beta: Beta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def realtime(self) -> RealtimeWithRawResponse:
+        return RealtimeWithRawResponse(self._beta.realtime)
+
+    @cached_property
+    def assistants(self) -> AssistantsWithRawResponse:
+        return AssistantsWithRawResponse(self._beta.assistants)
+
+    @cached_property
+    def threads(self) -> ThreadsWithRawResponse:
+        return ThreadsWithRawResponse(self._beta.threads)
+
+
+class AsyncBetaWithRawResponse:
+    def __init__(self, beta: AsyncBeta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def realtime(self) -> AsyncRealtimeWithRawResponse:
+        return AsyncRealtimeWithRawResponse(self._beta.realtime)
+
+    @cached_property
+    def assistants(self) -> AsyncAssistantsWithRawResponse:
+        return AsyncAssistantsWithRawResponse(self._beta.assistants)
+
+    @cached_property
+    def threads(self) -> AsyncThreadsWithRawResponse:
+        return AsyncThreadsWithRawResponse(self._beta.threads)
+
+
+class BetaWithStreamingResponse:
+    def __init__(self, beta: Beta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def realtime(self) -> RealtimeWithStreamingResponse:
+        return RealtimeWithStreamingResponse(self._beta.realtime)
+
+    @cached_property
+    def assistants(self) -> AssistantsWithStreamingResponse:
+        return AssistantsWithStreamingResponse(self._beta.assistants)
+
+    @cached_property
+    def threads(self) -> ThreadsWithStreamingResponse:
+        return ThreadsWithStreamingResponse(self._beta.threads)
+
+
+class AsyncBetaWithStreamingResponse:
+    def __init__(self, beta: AsyncBeta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def realtime(self) -> AsyncRealtimeWithStreamingResponse:
+        return AsyncRealtimeWithStreamingResponse(self._beta.realtime)
+
+    @cached_property
+    def assistants(self) -> AsyncAssistantsWithStreamingResponse:
+        return AsyncAssistantsWithStreamingResponse(self._beta.assistants)
+
+    @cached_property
+    def threads(self) -> AsyncThreadsWithStreamingResponse:
+        return AsyncThreadsWithStreamingResponse(self._beta.threads)
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/__init__.py
new file mode 100644
index 00000000..072d7867
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/__init__.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .chat import Chat, AsyncChat
+from .completions import Completions, AsyncCompletions
+
+__all__ = [
+    "Completions",
+    "AsyncCompletions",
+    "Chat",
+    "AsyncChat",
+]
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/chat.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/chat.py
new file mode 100644
index 00000000..6afdcea3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/chat.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ...._compat import cached_property
+from .completions import Completions, AsyncCompletions
+from ...._resource import SyncAPIResource, AsyncAPIResource
+
+__all__ = ["Chat", "AsyncChat"]
+
+
+class Chat(SyncAPIResource):
+    @cached_property
+    def completions(self) -> Completions:
+        return Completions(self._client)
+
+
+class AsyncChat(AsyncAPIResource):
+    @cached_property
+    def completions(self) -> AsyncCompletions:
+        return AsyncCompletions(self._client)
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/completions.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/completions.py
new file mode 100644
index 00000000..545a3f40
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/completions.py
@@ -0,0 +1,634 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Type, Union, Iterable, Optional, cast
+from functools import partial
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._streaming import Stream
+from ....types.chat import completion_create_params
+from ...._base_client import make_request_options
+from ....lib._parsing import (
+    ResponseFormatT,
+    validate_input_tools as _validate_input_tools,
+    parse_chat_completion as _parse_chat_completion,
+    type_to_response_format_param as _type_to_response_format,
+)
+from ....types.chat_model import ChatModel
+from ....lib.streaming.chat import ChatCompletionStreamManager, AsyncChatCompletionStreamManager
+from ....types.shared_params import Metadata, ReasoningEffort
+from ....types.chat.chat_completion import ChatCompletion
+from ....types.chat.chat_completion_chunk import ChatCompletionChunk
+from ....types.chat.parsed_chat_completion import ParsedChatCompletion
+from ....types.chat.chat_completion_tool_param import ChatCompletionToolParam
+from ....types.chat.chat_completion_audio_param import ChatCompletionAudioParam
+from ....types.chat.chat_completion_message_param import ChatCompletionMessageParam
+from ....types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
+from ....types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam
+from ....types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam
+
+__all__ = ["Completions", "AsyncCompletions"]
+
+
+class Completions(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> CompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return CompletionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return CompletionsWithStreamingResponse(self)
+
+    def parse(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+        response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ParsedChatCompletion[ResponseFormatT]:
+        """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
+        & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
+
+        You can pass a pydantic model to this method and it will automatically convert the model
+        into a JSON schema, send it to the API and parse the response content back into the given model.
+
+        This method will also automatically parse `function` tool calls if:
+        - You use the `openai.pydantic_function_tool()` helper method
+        - You mark your tool schema with `"strict": True`
+
+        Example usage:
+        ```py
+        from typing import List
+
+        from pydantic import BaseModel
+        from openai import OpenAI
+
+
+        class Step(BaseModel):
+            explanation: str
+            output: str
+
+
+        class MathResponse(BaseModel):
+            steps: List[Step]
+            final_answer: str
+
+
+        client = OpenAI()
+        completion = client.beta.chat.completions.parse(
+            model="gpt-4o-2024-08-06",
+            messages=[
+                {"role": "system", "content": "You are a helpful math tutor."},
+                {"role": "user", "content": "solve 8x + 31 = 2"},
+            ],
+            response_format=MathResponse,
+        )
+
+        message = completion.choices[0].message
+        if message.parsed:
+            print(message.parsed.steps)
+            print("answer: ", message.parsed.final_answer)
+        ```
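+
+        Tool calls can be parsed the same way; a minimal sketch (the `GetWeather`
+        model is illustrative, and the parsed arguments are read back via
+        `parsed_arguments` as described in the SDK helpers docs):
+
+        ```py
+        import openai
+        from pydantic import BaseModel
+
+
+        class GetWeather(BaseModel):
+            city: str
+
+
+        client = openai.OpenAI()
+        completion = client.beta.chat.completions.parse(
+            model="gpt-4o-2024-08-06",
+            messages=[{"role": "user", "content": "What's the weather in Paris?"}],
+            tools=[openai.pydantic_function_tool(GetWeather)],
+        )
+        tool_call = completion.choices[0].message.tool_calls[0]
+        print(tool_call.function.parsed_arguments)
+        ```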
+        """
+        _validate_input_tools(tools)
+
+        extra_headers = {
+            "X-Stainless-Helper-Method": "beta.chat.completions.parse",
+            **(extra_headers or {}),
+        }
+
+        def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
+            return _parse_chat_completion(
+                response_format=response_format,
+                chat_completion=raw_completion,
+                input_tools=tools,
+            )
+
+        return self._post(
+            "/chat/completions",
+            body=maybe_transform(
+                {
+                    "messages": messages,
+                    "model": model,
+                    "audio": audio,
+                    "frequency_penalty": frequency_penalty,
+                    "function_call": function_call,
+                    "functions": functions,
+                    "logit_bias": logit_bias,
+                    "logprobs": logprobs,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_tokens": max_tokens,
+                    "metadata": metadata,
+                    "modalities": modalities,
+                    "n": n,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "prediction": prediction,
+                    "presence_penalty": presence_penalty,
+                    "reasoning_effort": reasoning_effort,
+                    "response_format": _type_to_response_format(response_format),
+                    "seed": seed,
+                    "service_tier": service_tier,
+                    "stop": stop,
+                    "store": store,
+                    "stream": False,
+                    "stream_options": stream_options,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_logprobs": top_logprobs,
+                    "top_p": top_p,
+                    "user": user,
+                    "web_search_options": web_search_options,
+                },
+                completion_create_params.CompletionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                post_parser=parser,
+            ),
+            # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
+            # in the `parser` function above
+            cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
+            stream=False,
+        )
+
+    def stream(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+        response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletionStreamManager[ResponseFormatT]:
+        """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
+        and automatic accumulation of each delta.
+
+        This also supports all of the parsing utilities that `.parse()` does.
+
+        Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:
+
+        ```py
+        with client.beta.chat.completions.stream(
+            model="gpt-4o-2024-08-06",
+            messages=[...],
+        ) as stream:
+            for event in stream:
+                if event.type == "content.delta":
+                    print(event.delta, flush=True, end="")
+        ```
+
+        When the context manager is entered, a `ChatCompletionStream` instance is returned which, like `.create(stream=True)`, is an iterator. The full list of events yielded by the iterator is outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).
+
+        When the context manager exits, the response will be closed; however, the `stream` instance is still available outside
+        the context manager.
+        """
+        extra_headers = {
+            "X-Stainless-Helper-Method": "beta.chat.completions.stream",
+            **(extra_headers or {}),
+        }
+
+        api_request: partial[Stream[ChatCompletionChunk]] = partial(
+            self._client.chat.completions.create,
+            messages=messages,
+            model=model,
+            audio=audio,
+            stream=True,
+            response_format=_type_to_response_format(response_format),
+            frequency_penalty=frequency_penalty,
+            function_call=function_call,
+            functions=functions,
+            logit_bias=logit_bias,
+            logprobs=logprobs,
+            max_completion_tokens=max_completion_tokens,
+            max_tokens=max_tokens,
+            metadata=metadata,
+            modalities=modalities,
+            n=n,
+            parallel_tool_calls=parallel_tool_calls,
+            prediction=prediction,
+            presence_penalty=presence_penalty,
+            reasoning_effort=reasoning_effort,
+            seed=seed,
+            service_tier=service_tier,
+            store=store,
+            stop=stop,
+            stream_options=stream_options,
+            temperature=temperature,
+            tool_choice=tool_choice,
+            tools=tools,
+            top_logprobs=top_logprobs,
+            top_p=top_p,
+            user=user,
+            web_search_options=web_search_options,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        return ChatCompletionStreamManager(
+            api_request,
+            response_format=response_format,
+            input_tools=tools,
+        )
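+
+    # Usage sketch: the accumulated completion remains accessible after the
+    # `with` block via the stream helper (see helpers.md); the model name is
+    # illustrative.
+    #
+    #     with client.beta.chat.completions.stream(
+    #         model="gpt-4o-2024-08-06",
+    #         messages=[{"role": "user", "content": "Say hi"}],
+    #     ) as stream:
+    #         for event in stream:
+    #             ...
+    #     completion = stream.get_final_completion()
+    #     print(completion.choices[0].message.content)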
+
+
+class AsyncCompletions(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncCompletionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncCompletionsWithStreamingResponse(self)
+
+    async def parse(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+        response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ParsedChatCompletion[ResponseFormatT]:
+        """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
+        & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
+
+        You can pass a pydantic model to this method and it will automatically convert the model
+        into a JSON schema, send it to the API and parse the response content back into the given model.
+
+        This method will also automatically parse `function` tool calls if:
+        - You use the `openai.pydantic_function_tool()` helper method
+        - You mark your tool schema with `"strict": True`
+
+        Example usage:
+        ```py
+        from typing import List
+
+        from pydantic import BaseModel
+        from openai import AsyncOpenAI
+
+
+        class Step(BaseModel):
+            explanation: str
+            output: str
+
+
+        class MathResponse(BaseModel):
+            steps: List[Step]
+            final_answer: str
+
+
+        client = AsyncOpenAI()
+        completion = await client.beta.chat.completions.parse(
+            model="gpt-4o-2024-08-06",
+            messages=[
+                {"role": "system", "content": "You are a helpful math tutor."},
+                {"role": "user", "content": "solve 8x + 31 = 2"},
+            ],
+            response_format=MathResponse,
+        )
+
+        message = completion.choices[0].message
+        if message.parsed:
+            print(message.parsed.steps)
+            print("answer: ", message.parsed.final_answer)
+        ```
+        """
+        _validate_input_tools(tools)
+
+        extra_headers = {
+            "X-Stainless-Helper-Method": "beta.chat.completions.parse",
+            **(extra_headers or {}),
+        }
+
+        def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
+            return _parse_chat_completion(
+                response_format=response_format,
+                chat_completion=raw_completion,
+                input_tools=tools,
+            )
+
+        return await self._post(
+            "/chat/completions",
+            body=await async_maybe_transform(
+                {
+                    "messages": messages,
+                    "model": model,
+                    "audio": audio,
+                    "frequency_penalty": frequency_penalty,
+                    "function_call": function_call,
+                    "functions": functions,
+                    "logit_bias": logit_bias,
+                    "logprobs": logprobs,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_tokens": max_tokens,
+                    "metadata": metadata,
+                    "modalities": modalities,
+                    "n": n,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "prediction": prediction,
+                    "presence_penalty": presence_penalty,
+                    "reasoning_effort": reasoning_effort,
+                    "response_format": _type_to_response_format(response_format),
+                    "seed": seed,
+                    "service_tier": service_tier,
+                    "store": store,
+                    "stop": stop,
+                    "stream": False,
+                    "stream_options": stream_options,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_logprobs": top_logprobs,
+                    "top_p": top_p,
+                    "user": user,
+                    "web_search_options": web_search_options,
+                },
+                completion_create_params.CompletionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                post_parser=parser,
+            ),
+            # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
+            # in the `parser` function above
+            cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
+            stream=False,
+        )
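+
+    # Usage sketch: automatic `function` tool-call parsing via the
+    # `openai.pydantic_function_tool()` helper mentioned above (the model and
+    # tool shape are illustrative).
+    #
+    #     import openai
+    #     from pydantic import BaseModel
+    #
+    #     class GetWeather(BaseModel):
+    #         city: str
+    #
+    #     completion = await client.beta.chat.completions.parse(
+    #         model="gpt-4o-2024-08-06",
+    #         messages=[{"role": "user", "content": "What's the weather in Paris?"}],
+    #         tools=[openai.pydantic_function_tool(GetWeather)],
+    #     )
+    #     tool_call = completion.choices[0].message.tool_calls[0]
+    #     print(tool_call.function.parsed_arguments)  # GetWeather(city='Paris')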
+
+    def stream(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+        response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncChatCompletionStreamManager[ResponseFormatT]:
+        """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
+        and automatic accumulation of each delta.
+
+        This also supports all of the parsing utilities that `.parse()` does.
+
+        Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:
+
+        ```py
+        async with client.beta.chat.completions.stream(
+            model="gpt-4o-2024-08-06",
+            messages=[...],
+        ) as stream:
+            async for event in stream:
+                if event.type == "content.delta":
+                    print(event.delta, flush=True, end="")
+        ```
+
+        When the context manager is entered, an `AsyncChatCompletionStream` instance is returned which, like `.create(stream=True)`, is an async iterator. The full list of events that are yielded by the iterator is outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).
+
+        When the context manager exits, the response will be closed; however, the `stream` instance is still available outside
+        the context manager.
+        """
+        _validate_input_tools(tools)
+
+        extra_headers = {
+            "X-Stainless-Helper-Method": "beta.chat.completions.stream",
+            **(extra_headers or {}),
+        }
+
+        api_request = self._client.chat.completions.create(
+            messages=messages,
+            model=model,
+            audio=audio,
+            stream=True,
+            response_format=_type_to_response_format(response_format),
+            frequency_penalty=frequency_penalty,
+            function_call=function_call,
+            functions=functions,
+            logit_bias=logit_bias,
+            logprobs=logprobs,
+            max_completion_tokens=max_completion_tokens,
+            max_tokens=max_tokens,
+            metadata=metadata,
+            modalities=modalities,
+            n=n,
+            parallel_tool_calls=parallel_tool_calls,
+            prediction=prediction,
+            presence_penalty=presence_penalty,
+            reasoning_effort=reasoning_effort,
+            seed=seed,
+            service_tier=service_tier,
+            stop=stop,
+            store=store,
+            stream_options=stream_options,
+            temperature=temperature,
+            tool_choice=tool_choice,
+            tools=tools,
+            top_logprobs=top_logprobs,
+            top_p=top_p,
+            user=user,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+            web_search_options=web_search_options,
+        )
+        return AsyncChatCompletionStreamManager(
+            api_request,
+            response_format=response_format,
+            input_tools=tools,
+        )
+
+
+class CompletionsWithRawResponse:
+    def __init__(self, completions: Completions) -> None:
+        self._completions = completions
+
+        self.parse = _legacy_response.to_raw_response_wrapper(
+            completions.parse,
+        )
+
+
+class AsyncCompletionsWithRawResponse:
+    def __init__(self, completions: AsyncCompletions) -> None:
+        self._completions = completions
+
+        self.parse = _legacy_response.async_to_raw_response_wrapper(
+            completions.parse,
+        )
+
+
+class CompletionsWithStreamingResponse:
+    def __init__(self, completions: Completions) -> None:
+        self._completions = completions
+
+        self.parse = to_streamed_response_wrapper(
+            completions.parse,
+        )
+
+
+class AsyncCompletionsWithStreamingResponse:
+    def __init__(self, completions: AsyncCompletions) -> None:
+        self._completions = completions
+
+        self.parse = async_to_streamed_response_wrapper(
+            completions.parse,
+        )
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/__init__.py
new file mode 100644
index 00000000..7ab3d993
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/__init__.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .realtime import (
+    Realtime,
+    AsyncRealtime,
+    RealtimeWithRawResponse,
+    AsyncRealtimeWithRawResponse,
+    RealtimeWithStreamingResponse,
+    AsyncRealtimeWithStreamingResponse,
+)
+from .sessions import (
+    Sessions,
+    AsyncSessions,
+    SessionsWithRawResponse,
+    AsyncSessionsWithRawResponse,
+    SessionsWithStreamingResponse,
+    AsyncSessionsWithStreamingResponse,
+)
+from .transcription_sessions import (
+    TranscriptionSessions,
+    AsyncTranscriptionSessions,
+    TranscriptionSessionsWithRawResponse,
+    AsyncTranscriptionSessionsWithRawResponse,
+    TranscriptionSessionsWithStreamingResponse,
+    AsyncTranscriptionSessionsWithStreamingResponse,
+)
+
+__all__ = [
+    "Sessions",
+    "AsyncSessions",
+    "SessionsWithRawResponse",
+    "AsyncSessionsWithRawResponse",
+    "SessionsWithStreamingResponse",
+    "AsyncSessionsWithStreamingResponse",
+    "TranscriptionSessions",
+    "AsyncTranscriptionSessions",
+    "TranscriptionSessionsWithRawResponse",
+    "AsyncTranscriptionSessionsWithRawResponse",
+    "TranscriptionSessionsWithStreamingResponse",
+    "AsyncTranscriptionSessionsWithStreamingResponse",
+    "Realtime",
+    "AsyncRealtime",
+    "RealtimeWithRawResponse",
+    "AsyncRealtimeWithRawResponse",
+    "RealtimeWithStreamingResponse",
+    "AsyncRealtimeWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/realtime.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/realtime.py
new file mode 100644
index 00000000..76e57f8c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/realtime.py
@@ -0,0 +1,1066 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import json
+import logging
+from types import TracebackType
+from typing import TYPE_CHECKING, Any, Iterator, cast
+from typing_extensions import AsyncIterator
+
+import httpx
+from pydantic import BaseModel
+
+from .sessions import (
+    Sessions,
+    AsyncSessions,
+    SessionsWithRawResponse,
+    AsyncSessionsWithRawResponse,
+    SessionsWithStreamingResponse,
+    AsyncSessionsWithStreamingResponse,
+)
+from ...._types import NOT_GIVEN, Query, Headers, NotGiven
+from ...._utils import (
+    is_azure_client,
+    maybe_transform,
+    strip_not_given,
+    async_maybe_transform,
+    is_async_azure_client,
+)
+from ...._compat import cached_property
+from ...._models import construct_type_unchecked
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._exceptions import OpenAIError
+from ...._base_client import _merge_mappings
+from ....types.beta.realtime import (
+    session_update_event_param,
+    response_create_event_param,
+    transcription_session_update_param,
+)
+from .transcription_sessions import (
+    TranscriptionSessions,
+    AsyncTranscriptionSessions,
+    TranscriptionSessionsWithRawResponse,
+    AsyncTranscriptionSessionsWithRawResponse,
+    TranscriptionSessionsWithStreamingResponse,
+    AsyncTranscriptionSessionsWithStreamingResponse,
+)
+from ....types.websocket_connection_options import WebsocketConnectionOptions
+from ....types.beta.realtime.realtime_client_event import RealtimeClientEvent
+from ....types.beta.realtime.realtime_server_event import RealtimeServerEvent
+from ....types.beta.realtime.conversation_item_param import ConversationItemParam
+from ....types.beta.realtime.realtime_client_event_param import RealtimeClientEventParam
+
+if TYPE_CHECKING:
+    from websockets.sync.client import ClientConnection as WebsocketConnection
+    from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection
+
+    from ...._client import OpenAI, AsyncOpenAI
+
+__all__ = ["Realtime", "AsyncRealtime"]
+
+log: logging.Logger = logging.getLogger(__name__)
+
+
+class Realtime(SyncAPIResource):
+    @cached_property
+    def sessions(self) -> Sessions:
+        return Sessions(self._client)
+
+    @cached_property
+    def transcription_sessions(self) -> TranscriptionSessions:
+        return TranscriptionSessions(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> RealtimeWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return RealtimeWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> RealtimeWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return RealtimeWithStreamingResponse(self)
+
+    def connect(
+        self,
+        *,
+        model: str,
+        extra_query: Query = {},
+        extra_headers: Headers = {},
+        websocket_connection_options: WebsocketConnectionOptions = {},
+    ) -> RealtimeConnectionManager:
+        """
+        The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.
+
+        Some notable benefits of the API include:
+
+        - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
+        - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
+        - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.
+
+        The Realtime API is a stateful, event-based API that communicates over a WebSocket.
+        """
+        return RealtimeConnectionManager(
+            client=self._client,
+            extra_query=extra_query,
+            extra_headers=extra_headers,
+            websocket_connection_options=websocket_connection_options,
+            model=model,
+        )
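+
+    # Usage sketch: a minimal text-only turn over the sync connection (requires
+    # `pip install openai[realtime]`; the model name and session fields are
+    # illustrative).
+    #
+    #     with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
+    #         connection.session.update(session={"modalities": ["text"]})
+    #         connection.conversation.item.create(
+    #             item={
+    #                 "type": "message",
+    #                 "role": "user",
+    #                 "content": [{"type": "input_text", "text": "Hello!"}],
+    #             }
+    #         )
+    #         connection.response.create()
+    #         for event in connection:
+    #             if event.type == "response.text.delta":
+    #                 print(event.delta, end="")
+    #             elif event.type == "response.done":
+    #                 break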
+
+
+class AsyncRealtime(AsyncAPIResource):
+    @cached_property
+    def sessions(self) -> AsyncSessions:
+        return AsyncSessions(self._client)
+
+    @cached_property
+    def transcription_sessions(self) -> AsyncTranscriptionSessions:
+        return AsyncTranscriptionSessions(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncRealtimeWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncRealtimeWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncRealtimeWithStreamingResponse(self)
+
+    def connect(
+        self,
+        *,
+        model: str,
+        extra_query: Query = {},
+        extra_headers: Headers = {},
+        websocket_connection_options: WebsocketConnectionOptions = {},
+    ) -> AsyncRealtimeConnectionManager:
+        """
+        The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.
+
+        Some notable benefits of the API include:
+
+        - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
+        - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
+        - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.
+
+        The Realtime API is a stateful, event-based API that communicates over a WebSocket.
+        """
+        return AsyncRealtimeConnectionManager(
+            client=self._client,
+            extra_query=extra_query,
+            extra_headers=extra_headers,
+            websocket_connection_options=websocket_connection_options,
+            model=model,
+        )
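+
+    # Usage sketch: the async connection mirrors the sync form; it is entered
+    # with `async with` and iterated with `async for`.
+    #
+    #     async with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
+    #         await connection.session.update(session={"modalities": ["text"]})
+    #         async for event in connection:
+    #             ...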
+
+
+class RealtimeWithRawResponse:
+    def __init__(self, realtime: Realtime) -> None:
+        self._realtime = realtime
+
+    @cached_property
+    def sessions(self) -> SessionsWithRawResponse:
+        return SessionsWithRawResponse(self._realtime.sessions)
+
+    @cached_property
+    def transcription_sessions(self) -> TranscriptionSessionsWithRawResponse:
+        return TranscriptionSessionsWithRawResponse(self._realtime.transcription_sessions)
+
+
+class AsyncRealtimeWithRawResponse:
+    def __init__(self, realtime: AsyncRealtime) -> None:
+        self._realtime = realtime
+
+    @cached_property
+    def sessions(self) -> AsyncSessionsWithRawResponse:
+        return AsyncSessionsWithRawResponse(self._realtime.sessions)
+
+    @cached_property
+    def transcription_sessions(self) -> AsyncTranscriptionSessionsWithRawResponse:
+        return AsyncTranscriptionSessionsWithRawResponse(self._realtime.transcription_sessions)
+
+
+class RealtimeWithStreamingResponse:
+    def __init__(self, realtime: Realtime) -> None:
+        self._realtime = realtime
+
+    @cached_property
+    def sessions(self) -> SessionsWithStreamingResponse:
+        return SessionsWithStreamingResponse(self._realtime.sessions)
+
+    @cached_property
+    def transcription_sessions(self) -> TranscriptionSessionsWithStreamingResponse:
+        return TranscriptionSessionsWithStreamingResponse(self._realtime.transcription_sessions)
+
+
+class AsyncRealtimeWithStreamingResponse:
+    def __init__(self, realtime: AsyncRealtime) -> None:
+        self._realtime = realtime
+
+    @cached_property
+    def sessions(self) -> AsyncSessionsWithStreamingResponse:
+        return AsyncSessionsWithStreamingResponse(self._realtime.sessions)
+
+    @cached_property
+    def transcription_sessions(self) -> AsyncTranscriptionSessionsWithStreamingResponse:
+        return AsyncTranscriptionSessionsWithStreamingResponse(self._realtime.transcription_sessions)
+
+
+class AsyncRealtimeConnection:
+    """Represents a live websocket connection to the Realtime API"""
+
+    session: AsyncRealtimeSessionResource
+    response: AsyncRealtimeResponseResource
+    input_audio_buffer: AsyncRealtimeInputAudioBufferResource
+    conversation: AsyncRealtimeConversationResource
+    transcription_session: AsyncRealtimeTranscriptionSessionResource
+
+    _connection: AsyncWebsocketConnection
+
+    def __init__(self, connection: AsyncWebsocketConnection) -> None:
+        self._connection = connection
+
+        self.session = AsyncRealtimeSessionResource(self)
+        self.response = AsyncRealtimeResponseResource(self)
+        self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self)
+        self.conversation = AsyncRealtimeConversationResource(self)
+        self.transcription_session = AsyncRealtimeTranscriptionSessionResource(self)
+
+    async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]:
+        """
+        An infinite iterator that will continue to yield events until
+        the connection is closed.
+        """
+        from websockets.exceptions import ConnectionClosedOK
+
+        try:
+            while True:
+                yield await self.recv()
+        except ConnectionClosedOK:
+            return
+
+    async def recv(self) -> RealtimeServerEvent:
+        """
+        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.
+
+        Canceling this method is safe. There's no risk of losing data.
+        """
+        return self.parse_event(await self.recv_bytes())
+
+    async def recv_bytes(self) -> bytes:
+        """Receive the next message from the connection as raw bytes.
+
+        Canceling this method is safe. There's no risk of losing data.
+
+        If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
+        then you can call `.parse_event(data)`.
+        """
+        message = await self._connection.recv(decode=False)
+        log.debug(f"Received websocket message: %s", message)
+        if not isinstance(message, bytes):
+            # passing `decode=False` should always result in us getting `bytes` back
+            raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}")
+
+        return message
+
+    async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
+        data = (
+            event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
+            if isinstance(event, BaseModel)
+            else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam))
+        )
+        await self._connection.send(data)
+
+    async def close(self, *, code: int = 1000, reason: str = "") -> None:
+        await self._connection.close(code=code, reason=reason)
+
+    def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
+        """
+        Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.
+
+        This is helpful if you're using `.recv_bytes()`.
+        """
+        return cast(
+            RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
+        )
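+
+    # Usage sketch: splitting `.recv()` into its raw and parsing halves, e.g.
+    # to log raw payloads before interpreting them (assumes an open
+    # `connection`).
+    #
+    #     raw = await connection.recv_bytes()
+    #     event = connection.parse_event(raw)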
+
+
+class AsyncRealtimeConnectionManager:
+    """
+    Context manager over an `AsyncRealtimeConnection` that is returned by `beta.realtime.connect()`
+
+    This context manager ensures that the connection will be closed when it exits.
+
+    ---
+
+    Note that if your application doesn't work well with the context manager approach, you
+    can call the `.enter()` method directly to initiate a connection.
+
+    **Warning**: You must remember to close the connection with `.close()`.
+
+    ```py
+    connection = await client.beta.realtime.connect(...).enter()
+    # ...
+    await connection.close()
+    ```
+    """
+
+    def __init__(
+        self,
+        *,
+        client: AsyncOpenAI,
+        model: str,
+        extra_query: Query,
+        extra_headers: Headers,
+        websocket_connection_options: WebsocketConnectionOptions,
+    ) -> None:
+        self.__client = client
+        self.__model = model
+        self.__connection: AsyncRealtimeConnection | None = None
+        self.__extra_query = extra_query
+        self.__extra_headers = extra_headers
+        self.__websocket_connection_options = websocket_connection_options
+
+    async def __aenter__(self) -> AsyncRealtimeConnection:
+        """
+        👋 If your application doesn't work well with the context manager approach, you
+        can call this method directly to initiate a connection.
+
+        **Warning**: You must remember to close the connection with `.close()`.
+
+        ```py
+        connection = await client.beta.realtime.connect(...).enter()
+        # ...
+        await connection.close()
+        ```
+        """
+        try:
+            from websockets.asyncio.client import connect
+        except ImportError as exc:
+            raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
+
+        extra_query = self.__extra_query
+        auth_headers = self.__client.auth_headers
+        if is_async_azure_client(self.__client):
+            url, auth_headers = await self.__client._configure_realtime(self.__model, extra_query)
+        else:
+            url = self._prepare_url().copy_with(
+                params={
+                    **self.__client.base_url.params,
+                    "model": self.__model,
+                    **extra_query,
+                },
+            )
+        log.debug("Connecting to %s", url)
+        if self.__websocket_connection_options:
+            log.debug("Connection options: %s", self.__websocket_connection_options)
+
+        self.__connection = AsyncRealtimeConnection(
+            await connect(
+                str(url),
+                user_agent_header=self.__client.user_agent,
+                additional_headers=_merge_mappings(
+                    {
+                        **auth_headers,
+                        "OpenAI-Beta": "realtime=v1",
+                    },
+                    self.__extra_headers,
+                ),
+                **self.__websocket_connection_options,
+            )
+        )
+
+        return self.__connection
+
+    enter = __aenter__
+
+    def _prepare_url(self) -> httpx.URL:
+        if self.__client.websocket_base_url is not None:
+            base_url = httpx.URL(self.__client.websocket_base_url)
+        else:
+            base_url = self.__client._base_url.copy_with(scheme="wss")
+
+        merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
+        return base_url.copy_with(raw_path=merge_raw_path)
+
+    async def __aexit__(
+        self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
+    ) -> None:
+        if self.__connection is not None:
+            await self.__connection.close()
+
+
+class RealtimeConnection:
+    """Represents a live websocket connection to the Realtime API"""
+
+    session: RealtimeSessionResource
+    response: RealtimeResponseResource
+    input_audio_buffer: RealtimeInputAudioBufferResource
+    conversation: RealtimeConversationResource
+    transcription_session: RealtimeTranscriptionSessionResource
+
+    _connection: WebsocketConnection
+
+    def __init__(self, connection: WebsocketConnection) -> None:
+        self._connection = connection
+
+        self.session = RealtimeSessionResource(self)
+        self.response = RealtimeResponseResource(self)
+        self.input_audio_buffer = RealtimeInputAudioBufferResource(self)
+        self.conversation = RealtimeConversationResource(self)
+        self.transcription_session = RealtimeTranscriptionSessionResource(self)
+
+    def __iter__(self) -> Iterator[RealtimeServerEvent]:
+        """
+        An infinite iterator that will continue to yield events until
+        the connection is closed.
+        """
+        from websockets.exceptions import ConnectionClosedOK
+
+        try:
+            while True:
+                yield self.recv()
+        except ConnectionClosedOK:
+            return
+
+    def recv(self) -> RealtimeServerEvent:
+        """
+        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.
+
+        Canceling this method is safe. There's no risk of losing data.
+        """
+        return self.parse_event(self.recv_bytes())
+
+    def recv_bytes(self) -> bytes:
+        """Receive the next message from the connection as raw bytes.
+
+        Canceling this method is safe. There's no risk of losing data.
+
+        If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
+        then you can call `.parse_event(data)`.
+        """
+        message = self._connection.recv(decode=False)
+        log.debug(f"Received websocket message: %s", message)
+        if not isinstance(message, bytes):
+            # passing `decode=False` should always result in us getting `bytes` back
+            raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}")
+
+        return message
+
+    def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
+        data = (
+            event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
+            if isinstance(event, BaseModel)
+            else json.dumps(maybe_transform(event, RealtimeClientEventParam))
+        )
+        self._connection.send(data)
+
+    def close(self, *, code: int = 1000, reason: str = "") -> None:
+        self._connection.close(code=code, reason=reason)
+
+    def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
+        """
+        Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.
+
+        This is helpful if you're using `.recv_bytes()`.
+        """
+        return cast(
+            RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
+        )
+
+
+class RealtimeConnectionManager:
+    """
+    Context manager over a `RealtimeConnection` that is returned by `beta.realtime.connect()`
+
+    This context manager ensures that the connection will be closed when it exits.
+
+    ---
+
+    Note that if your application doesn't work well with the context manager approach, you
+    can call the `.enter()` method directly to initiate a connection.
+
+    **Warning**: You must remember to close the connection with `.close()`.
+
+    ```py
+    connection = client.beta.realtime.connect(...).enter()
+    # ...
+    connection.close()
+    ```
+    """
+
+    def __init__(
+        self,
+        *,
+        client: OpenAI,
+        model: str,
+        extra_query: Query,
+        extra_headers: Headers,
+        websocket_connection_options: WebsocketConnectionOptions,
+    ) -> None:
+        self.__client = client
+        self.__model = model
+        self.__connection: RealtimeConnection | None = None
+        self.__extra_query = extra_query
+        self.__extra_headers = extra_headers
+        self.__websocket_connection_options = websocket_connection_options
+
+    def __enter__(self) -> RealtimeConnection:
+        """
+        👋 If your application doesn't work well with the context manager approach, you
+        can call this method directly to initiate a connection.
+
+        **Warning**: You must remember to close the connection with `.close()`.
+
+        ```py
+        connection = client.beta.realtime.connect(...).enter()
+        # ...
+        connection.close()
+        ```
+        """
+        try:
+            from websockets.sync.client import connect
+        except ImportError as exc:
+            raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
+
+        extra_query = self.__extra_query
+        auth_headers = self.__client.auth_headers
+        if is_azure_client(self.__client):
+            url, auth_headers = self.__client._configure_realtime(self.__model, extra_query)
+        else:
+            url = self._prepare_url().copy_with(
+                params={
+                    **self.__client.base_url.params,
+                    "model": self.__model,
+                    **extra_query,
+                },
+            )
+        log.debug("Connecting to %s", url)
+        if self.__websocket_connection_options:
+            log.debug("Connection options: %s", self.__websocket_connection_options)
+
+        self.__connection = RealtimeConnection(
+            connect(
+                str(url),
+                user_agent_header=self.__client.user_agent,
+                additional_headers=_merge_mappings(
+                    {
+                        **auth_headers,
+                        "OpenAI-Beta": "realtime=v1",
+                    },
+                    self.__extra_headers,
+                ),
+                **self.__websocket_connection_options,
+            )
+        )
+
+        return self.__connection
+
+    enter = __enter__
+
+    def _prepare_url(self) -> httpx.URL:
+        if self.__client.websocket_base_url is not None:
+            base_url = httpx.URL(self.__client.websocket_base_url)
+        else:
+            base_url = self.__client._base_url.copy_with(scheme="wss")
+
+        merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
+        return base_url.copy_with(raw_path=merge_raw_path)
+
+    def __exit__(
+        self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
+    ) -> None:
+        if self.__connection is not None:
+            self.__connection.close()
+
+
+class BaseRealtimeConnectionResource:
+    def __init__(self, connection: RealtimeConnection) -> None:
+        self._connection = connection
+
+
+class RealtimeSessionResource(BaseRealtimeConnectionResource):
+    def update(self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN) -> None:
+        """
+        Send this event to update the session’s default configuration.
+        The client may send this event at any time to update any field,
+        except for `voice`. However, note that once a session has been
+        initialized with a particular `model`, it can’t be changed to
+        another model using `session.update`.
+
+        When the server receives a `session.update`, it will respond
+        with a `session.updated` event showing the full, effective configuration.
+        Only the fields that are present are updated. To clear a field like
+        `instructions`, pass an empty string.
+        """
+        self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
+            )
+        )
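+
+    # Usage sketch: per the docstring, only the fields present are updated, and
+    # an empty string clears a field such as `instructions` (session shape is
+    # illustrative).
+    #
+    #     connection.session.update(session={"instructions": ""})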
+
+
+class RealtimeResponseResource(BaseRealtimeConnectionResource):
+    def create(
+        self,
+        *,
+        event_id: str | NotGiven = NOT_GIVEN,
+        response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
+    ) -> None:
+        """
+        This event instructs the server to create a Response, which means triggering
+        model inference. When in Server VAD mode, the server will create Responses
+        automatically.
+
+        A Response will include at least one Item, and may have two, in which case
+        the second will be a function call. These Items will be appended to the
+        conversation history.
+
+        The server will respond with a `response.created` event, events for Items
+        and content created, and finally a `response.done` event to indicate the
+        Response is complete.
+
+        The `response.create` event includes inference configuration like
+        `instructions` and `temperature`. These fields will override the Session's
+        configuration for this Response only.
+        """
+        self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
+            )
+        )
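+
+    # Usage sketch: per-Response overrides, as described above; they apply to
+    # this Response only and leave the session defaults untouched (fields are
+    # illustrative).
+    #
+    #     connection.response.create(
+    #         response={"instructions": "Answer in one sentence.", "temperature": 0.7}
+    #     )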
+
+    def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
+        """Send this event to cancel an in-progress response.
+
+        The server will respond
+        with a `response.cancelled` event or an error if there is no response to
+        cancel.
+        """
+        self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
+            )
+        )
+
+
+class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource):
+    def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+        """Send this event to clear the audio bytes in the buffer.
+
+        The server will
+        respond with an `input_audio_buffer.cleared` event.
+        """
+        self._connection.send(
+            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
+        )
+
+    def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+        """
+        Send this event to commit the user input audio buffer, which will create a
+        new user message item in the conversation. This event will produce an error
+        if the input audio buffer is empty. When in Server VAD mode, the client does
+        not need to send this event; the server will commit the audio buffer
+        automatically.
+
+        Committing the input audio buffer will trigger input audio transcription
+        (if enabled in session configuration), but it will not create a response
+        from the model. The server will respond with an `input_audio_buffer.committed`
+        event.
+        """
+        self._connection.send(
+            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
+        )
+
+    def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+        """Send this event to append audio bytes to the input audio buffer.
+
+        The audio
+        buffer is temporary storage you can write to and later commit. In Server VAD
+        mode, the audio buffer is used to detect speech and the server will decide
+        when to commit. When Server VAD is disabled, you must commit the audio buffer
+        manually.
+
+        The client may choose how much audio to place in each event up to a maximum
+        of 15 MiB; for example, streaming smaller chunks from the client may allow the
+        VAD to be more responsive. Unlike most other client events, the server will
+        not send a confirmation response to this event.
+        """
+        self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
+            )
+        )
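+
+    # Usage sketch: streaming audio in small base64 chunks keeps server VAD
+    # responsive; `read_pcm16_chunk()` is a hypothetical capture helper.
+    #
+    #     import base64
+    #
+    #     while chunk := read_pcm16_chunk():
+    #         connection.input_audio_buffer.append(
+    #             audio=base64.b64encode(chunk).decode()
+    #         )
+    #     connection.input_audio_buffer.commit()  # only needed when server VAD is off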
+
+
+class RealtimeConversationResource(BaseRealtimeConnectionResource):
+    @cached_property
+    def item(self) -> RealtimeConversationItemResource:
+        return RealtimeConversationItemResource(self._connection)
+
+
+class RealtimeConversationItemResource(BaseRealtimeConnectionResource):
+    def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+        """Send this event when you want to remove any item from the conversation
+        history.
+
+        The server will respond with a `conversation.item.deleted` event,
+        unless the item does not exist in the conversation history, in which case the
+        server will respond with an error.
+        """
+        self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
+            )
+        )
+
+    def create(
+        self,
+        *,
+        item: ConversationItemParam,
+        event_id: str | NotGiven = NOT_GIVEN,
+        previous_item_id: str | NotGiven = NOT_GIVEN,
+    ) -> None:
+        """
+        Add a new Item to the Conversation's context, including messages, function
+        calls, and function call responses. This event can be used both to populate a
+        "history" of the conversation and to add new items mid-stream, but has the
+        current limitation that it cannot populate assistant audio messages.
+
+        If successful, the server will respond with a `conversation.item.created`
+        event, otherwise an `error` event will be sent.
+        """
+        self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given(
+                    {
+                        "type": "conversation.item.create",
+                        "item": item,
+                        "event_id": event_id,
+                        "previous_item_id": previous_item_id,
+                    }
+                ),
+            )
+        )
+
+    def truncate(
+        self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
+    ) -> None:
+        """Send this event to truncate a previous assistant message’s audio.
+
+        The server
+        will produce audio faster than realtime, so this event is useful when the user
+        interrupts to truncate audio that has already been sent to the client but not
+        yet played. This will synchronize the server's understanding of the audio with
+        the client's playback.
+
+        Truncating audio will delete the server-side text transcript to ensure there
+        is no text in the context that hasn't been heard by the user.
+
+        If successful, the server will respond with a `conversation.item.truncated`
+        event.
+        """
+        self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given(
+                    {
+                        "type": "conversation.item.truncate",
+                        "audio_end_ms": audio_end_ms,
+                        "content_index": content_index,
+                        "item_id": item_id,
+                        "event_id": event_id,
+                    }
+                ),
+            )
+        )
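+
+    # Usage sketch: truncating already-sent assistant audio when the user
+    # interrupts; the item id and playback offset come from earlier server
+    # events (variable names are illustrative).
+    #
+    #     connection.conversation.item.truncate(
+    #         item_id=last_assistant_item_id,
+    #         content_index=0,
+    #         audio_end_ms=elapsed_playback_ms,
+    #     )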
+
+    def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+        """
+        Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
+        The server will respond with a `conversation.item.retrieved` event,
+        unless the item does not exist in the conversation history, in which case the
+        server will respond with an error.
+        """
+        self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
+            )
+        )
+
+
+class RealtimeTranscriptionSessionResource(BaseRealtimeConnectionResource):
+    def update(
+        self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
+    ) -> None:
+        """Send this event to update a transcription session."""
+        self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
+            )
+        )
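+
+    # Usage sketch: the update takes a transcription-session payload; the field
+    # shown is illustrative of that schema.
+    #
+    #     connection.transcription_session.update(
+    #         session={"input_audio_transcription": {"model": "gpt-4o-transcribe"}}
+    #     )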
+
+
+class BaseAsyncRealtimeConnectionResource:
+    def __init__(self, connection: AsyncRealtimeConnection) -> None:
+        self._connection = connection
+
+
+class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource):
+    async def update(
+        self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN
+    ) -> None:
+        """
+        Send this event to update the session’s default configuration.
+        The client may send this event at any time to update any field,
+        except for `voice`. However, note that once a session has been
+        initialized with a particular `model`, it can’t be changed to
+        another model using `session.update`.
+
+        When the server receives a `session.update`, it will respond
+        with a `session.updated` event showing the full, effective configuration.
+        Only the fields that are present are updated. To clear a field like
+        `instructions`, pass an empty string.
+        """
+        await self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
+            )
+        )
+
+
+class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource):
+    async def create(
+        self,
+        *,
+        event_id: str | NotGiven = NOT_GIVEN,
+        response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
+    ) -> None:
+        """
+        This event instructs the server to create a Response, which means triggering
+        model inference. When in Server VAD mode, the server will create Responses
+        automatically.
+
+        A Response will include at least one Item, and may have two, in which case
+        the second will be a function call. These Items will be appended to the
+        conversation history.
+
+        The server will respond with a `response.created` event, events for Items
+        and content created, and finally a `response.done` event to indicate the
+        Response is complete.
+
+        The `response.create` event includes inference configuration like
+        `instructions` and `temperature`. These fields will override the Session's
+        configuration for this Response only.
+        """
+        await self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
+            )
+        )
+
+    async def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
+        """Send this event to cancel an in-progress response.
+
+        The server will respond
+        with a `response.cancelled` event or an error if there is no response to
+        cancel.
+        """
+        await self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
+            )
+        )
+
+
+class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
+    async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+        """Send this event to clear the audio bytes in the buffer.
+
+        The server will
+        respond with an `input_audio_buffer.cleared` event.
+        """
+        await self._connection.send(
+            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
+        )
+
+    async def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+        """
+        Send this event to commit the user input audio buffer, which will create a
+        new user message item in the conversation. This event will produce an error
+        if the input audio buffer is empty. When in Server VAD mode, the client does
+        not need to send this event; the server will commit the audio buffer
+        automatically.
+
+        Committing the input audio buffer will trigger input audio transcription
+        (if enabled in session configuration), but it will not create a response
+        from the model. The server will respond with an `input_audio_buffer.committed`
+        event.
+        """
+        await self._connection.send(
+            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
+        )
+
+    async def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+        """Send this event to append audio bytes to the input audio buffer.
+
+        The audio
+        buffer is temporary storage you can write to and later commit. In Server VAD
+        mode, the audio buffer is used to detect speech and the server will decide
+        when to commit. When Server VAD is disabled, you must commit the audio buffer
+        manually.
+
+        The client may choose how much audio to place in each event up to a maximum
+        of 15 MiB; for example, streaming smaller chunks from the client may allow the
+        VAD to be more responsive. Unlike most other client events, the server will
+        not send a confirmation response to this event.
+        """
+        await self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
+            )
+        )
+
+
+class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource):
+    @cached_property
+    def item(self) -> AsyncRealtimeConversationItemResource:
+        return AsyncRealtimeConversationItemResource(self._connection)
+
+
+class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource):
+    async def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+        """Send this event when you want to remove any item from the conversation
+        history.
+
+        The server will respond with a `conversation.item.deleted` event,
+        unless the item does not exist in the conversation history, in which case the
+        server will respond with an error.
+        """
+        await self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
+            )
+        )
+
+    async def create(
+        self,
+        *,
+        item: ConversationItemParam,
+        event_id: str | NotGiven = NOT_GIVEN,
+        previous_item_id: str | NotGiven = NOT_GIVEN,
+    ) -> None:
+        """
+        Add a new Item to the Conversation's context, including messages, function
+        calls, and function call responses. This event can be used both to populate a
+        "history" of the conversation and to add new items mid-stream, but has the
+        current limitation that it cannot populate assistant audio messages.
+
+        If successful, the server will respond with a `conversation.item.created`
+        event, otherwise an `error` event will be sent.
+        """
+        await self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given(
+                    {
+                        "type": "conversation.item.create",
+                        "item": item,
+                        "event_id": event_id,
+                        "previous_item_id": previous_item_id,
+                    }
+                ),
+            )
+        )
+
+    async def truncate(
+        self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
+    ) -> None:
+        """Send this event to truncate a previous assistant message’s audio.
+
+        The server
+        will produce audio faster than realtime, so this event is useful when the user
+        interrupts to truncate audio that has already been sent to the client but not
+        yet played. This will synchronize the server's understanding of the audio with
+        the client's playback.
+
+        Truncating audio will delete the server-side text transcript to ensure there
+        is no text in the context that hasn't been heard by the user.
+
+        If successful, the server will respond with a `conversation.item.truncated`
+        event.
+        """
+        await self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given(
+                    {
+                        "type": "conversation.item.truncate",
+                        "audio_end_ms": audio_end_ms,
+                        "content_index": content_index,
+                        "item_id": item_id,
+                        "event_id": event_id,
+                    }
+                ),
+            )
+        )
+
+    async def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+        """
+        Send this event when you want to retrieve the server's representation of a
+        specific item in the conversation history. This is useful, for example, to
+        inspect user audio after noise cancellation and VAD.
+        The server will respond with a `conversation.item.retrieved` event,
+        unless the item does not exist in the conversation history, in which case the
+        server will respond with an error.
+        """
+        await self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
+            )
+        )
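+
+    # Illustrative flow (assumptions: an open async realtime `connection` and
+    # an existing item id `item_123`): populate conversation history with a
+    # user message, then remove an item.
+    #
+    #     await connection.conversation.item.create(
+    #         item={
+    #             "type": "message",
+    #             "role": "user",
+    #             "content": [{"type": "input_text", "text": "Hello!"}],
+    #         }
+    #     )
+    #     await connection.conversation.item.delete(item_id="item_123")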
+
+
+class AsyncRealtimeTranscriptionSessionResource(BaseAsyncRealtimeConnectionResource):
+    async def update(
+        self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
+    ) -> None:
+        """Send this event to update a transcription session."""
+        await self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
+            )
+        )
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py
new file mode 100644
index 00000000..5884e54d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py
@@ -0,0 +1,383 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._base_client import make_request_options
+from ....types.beta.realtime import session_create_params
+from ....types.beta.realtime.session_create_response import SessionCreateResponse
+
+__all__ = ["Sessions", "AsyncSessions"]
+
+
+class Sessions(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> SessionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return SessionsWithRawResponse(self)
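+
+    # Usage sketch (illustrative): the raw-response wrapper exposes HTTP
+    # details before parsing, e.g. to read request IDs from response headers.
+    # `client` is assumed to be a configured `OpenAI()` instance.
+    #
+    #     raw = client.beta.realtime.sessions.with_raw_response.create()
+    #     print(raw.headers.get("x-request-id"))
+    #     session = raw.parse()  # -> SessionCreateResponse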
+
+    @cached_property
+    def with_streaming_response(self) -> SessionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return SessionsWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+        input_audio_noise_reduction: session_create_params.InputAudioNoiseReduction | NotGiven = NOT_GIVEN,
+        input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+        instructions: str | NotGiven = NOT_GIVEN,
+        max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
+        modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+        model: Literal[
+            "gpt-4o-realtime-preview",
+            "gpt-4o-realtime-preview-2024-10-01",
+            "gpt-4o-realtime-preview-2024-12-17",
+            "gpt-4o-mini-realtime-preview",
+            "gpt-4o-mini-realtime-preview-2024-12-17",
+        ]
+        | NotGiven = NOT_GIVEN,
+        output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        tool_choice: str | NotGiven = NOT_GIVEN,
+        tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN,
+        turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+        voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SessionCreateResponse:
+        """
+        Create an ephemeral API token for use in client-side applications with the
+        Realtime API. Can be configured with the same session parameters as the
+        `session.update` client event.
+
+        It responds with a session object, plus a `client_secret` key which contains a
+        usable ephemeral API token that can be used to authenticate browser clients for
+        the Realtime API.
+
+        Args:
+          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+              (mono), and little-endian byte order.
+
+          input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
+              off. Noise reduction filters audio added to the input audio buffer before it is
+              sent to VAD and the model. Filtering the audio can improve VAD and turn
+              detection accuracy (reducing false positives) and model performance by improving
+              perception of the input audio.
+
+          input_audio_transcription: Configuration for input audio transcription. This defaults to off and can be
+              set to `null` to turn it off once enabled. Input audio transcription is not
+              native to the model, since the model consumes audio directly. Transcription
+              runs asynchronously through
+              [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+              and should be treated as guidance of input audio content rather than precisely
+              what the model heard. The client can optionally set the language and prompt
+              for transcription; these offer additional guidance to the transcription
+              service.
+
+          instructions: The default system instructions (i.e. system message) prepended to model calls.
+              This field allows the client to guide the model on desired responses. The model
+              can be instructed on response content and format (e.g. "be extremely succinct",
+              "act friendly", "here are examples of good responses") and on audio behavior
+              (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
+              instructions are not guaranteed to be followed by the model, but they provide
+              guidance to the model on the desired behavior.
+
+              Note that the server sets default instructions which will be used if this field
+              is not set and are visible in the `session.created` event at the start of the
+              session.
+
+          max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
+              tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+              `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+
+          modalities: The set of modalities the model can respond with. To disable audio, set this to
+              ["text"].
+
+          model: The Realtime model used for this session.
+
+          output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+              For `pcm16`, output audio is sampled at a rate of 24kHz.
+
+          temperature: Sampling temperature for the model, limited to [0.6, 1.2]. For audio models, a
+              temperature of 0.8 is highly recommended for best performance.
+
+          tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
+              a function.
+
+          tools: Tools (functions) available to the model.
+
+          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+              set to `null` to turn off, in which case the client must manually trigger model
+              response. Server VAD means that the model will detect the start and end of
+              speech based on audio volume and respond at the end of user speech. Semantic VAD
+              is more advanced and uses a turn detection model (in conjunction with VAD) to
+              semantically estimate whether the user has finished speaking, then dynamically
+              sets a timeout based on this probability. For example, if user audio trails off
+              with "uhhm", the model will score a low probability of turn end and wait longer
+              for the user to continue speaking. This can be useful for more natural
+              conversations, but may have a higher latency.
+
+          voice: The voice the model uses to respond. Voice cannot be changed during the session
+              once the model has responded with audio at least once. Current voice options are
+              `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            "/realtime/sessions",
+            body=maybe_transform(
+                {
+                    "input_audio_format": input_audio_format,
+                    "input_audio_noise_reduction": input_audio_noise_reduction,
+                    "input_audio_transcription": input_audio_transcription,
+                    "instructions": instructions,
+                    "max_response_output_tokens": max_response_output_tokens,
+                    "modalities": modalities,
+                    "model": model,
+                    "output_audio_format": output_audio_format,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "turn_detection": turn_detection,
+                    "voice": voice,
+                },
+                session_create_params.SessionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=SessionCreateResponse,
+        )
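+
+    # Illustrative sketch (assumes a configured sync client, `client = OpenAI()`):
+    # mint an ephemeral token for a browser-side realtime session.
+    #
+    #     session = client.beta.realtime.sessions.create(
+    #         model="gpt-4o-realtime-preview",
+    #         modalities=["text", "audio"],
+    #         voice="verse",
+    #     )
+    #     ephemeral_key = session.client_secret.value  # hand this to the browser client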
+
+
+class AsyncSessions(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncSessionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncSessionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncSessionsWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+        input_audio_noise_reduction: session_create_params.InputAudioNoiseReduction | NotGiven = NOT_GIVEN,
+        input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+        instructions: str | NotGiven = NOT_GIVEN,
+        max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
+        modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+        model: Literal[
+            "gpt-4o-realtime-preview",
+            "gpt-4o-realtime-preview-2024-10-01",
+            "gpt-4o-realtime-preview-2024-12-17",
+            "gpt-4o-mini-realtime-preview",
+            "gpt-4o-mini-realtime-preview-2024-12-17",
+        ]
+        | NotGiven = NOT_GIVEN,
+        output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        tool_choice: str | NotGiven = NOT_GIVEN,
+        tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN,
+        turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+        voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SessionCreateResponse:
+        """
+        Create an ephemeral API token for use in client-side applications with the
+        Realtime API. Can be configured with the same session parameters as the
+        `session.update` client event.
+
+        It responds with a session object, plus a `client_secret` key which contains a
+        usable ephemeral API token that can be used to authenticate browser clients for
+        the Realtime API.
+
+        Args:
+          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+              (mono), and little-endian byte order.
+
+          input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
+              off. Noise reduction filters audio added to the input audio buffer before it is
+              sent to VAD and the model. Filtering the audio can improve VAD and turn
+              detection accuracy (reducing false positives) and model performance by improving
+              perception of the input audio.
+
+          input_audio_transcription: Configuration for input audio transcription. This defaults to off and can be
+              set to `null` to turn it off once enabled. Input audio transcription is not
+              native to the model, since the model consumes audio directly. Transcription
+              runs asynchronously through
+              [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+              and should be treated as guidance of input audio content rather than precisely
+              what the model heard. The client can optionally set the language and prompt
+              for transcription; these offer additional guidance to the transcription
+              service.
+
+          instructions: The default system instructions (i.e. system message) prepended to model calls.
+              This field allows the client to guide the model on desired responses. The model
+              can be instructed on response content and format (e.g. "be extremely succinct",
+              "act friendly", "here are examples of good responses") and on audio behavior
+              (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
+              instructions are not guaranteed to be followed by the model, but they provide
+              guidance to the model on the desired behavior.
+
+              Note that the server sets default instructions which will be used if this field
+              is not set and are visible in the `session.created` event at the start of the
+              session.
+
+          max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
+              tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+              `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+
+          modalities: The set of modalities the model can respond with. To disable audio, set this to
+              ["text"].
+
+          model: The Realtime model used for this session.
+
+          output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+              For `pcm16`, output audio is sampled at a rate of 24kHz.
+
+          temperature: Sampling temperature for the model, limited to [0.6, 1.2]. For audio models, a
+              temperature of 0.8 is highly recommended for best performance.
+
+          tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
+              a function.
+
+          tools: Tools (functions) available to the model.
+
+          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+              set to `null` to turn off, in which case the client must manually trigger model
+              response. Server VAD means that the model will detect the start and end of
+              speech based on audio volume and respond at the end of user speech. Semantic VAD
+              is more advanced and uses a turn detection model (in conjunction with VAD) to
+              semantically estimate whether the user has finished speaking, then dynamically
+              sets a timeout based on this probability. For example, if user audio trails off
+              with "uhhm", the model will score a low probability of turn end and wait longer
+              for the user to continue speaking. This can be useful for more natural
+              conversations, but may have a higher latency.
+
+          voice: The voice the model uses to respond. Voice cannot be changed during the session
+              once the model has responded with audio at least once. Current voice options are
+              `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            "/realtime/sessions",
+            body=await async_maybe_transform(
+                {
+                    "input_audio_format": input_audio_format,
+                    "input_audio_noise_reduction": input_audio_noise_reduction,
+                    "input_audio_transcription": input_audio_transcription,
+                    "instructions": instructions,
+                    "max_response_output_tokens": max_response_output_tokens,
+                    "modalities": modalities,
+                    "model": model,
+                    "output_audio_format": output_audio_format,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "turn_detection": turn_detection,
+                    "voice": voice,
+                },
+                session_create_params.SessionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=SessionCreateResponse,
+        )
+
+
+class SessionsWithRawResponse:
+    def __init__(self, sessions: Sessions) -> None:
+        self._sessions = sessions
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            sessions.create,
+        )
+
+
+class AsyncSessionsWithRawResponse:
+    def __init__(self, sessions: AsyncSessions) -> None:
+        self._sessions = sessions
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            sessions.create,
+        )
+
+
+class SessionsWithStreamingResponse:
+    def __init__(self, sessions: Sessions) -> None:
+        self._sessions = sessions
+
+        self.create = to_streamed_response_wrapper(
+            sessions.create,
+        )
+
+
+class AsyncSessionsWithStreamingResponse:
+    def __init__(self, sessions: AsyncSessions) -> None:
+        self._sessions = sessions
+
+        self.create = async_to_streamed_response_wrapper(
+            sessions.create,
+        )
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/transcription_sessions.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/transcription_sessions.py
new file mode 100644
index 00000000..0917da71
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/transcription_sessions.py
@@ -0,0 +1,277 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._base_client import make_request_options
+from ....types.beta.realtime import transcription_session_create_params
+from ....types.beta.realtime.transcription_session import TranscriptionSession
+
+__all__ = ["TranscriptionSessions", "AsyncTranscriptionSessions"]
+
+
+class TranscriptionSessions(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> TranscriptionSessionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return TranscriptionSessionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> TranscriptionSessionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return TranscriptionSessionsWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        include: List[str] | NotGiven = NOT_GIVEN,
+        input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+        input_audio_noise_reduction: transcription_session_create_params.InputAudioNoiseReduction
+        | NotGiven = NOT_GIVEN,
+        input_audio_transcription: transcription_session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+        modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+        turn_detection: transcription_session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> TranscriptionSession:
+        """
+        Create an ephemeral API token for use in client-side applications with the
+        Realtime API specifically for realtime transcriptions. Can be configured with
+        the same session parameters as the `transcription_session.update` client event.
+
+        It responds with a session object, plus a `client_secret` key which contains a
+        usable ephemeral API token that can be used to authenticate browser clients for
+        the Realtime API.
+
+        Args:
+          include:
+              The set of items to include in the transcription. Currently available items are:
+
+              - `item.input_audio_transcription.logprobs`
+
+          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+              (mono), and little-endian byte order.
+
+          input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
+              off. Noise reduction filters audio added to the input audio buffer before it is
+              sent to VAD and the model. Filtering the audio can improve VAD and turn
+              detection accuracy (reducing false positives) and model performance by improving
+              perception of the input audio.
+
+          input_audio_transcription: Configuration for input audio transcription. The client can optionally set the
+              language and prompt for transcription; these offer additional guidance to the
+              transcription service.
+
+          modalities: The set of modalities the model can respond with. To disable audio, set this to
+              ["text"].
+
+          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+              set to `null` to turn off, in which case the client must manually trigger model
+              response. Server VAD means that the model will detect the start and end of
+              speech based on audio volume and respond at the end of user speech. Semantic VAD
+              is more advanced and uses a turn detection model (in conjunction with VAD) to
+              semantically estimate whether the user has finished speaking, then dynamically
+              sets a timeout based on this probability. For example, if user audio trails off
+              with "uhhm", the model will score a low probability of turn end and wait longer
+              for the user to continue speaking. This can be useful for more natural
+              conversations, but may have a higher latency.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            "/realtime/transcription_sessions",
+            body=maybe_transform(
+                {
+                    "include": include,
+                    "input_audio_format": input_audio_format,
+                    "input_audio_noise_reduction": input_audio_noise_reduction,
+                    "input_audio_transcription": input_audio_transcription,
+                    "modalities": modalities,
+                    "turn_detection": turn_detection,
+                },
+                transcription_session_create_params.TranscriptionSessionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=TranscriptionSession,
+        )
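+
+    # Illustrative sketch (assumes `client = OpenAI()`; the transcription model
+    # and VAD type shown are examples): create a transcription-only ephemeral
+    # session with semantic VAD.
+    #
+    #     ts = client.beta.realtime.transcription_sessions.create(
+    #         input_audio_format="pcm16",
+    #         input_audio_transcription={"model": "gpt-4o-transcribe"},
+    #         turn_detection={"type": "semantic_vad"},
+    #     )
+    #     ephemeral_key = ts.client_secret.value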
+
+
+class AsyncTranscriptionSessions(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncTranscriptionSessionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncTranscriptionSessionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncTranscriptionSessionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncTranscriptionSessionsWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        include: List[str] | NotGiven = NOT_GIVEN,
+        input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+        input_audio_noise_reduction: transcription_session_create_params.InputAudioNoiseReduction
+        | NotGiven = NOT_GIVEN,
+        input_audio_transcription: transcription_session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+        modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+        turn_detection: transcription_session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> TranscriptionSession:
+        """
+        Create an ephemeral API token for use in client-side applications with the
+        Realtime API specifically for realtime transcriptions. Can be configured with
+        the same session parameters as the `transcription_session.update` client event.
+
+        It responds with a session object, plus a `client_secret` key which contains a
+        usable ephemeral API token that can be used to authenticate browser clients for
+        the Realtime API.
+
+        Args:
+          include:
+              The set of items to include in the transcription. Currently available items are:
+
+              - `item.input_audio_transcription.logprobs`
+
+          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+              (mono), and little-endian byte order.
+
+          input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
+              off. Noise reduction filters audio added to the input audio buffer before it is
+              sent to VAD and the model. Filtering the audio can improve VAD and turn
+              detection accuracy (reducing false positives) and model performance by improving
+              perception of the input audio.
+
+          input_audio_transcription: Configuration for input audio transcription. The client can optionally set the
+              language and prompt for transcription; these offer additional guidance to the
+              transcription service.
+
+          modalities: The set of modalities the model can respond with. To disable audio, set this to
+              ["text"].
+
+          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+              set to `null` to turn off, in which case the client must manually trigger model
+              response. Server VAD means that the model will detect the start and end of
+              speech based on audio volume and respond at the end of user speech. Semantic VAD
+              is more advanced and uses a turn detection model (in conjunction with VAD) to
+              semantically estimate whether the user has finished speaking, then dynamically
+              sets a timeout based on this probability. For example, if user audio trails off
+              with "uhhm", the model will score a low probability of turn end and wait longer
+              for the user to continue speaking. This can be useful for more natural
+              conversations, but may have a higher latency.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            "/realtime/transcription_sessions",
+            body=await async_maybe_transform(
+                {
+                    "include": include,
+                    "input_audio_format": input_audio_format,
+                    "input_audio_noise_reduction": input_audio_noise_reduction,
+                    "input_audio_transcription": input_audio_transcription,
+                    "modalities": modalities,
+                    "turn_detection": turn_detection,
+                },
+                transcription_session_create_params.TranscriptionSessionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=TranscriptionSession,
+        )
+
+
+class TranscriptionSessionsWithRawResponse:
+    def __init__(self, transcription_sessions: TranscriptionSessions) -> None:
+        self._transcription_sessions = transcription_sessions
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            transcription_sessions.create,
+        )
+
+
+class AsyncTranscriptionSessionsWithRawResponse:
+    def __init__(self, transcription_sessions: AsyncTranscriptionSessions) -> None:
+        self._transcription_sessions = transcription_sessions
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            transcription_sessions.create,
+        )
+
+
+class TranscriptionSessionsWithStreamingResponse:
+    def __init__(self, transcription_sessions: TranscriptionSessions) -> None:
+        self._transcription_sessions = transcription_sessions
+
+        self.create = to_streamed_response_wrapper(
+            transcription_sessions.create,
+        )
+
+
+class AsyncTranscriptionSessionsWithStreamingResponse:
+    def __init__(self, transcription_sessions: AsyncTranscriptionSessions) -> None:
+        self._transcription_sessions = transcription_sessions
+
+        self.create = async_to_streamed_response_wrapper(
+            transcription_sessions.create,
+        )
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/__init__.py
new file mode 100644
index 00000000..a66e445b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/__init__.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .runs import (
+    Runs,
+    AsyncRuns,
+    RunsWithRawResponse,
+    AsyncRunsWithRawResponse,
+    RunsWithStreamingResponse,
+    AsyncRunsWithStreamingResponse,
+)
+from .threads import (
+    Threads,
+    AsyncThreads,
+    ThreadsWithRawResponse,
+    AsyncThreadsWithRawResponse,
+    ThreadsWithStreamingResponse,
+    AsyncThreadsWithStreamingResponse,
+)
+from .messages import (
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
+
+__all__ = [
+    "Runs",
+    "AsyncRuns",
+    "RunsWithRawResponse",
+    "AsyncRunsWithRawResponse",
+    "RunsWithStreamingResponse",
+    "AsyncRunsWithStreamingResponse",
+    "Messages",
+    "AsyncMessages",
+    "MessagesWithRawResponse",
+    "AsyncMessagesWithRawResponse",
+    "MessagesWithStreamingResponse",
+    "AsyncMessagesWithStreamingResponse",
+    "Threads",
+    "AsyncThreads",
+    "ThreadsWithRawResponse",
+    "AsyncThreadsWithRawResponse",
+    "ThreadsWithStreamingResponse",
+    "AsyncThreadsWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/messages.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/messages.py
new file mode 100644
index 00000000..e3374aba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/messages.py
@@ -0,0 +1,670 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import (
+    AsyncPaginator,
+    make_request_options,
+)
+from ....types.beta.threads import message_list_params, message_create_params, message_update_params
+from ....types.beta.threads.message import Message
+from ....types.shared_params.metadata import Metadata
+from ....types.beta.threads.message_deleted import MessageDeleted
+from ....types.beta.threads.message_content_part_param import MessageContentPartParam
+
+__all__ = ["Messages", "AsyncMessages"]
+
+
+class Messages(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> MessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return MessagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> MessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return MessagesWithStreamingResponse(self)
+
+    def create(
+        self,
+        thread_id: str,
+        *,
+        content: Union[str, Iterable[MessageContentPartParam]],
+        role: Literal["user", "assistant"],
+        attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message:
+        """
+        Create a message.
+
+        Args:
+          content: The text contents of the message.
+
+          role:
+              The role of the entity that is creating the message. Allowed values include:
+
+              - `user`: Indicates the message is sent by an actual user and should be used in
+                most cases to represent user-generated messages.
+              - `assistant`: Indicates the message is generated by the assistant. Use this
+                value to insert messages from the assistant into the conversation.
+
+          attachments: A list of files attached to the message, and the tools they should be added to.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/threads/{thread_id}/messages",
+            body=maybe_transform(
+                {
+                    "content": content,
+                    "role": role,
+                    "attachments": attachments,
+                    "metadata": metadata,
+                },
+                message_create_params.MessageCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+        )
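+
+    # Usage sketch (illustrative; assumes `client = OpenAI()` and an existing
+    # thread id such as the placeholder "thread_abc123"):
+    #
+    #     message = client.beta.threads.messages.create(
+    #         thread_id="thread_abc123",
+    #         role="user",
+    #         content="How does AI work? Explain it in simple terms.",
+    #     )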
+
+    def retrieve(
+        self,
+        message_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message:
+        """
+        Retrieve a message.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not message_id:
+            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get(
+            f"/threads/{thread_id}/messages/{message_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+        )
+
+    def update(
+        self,
+        message_id: str,
+        *,
+        thread_id: str,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message:
+        """
+        Modifies a message.
+
+        Args:
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not message_id:
+            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/threads/{thread_id}/messages/{message_id}",
+            body=maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+        )
+
+    def list(
+        self,
+        thread_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        run_id: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[Message]:
+        """
+        Returns a list of messages for a given thread.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          run_id: Filter messages by the run ID that generated them.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/threads/{thread_id}/messages",
+            page=SyncCursorPage[Message],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                        "run_id": run_id,
+                    },
+                    message_list_params.MessageListParams,
+                ),
+            ),
+            model=Message,
+        )
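+
+    # Pagination sketch (illustrative): SyncCursorPage is iterable and fetches
+    # subsequent pages automatically using the `after` cursor, so callers can
+    # loop over all messages without handling cursors themselves.
+    #
+    #     for message in client.beta.threads.messages.list(
+    #         thread_id="thread_abc123", limit=20
+    #     ):
+    #         print(message.id)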
+
+    def delete(
+        self,
+        message_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageDeleted:
+        """
+        Deletes a message.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not message_id:
+            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._delete(
+            f"/threads/{thread_id}/messages/{message_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageDeleted,
+        )
+
+
+class AsyncMessages(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncMessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncMessagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncMessagesWithStreamingResponse(self)
+
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        content: Union[str, Iterable[MessageContentPartParam]],
+        role: Literal["user", "assistant"],
+        attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message:
+        """
+        Create a message.
+
+        Args:
+          content: The text contents of the message.
+
+          role:
+              The role of the entity that is creating the message. Allowed values include:
+
+              - `user`: Indicates the message is sent by an actual user and should be used in
+                most cases to represent user-generated messages.
+              - `assistant`: Indicates the message is generated by the assistant. Use this
+                value to insert messages from the assistant into the conversation.
+
+          attachments: A list of files attached to the message, and the tools they should be added to.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/threads/{thread_id}/messages",
+            body=await async_maybe_transform(
+                {
+                    "content": content,
+                    "role": role,
+                    "attachments": attachments,
+                    "metadata": metadata,
+                },
+                message_create_params.MessageCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+        )
+
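+    # Usage sketch for the async create call above; the thread ID is a
+    # placeholder and the client assumes OPENAI_API_KEY is set.
+    #
+    #     import asyncio
+    #     from openai import AsyncOpenAI
+    #
+    #     async def main() -> None:
+    #         client = AsyncOpenAI()
+    #         message = await client.beta.threads.messages.create(
+    #             "thread_abc123",
+    #             role="user",
+    #             content="Summarize the attached report.",
+    #         )
+    #         print(message.id)
+    #
+    #     asyncio.run(main())
+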
+    async def retrieve(
+        self,
+        message_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message:
+        """
+        Retrieve a message.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not message_id:
+            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._get(
+            f"/threads/{thread_id}/messages/{message_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+        )
+
+    async def update(
+        self,
+        message_id: str,
+        *,
+        thread_id: str,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message:
+        """
+        Modifies a message.
+
+        Args:
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not message_id:
+            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/threads/{thread_id}/messages/{message_id}",
+            body=await async_maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+        )
+
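+    # Usage sketch: attaching metadata to an existing message (at most 16
+    # key-value pairs; keys up to 64 characters, values up to 512). IDs are
+    # placeholders.
+    #
+    #     message = await client.beta.threads.messages.update(
+    #         "msg_abc123",
+    #         thread_id="thread_abc123",
+    #         metadata={"reviewed": "true"},
+    #     )
+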
+    def list(
+        self,
+        thread_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        run_id: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[Message, AsyncCursorPage[Message]]:
+        """
+        Returns a list of messages for a given thread.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          run_id: Filter messages by the run ID that generated them.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/threads/{thread_id}/messages",
+            page=AsyncCursorPage[Message],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                        "run_id": run_id,
+                    },
+                    message_list_params.MessageListParams,
+                ),
+            ),
+            model=Message,
+        )
+
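+    # Usage sketch: the AsyncPaginator returned above supports `async for`
+    # and fetches subsequent pages on demand, so the `after`/`before` cursors
+    # only need to be passed when driving pagination manually. The thread ID
+    # is a placeholder.
+    #
+    #     async for message in client.beta.threads.messages.list(
+    #         "thread_abc123",
+    #         order="asc",
+    #         limit=50,
+    #     ):
+    #         print(message.id)
+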
+    async def delete(
+        self,
+        message_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageDeleted:
+        """
+        Deletes a message.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not message_id:
+            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._delete(
+            f"/threads/{thread_id}/messages/{message_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageDeleted,
+        )
+
+
+class MessagesWithRawResponse:
+    def __init__(self, messages: Messages) -> None:
+        self._messages = messages
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            messages.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            messages.retrieve,
+        )
+        self.update = _legacy_response.to_raw_response_wrapper(
+            messages.update,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            messages.list,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            messages.delete,
+        )
+
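+# Usage sketch: the wrapper above is reached through `.with_raw_response` and
+# returns the raw HTTP response, from which `.parse()` recovers the typed
+# model. IDs are placeholders.
+#
+#     raw = client.beta.threads.messages.with_raw_response.retrieve(
+#         "msg_abc123",
+#         thread_id="thread_abc123",
+#     )
+#     print(raw.headers.get("x-request-id"))
+#     message = raw.parse()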
+
+class AsyncMessagesWithRawResponse:
+    def __init__(self, messages: AsyncMessages) -> None:
+        self._messages = messages
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            messages.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            messages.retrieve,
+        )
+        self.update = _legacy_response.async_to_raw_response_wrapper(
+            messages.update,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            messages.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            messages.delete,
+        )
+
+
+class MessagesWithStreamingResponse:
+    def __init__(self, messages: Messages) -> None:
+        self._messages = messages
+
+        self.create = to_streamed_response_wrapper(
+            messages.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            messages.retrieve,
+        )
+        self.update = to_streamed_response_wrapper(
+            messages.update,
+        )
+        self.list = to_streamed_response_wrapper(
+            messages.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            messages.delete,
+        )
+
+
+class AsyncMessagesWithStreamingResponse:
+    def __init__(self, messages: AsyncMessages) -> None:
+        self._messages = messages
+
+        self.create = async_to_streamed_response_wrapper(
+            messages.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            messages.retrieve,
+        )
+        self.update = async_to_streamed_response_wrapper(
+            messages.update,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            messages.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            messages.delete,
+        )
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/__init__.py
new file mode 100644
index 00000000..50aa9fae
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .runs import (
+    Runs,
+    AsyncRuns,
+    RunsWithRawResponse,
+    AsyncRunsWithRawResponse,
+    RunsWithStreamingResponse,
+    AsyncRunsWithStreamingResponse,
+)
+from .steps import (
+    Steps,
+    AsyncSteps,
+    StepsWithRawResponse,
+    AsyncStepsWithRawResponse,
+    StepsWithStreamingResponse,
+    AsyncStepsWithStreamingResponse,
+)
+
+__all__ = [
+    "Steps",
+    "AsyncSteps",
+    "StepsWithRawResponse",
+    "AsyncStepsWithRawResponse",
+    "StepsWithStreamingResponse",
+    "AsyncStepsWithStreamingResponse",
+    "Runs",
+    "AsyncRuns",
+    "RunsWithRawResponse",
+    "AsyncRunsWithRawResponse",
+    "RunsWithStreamingResponse",
+    "AsyncRunsWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/runs.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/runs.py
new file mode 100644
index 00000000..acb1c9b2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/runs.py
@@ -0,0 +1,2989 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import typing_extensions
+from typing import List, Union, Iterable, Optional
+from functools import partial
+from typing_extensions import Literal, overload
+
+import httpx
+
+from ..... import _legacy_response
+from .steps import (
+    Steps,
+    AsyncSteps,
+    StepsWithRawResponse,
+    AsyncStepsWithRawResponse,
+    StepsWithStreamingResponse,
+    AsyncStepsWithStreamingResponse,
+)
+from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ....._utils import (
+    is_given,
+    required_args,
+    maybe_transform,
+    async_maybe_transform,
+)
+from ....._compat import cached_property
+from ....._resource import SyncAPIResource, AsyncAPIResource
+from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....._streaming import Stream, AsyncStream
+from .....pagination import SyncCursorPage, AsyncCursorPage
+from ....._base_client import AsyncPaginator, make_request_options
+from .....lib.streaming import (
+    AssistantEventHandler,
+    AssistantEventHandlerT,
+    AssistantStreamManager,
+    AsyncAssistantEventHandler,
+    AsyncAssistantEventHandlerT,
+    AsyncAssistantStreamManager,
+)
+from .....types.beta.threads import (
+    run_list_params,
+    run_create_params,
+    run_update_params,
+    run_submit_tool_outputs_params,
+)
+from .....types.beta.threads.run import Run
+from .....types.shared.chat_model import ChatModel
+from .....types.shared_params.metadata import Metadata
+from .....types.shared.reasoning_effort import ReasoningEffort
+from .....types.beta.assistant_tool_param import AssistantToolParam
+from .....types.beta.assistant_stream_event import AssistantStreamEvent
+from .....types.beta.threads.runs.run_step_include import RunStepInclude
+from .....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
+from .....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
+
+__all__ = ["Runs", "AsyncRuns"]
+
+
+class Runs(SyncAPIResource):
+    @cached_property
+    def steps(self) -> Steps:
+        return Steps(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> RunsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return RunsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> RunsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return RunsWithStreamingResponse(self)
+
+    @overload
+    def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          additional_messages: Adds additional messages to the thread before creating the run.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          reasoning_effort: **o-series models only**
+
+              Constrains effort on reasoning for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+              result in faster responses and fewer tokens used on reasoning in a response.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        stream: Literal[True],
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[AssistantStreamEvent]:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          additional_messages: Adds additional messages to the thread before creating the run.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          reasoning_effort: **o-series models only**
+
+              Constrains effort on reasoning for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+              result in faster responses and fewer tokens used on reasoning in a response.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        stream: bool,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          additional_messages: Adds additional messages to the thread before creating the run.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          reasoning_effort: **o-series models only**
+
+              Constrains effort on reasoning for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+              result in faster responses and fewer tokens used on reasoning in a response.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["assistant_id"], ["assistant_id", "stream"])
+    def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/threads/{thread_id}/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "additional_instructions": additional_instructions,
+                    "additional_messages": additional_messages,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "reasoning_effort": reasoning_effort,
+                    "response_format": response_format,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation_strategy": truncation_strategy,
+                },
+                run_create_params.RunCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform({"include": include}, run_create_params.RunCreateParams),
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
+
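+    # Usage sketch: with stream=True the call above returns a
+    # Stream[AssistantStreamEvent] that can be iterated directly; the
+    # higher-level `.stream()` helper is usually preferable for event
+    # handling. IDs are placeholders.
+    #
+    #     events = client.beta.threads.runs.create(
+    #         "thread_abc123",
+    #         assistant_id="asst_abc123",
+    #         stream=True,
+    #     )
+    #     for event in events:
+    #         print(event.event)
+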
+    def retrieve(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Retrieves a run.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get(
+            f"/threads/{thread_id}/runs/{run_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+        )
+
+    def update(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Modifies a run.
+
+        Args:
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/threads/{thread_id}/runs/{run_id}",
+            body=maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+        )
+
+    def list(
+        self,
+        thread_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[Run]:
+        """
+        Returns a list of runs belonging to a thread.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/threads/{thread_id}/runs",
+            page=SyncCursorPage[Run],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    run_list_params.RunListParams,
+                ),
+            ),
+            model=Run,
+        )
+
+    def cancel(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Cancels a run that is `in_progress`.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/threads/{thread_id}/runs/{run_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+        )
+
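+    # Usage sketch: cancellation is asynchronous on the server, so the
+    # returned Run may still report `cancelling` until a later retrieve. IDs
+    # are placeholders.
+    #
+    #     run = client.beta.threads.runs.cancel(
+    #         "run_abc123",
+    #         thread_id="thread_abc123",
+    #     )
+    #     print(run.status)
+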
+    def create_and_poll(
+        self,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to create a run and poll for a terminal state. More information on Run
+        lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        run = self.create(
+            thread_id=thread_id,
+            assistant_id=assistant_id,
+            include=include,
+            additional_instructions=additional_instructions,
+            additional_messages=additional_messages,
+            instructions=instructions,
+            max_completion_tokens=max_completion_tokens,
+            max_prompt_tokens=max_prompt_tokens,
+            metadata=metadata,
+            model=model,
+            response_format=response_format,
+            temperature=temperature,
+            tool_choice=tool_choice,
+            parallel_tool_calls=parallel_tool_calls,
+            reasoning_effort=reasoning_effort,
+            # We assume we are not streaming when polling
+            stream=False,
+            tools=tools,
+            truncation_strategy=truncation_strategy,
+            top_p=top_p,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        return self.poll(
+            run.id,
+            thread_id=thread_id,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            poll_interval_ms=poll_interval_ms,
+            timeout=timeout,
+        )
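+
+    # Illustrative usage (not part of the SDK; IDs are placeholders). The call
+    # blocks until the run reaches a terminal state:
+    #
+    #     run = client.beta.threads.runs.create_and_poll(
+    #         thread_id="thread_abc123",
+    #         assistant_id="asst_abc123",
+    #     )
+    #     if run.status == "completed":
+    #         print("run finished")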
+
+    @overload
+    @typing_extensions.deprecated("use `stream` instead")
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler]:
+        """Create a Run stream"""
+        ...
+
+    @overload
+    @typing_extensions.deprecated("use `stream` instead")
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandlerT]:
+        """Create a Run stream"""
+        ...
+
+    @typing_extensions.deprecated("use `stream` instead")
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+        """Create a Run stream"""
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v2",
+            "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        make_request = partial(
+            self._post,
+            f"/threads/{thread_id}/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "additional_instructions": additional_instructions,
+                    "additional_messages": additional_messages,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                    "tools": tools,
+                    "truncation_strategy": truncation_strategy,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "reasoning_effort": reasoning_effort,
+                    "top_p": top_p,
+                },
+                run_create_params.RunCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
+        return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
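+
+    # `create_and_stream` is deprecated in favor of `stream`; an equivalent
+    # call (illustrative, placeholder IDs) is:
+    #
+    #     with client.beta.threads.runs.stream(
+    #         thread_id="thread_abc123",
+    #         assistant_id="asst_abc123",
+    #     ) as stream:
+    #         stream.until_done()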
+
+    def poll(
+        self,
+        run_id: str,
+        thread_id: str,
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to poll a run's status until it reaches a terminal state. More
+        information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})}
+
+        if is_given(poll_interval_ms):
+            extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+        terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"}
+        while True:
+            response = self.with_raw_response.retrieve(
+                thread_id=thread_id,
+                run_id=run_id,
+                extra_headers=extra_headers,
+                extra_body=extra_body,
+                extra_query=extra_query,
+                timeout=timeout,
+            )
+
+            run = response.parse()
+            # Return if we reached a terminal state
+            if run.status in terminal_states:
+                return run
+
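+            # Prefer the server-suggested interval from the `openai-poll-after-ms`
+            # response header; fall back to 1000ms when it is absent.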
+            if not is_given(poll_interval_ms):
+                from_header = response.headers.get("openai-poll-after-ms")
+                if from_header is not None:
+                    poll_interval_ms = int(from_header)
+                else:
+                    poll_interval_ms = 1000
+
+            self._sleep(poll_interval_ms / 1000)
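+
+    # Illustrative usage (not part of the SDK; IDs are placeholders):
+    #
+    #     run = client.beta.threads.runs.poll(
+    #         "run_abc123",
+    #         thread_id="thread_abc123",
+    #         poll_interval_ms=500,  # optional override of the polling cadence
+    #     )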
+
+    @overload
+    def stream(
+        self,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler]:
+        """Create a Run stream"""
+        ...
+
+    @overload
+    def stream(
+        self,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandlerT]:
+        """Create a Run stream"""
+        ...
+
+    def stream(
+        self,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+        """Create a Run stream"""
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v2",
+            "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        make_request = partial(
+            self._post,
+            f"/threads/{thread_id}/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "additional_instructions": additional_instructions,
+                    "additional_messages": additional_messages,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                    "tools": tools,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "reasoning_effort": reasoning_effort,
+                    "truncation_strategy": truncation_strategy,
+                    "top_p": top_p,
+                },
+                run_create_params.RunCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform({"include": include}, run_create_params.RunCreateParams),
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
+        return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
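+
+    # Illustrative usage (not part of the SDK; IDs are placeholders). The
+    # returned manager is a context manager; `until_done()` blocks until the
+    # stream ends, and `text_deltas` yields message text as it arrives:
+    #
+    #     with client.beta.threads.runs.stream(
+    #         thread_id="thread_abc123",
+    #         assistant_id="asst_abc123",
+    #     ) as stream:
+    #         for text in stream.text_deltas:
+    #             print(text, end="")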
+
+    @overload
+    def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
+
+        Args:
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        stream: Literal[True],
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[AssistantStreamEvent]:
+        """
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
+
+        Args:
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        stream: bool,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
+        """
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
+
+        Args:
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"])
+    def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+            body=maybe_transform(
+                {
+                    "tool_outputs": tool_outputs,
+                    "stream": stream,
+                },
+                run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
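+
+    # Illustrative usage (not part of the SDK; IDs are placeholders). Outputs
+    # for every pending tool call must be included in the single request:
+    #
+    #     run = client.beta.threads.runs.submit_tool_outputs(
+    #         "run_abc123",
+    #         thread_id="thread_abc123",
+    #         tool_outputs=[{"tool_call_id": "call_abc123", "output": "42"}],
+    #     )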
+
+    def submit_tool_outputs_and_poll(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to submit tool outputs to a run and poll for a terminal run state.
+        More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        run = self.submit_tool_outputs(
+            run_id=run_id,
+            thread_id=thread_id,
+            tool_outputs=tool_outputs,
+            stream=False,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        return self.poll(
+            run_id=run.id,
+            thread_id=thread_id,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+            poll_interval_ms=poll_interval_ms,
+        )
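+
+    # Illustrative usage (not part of the SDK; IDs are placeholders). This is
+    # `submit_tool_outputs(stream=False)` followed by `poll` in one call:
+    #
+    #     run = client.beta.threads.runs.submit_tool_outputs_and_poll(
+    #         run_id="run_abc123",
+    #         thread_id="thread_abc123",
+    #         tool_outputs=[{"tool_call_id": "call_abc123", "output": "42"}],
+    #     )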
+
+    @overload
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler]:
+        """
+        Submit the tool outputs for a run that requires action and stream the run
+        to a terminal state. More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        ...
+
+    @overload
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        event_handler: AssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandlerT]:
+        """
+        Submit the tool outputs for a run that requires action and stream the run
+        to a terminal state. More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        ...
+
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        event_handler: AssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+        """
+        Submit the tool outputs for a run that requires action and stream the run
+        to a terminal state. More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v2",
+            "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        request = partial(
+            self._post,
+            f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+            body=maybe_transform(
+                {
+                    "tool_outputs": tool_outputs,
+                    "stream": True,
+                },
+                run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
+        return AssistantStreamManager(request, event_handler=event_handler or AssistantEventHandler())
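+
+    # Illustrative usage (not part of the SDK; IDs are placeholders):
+    #
+    #     with client.beta.threads.runs.submit_tool_outputs_stream(
+    #         run_id="run_abc123",
+    #         thread_id="thread_abc123",
+    #         tool_outputs=[{"tool_call_id": "call_abc123", "output": "42"}],
+    #     ) as stream:
+    #         stream.until_done()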
+
+
+class AsyncRuns(AsyncAPIResource):
+    @cached_property
+    def steps(self) -> AsyncSteps:
+        return AsyncSteps(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncRunsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncRunsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncRunsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncRunsWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          additional_messages: Adds additional messages to the thread before creating the run.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          reasoning_effort: **o-series models only**
+
+              Constrains effort on reasoning for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+              result in faster responses and fewer tokens used on reasoning in a response.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        stream: Literal[True],
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[AssistantStreamEvent]:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          additional_messages: Adds additional messages to the thread before creating the run.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          reasoning_effort: **o-series models only**
+
+              Constrains effort on reasoning for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+              result in faster responses and fewer tokens used on reasoning in a response.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        stream: bool,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          additional_messages: Adds additional messages to the thread before creating the run.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          reasoning_effort: **o-series models only**
+
+              Constrains effort on reasoning for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+              result in faster responses and fewer tokens used on reasoning in a response.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["assistant_id"], ["assistant_id", "stream"])
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/threads/{thread_id}/runs",
+            body=await async_maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "additional_instructions": additional_instructions,
+                    "additional_messages": additional_messages,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "reasoning_effort": reasoning_effort,
+                    "response_format": response_format,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation_strategy": truncation_strategy,
+                },
+                run_create_params.RunCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=await async_maybe_transform({"include": include}, run_create_params.RunCreateParams),
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
+
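+    # Usage sketch for `create` (hypothetical IDs; assumes an `AsyncOpenAI`
+    # client bound to `client`). With `stream=True` the same call returns an
+    # `AsyncStream[AssistantStreamEvent]` instead of a `Run`:
+    #
+    #     run = await client.beta.threads.runs.create(
+    #         thread_id="thread_abc123",
+    #         assistant_id="asst_abc123",
+    #         instructions="Answer concisely.",
+    #     )
+    #     print(run.id, run.status)  # e.g. "queued"
+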
+    async def retrieve(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Retrieves a run.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._get(
+            f"/threads/{thread_id}/runs/{run_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+        )
+
+    async def update(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Modifies a run.
+
+        Args:
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/threads/{thread_id}/runs/{run_id}",
+            body=await async_maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+        )
+
+    def list(
+        self,
+        thread_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[Run, AsyncCursorPage[Run]]:
+        """
+        Returns a list of runs belonging to a thread.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/threads/{thread_id}/runs",
+            page=AsyncCursorPage[Run],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    run_list_params.RunListParams,
+                ),
+            ),
+            model=Run,
+        )
+
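+    # Usage sketch for `list` (hypothetical thread ID; assumes an `AsyncOpenAI`
+    # client). The returned `AsyncCursorPage` auto-paginates via the `after`
+    # cursor when consumed with `async for`:
+    #
+    #     async for run in client.beta.threads.runs.list(
+    #         thread_id="thread_abc123",
+    #         limit=20,
+    #         order="desc",
+    #     ):
+    #         print(run.id, run.status)
+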
+    async def cancel(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Cancels a run that is `in_progress`.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/threads/{thread_id}/runs/{run_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+        )
+
+    async def create_and_poll(
+        self,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to create a run and poll for a terminal state. More information on Run
+        lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        run = await self.create(
+            thread_id=thread_id,
+            assistant_id=assistant_id,
+            include=include,
+            additional_instructions=additional_instructions,
+            additional_messages=additional_messages,
+            instructions=instructions,
+            max_completion_tokens=max_completion_tokens,
+            max_prompt_tokens=max_prompt_tokens,
+            metadata=metadata,
+            model=model,
+            response_format=response_format,
+            temperature=temperature,
+            tool_choice=tool_choice,
+            parallel_tool_calls=parallel_tool_calls,
+            reasoning_effort=reasoning_effort,
+            # We assume we are not streaming when polling
+            stream=False,
+            tools=tools,
+            truncation_strategy=truncation_strategy,
+            top_p=top_p,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        return await self.poll(
+            run.id,
+            thread_id=thread_id,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            poll_interval_ms=poll_interval_ms,
+            timeout=timeout,
+        )
+
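+    # Usage sketch for `create_and_poll` (hypothetical IDs; assumes an
+    # `AsyncOpenAI` client). The helper blocks until the run leaves the
+    # `queued`/`in_progress` states:
+    #
+    #     run = await client.beta.threads.runs.create_and_poll(
+    #         thread_id="thread_abc123",
+    #         assistant_id="asst_abc123",
+    #     )
+    #     if run.status == "completed":
+    #         print(run.usage)
+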
+    @overload
+    @typing_extensions.deprecated("use `stream` instead")
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+        """Create a Run stream"""
+        ...
+
+    @overload
+    @typing_extensions.deprecated("use `stream` instead")
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AsyncAssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+        """Create a Run stream"""
+        ...
+
+    @typing_extensions.deprecated("use `stream` instead")
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AsyncAssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> (
+        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+    ):
+        """Create a Run stream"""
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v2",
+            "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        request = self._post(
+            f"/threads/{thread_id}/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "additional_instructions": additional_instructions,
+                    "additional_messages": additional_messages,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                    "tools": tools,
+                    "truncation_strategy": truncation_strategy,
+                    "top_p": top_p,
+                    "parallel_tool_calls": parallel_tool_calls,
+                },
+                run_create_params.RunCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
+        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
+
+    async def poll(
+        self,
+        run_id: str,
+        thread_id: str,
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to poll a run status until it reaches a terminal state. More
+        information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})}
+
+        if is_given(poll_interval_ms):
+            extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+        terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"}
+        while True:
+            response = await self.with_raw_response.retrieve(
+                thread_id=thread_id,
+                run_id=run_id,
+                extra_headers=extra_headers,
+                extra_body=extra_body,
+                extra_query=extra_query,
+                timeout=timeout,
+            )
+
+            run = response.parse()
+            # Return if we reached a terminal state
+            if run.status in terminal_states:
+                return run
+
+            if not is_given(poll_interval_ms):
+                from_header = response.headers.get("openai-poll-after-ms")
+                if from_header is not None:
+                    poll_interval_ms = int(from_header)
+                else:
+                    poll_interval_ms = 1000
+
+            await self._sleep(poll_interval_ms / 1000)
+
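+    # Usage sketch for `poll` (hypothetical IDs; assumes an `AsyncOpenAI`
+    # client). When `poll_interval_ms` is omitted, the interval falls back to
+    # the server's `openai-poll-after-ms` hint, then to 1000ms:
+    #
+    #     run = await client.beta.threads.runs.poll(
+    #         "run_abc123",
+    #         thread_id="thread_abc123",
+    #         poll_interval_ms=500,
+    #     )
+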
+    @overload
+    def stream(
+        self,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+        """Create a Run stream"""
+        ...
+
+    @overload
+    def stream(
+        self,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AsyncAssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+        """Create a Run stream"""
+        ...
+
+    def stream(
+        self,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AsyncAssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> (
+        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+    ):
+        """Create a Run stream"""
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v2",
+            "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        request = self._post(
+            f"/threads/{thread_id}/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "additional_instructions": additional_instructions,
+                    "additional_messages": additional_messages,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                    "tools": tools,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "reasoning_effort": reasoning_effort,
+                    "truncation_strategy": truncation_strategy,
+                    "top_p": top_p,
+                },
+                run_create_params.RunCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform({"include": include}, run_create_params.RunCreateParams),
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
+        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
+
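+    # Usage sketch for `stream` (hypothetical IDs; assumes an `AsyncOpenAI`
+    # client). The returned manager is entered with `async with`, and the
+    # stream yields `AssistantStreamEvent`s until the run reaches a terminal
+    # state:
+    #
+    #     async with client.beta.threads.runs.stream(
+    #         thread_id="thread_abc123",
+    #         assistant_id="asst_abc123",
+    #     ) as stream:
+    #         async for event in stream:
+    #             print(event.event)
+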
+    @overload
+    async def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
+
+        Args:
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        stream: Literal[True],
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[AssistantStreamEvent]:
+        """
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
+
+        Args:
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        stream: bool,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
+        """
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
+
+        Args:
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"])
+    async def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+            body=await async_maybe_transform(
+                {
+                    "tool_outputs": tool_outputs,
+                    "stream": stream,
+                },
+                run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
+
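+    # Usage sketch for `submit_tool_outputs` (assumes a `run` whose status is
+    # `requires_action` and an `AsyncOpenAI` client; `call` is one of
+    # `run.required_action.submit_tool_outputs.tool_calls`):
+    #
+    #     run = await client.beta.threads.runs.submit_tool_outputs(
+    #         run.id,
+    #         thread_id=run.thread_id,
+    #         tool_outputs=[{"tool_call_id": call.id, "output": "57"}],
+    #     )
+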
+    async def submit_tool_outputs_and_poll(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to submit tool outputs to a run and poll for a terminal run state.
+        More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        run = await self.submit_tool_outputs(
+            run_id=run_id,
+            thread_id=thread_id,
+            tool_outputs=tool_outputs,
+            stream=False,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        return await self.poll(
+            run_id=run.id,
+            thread_id=thread_id,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+            poll_interval_ms=poll_interval_ms,
+        )
+
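+    # Usage sketch for `submit_tool_outputs_and_poll`: identical inputs to
+    # `submit_tool_outputs` above, but the call only returns once the run
+    # reaches a terminal state:
+    #
+    #     run = await client.beta.threads.runs.submit_tool_outputs_and_poll(
+    #         run_id=run.id,
+    #         thread_id=run.thread_id,
+    #         tool_outputs=[{"tool_call_id": call.id, "output": "57"}],
+    #     )
+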
+    @overload
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+        """
+        Submit the tool outputs from a previous run and stream the run to a terminal
+        state. More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        ...
+
+    @overload
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        event_handler: AsyncAssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+        """
+        Submit the tool outputs from a previous run and stream the run to a terminal
+        state. More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        ...
+
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        event_handler: AsyncAssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> (
+        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+    ):
+        """
+        Submit the tool outputs from a previous run and stream the run to a terminal
+        state. More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v2",
+            "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        request = self._post(
+            f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+            body=maybe_transform(
+                {
+                    "tool_outputs": tool_outputs,
+                    "stream": True,
+                },
+                run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
+        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
+
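+    # Usage sketch for `submit_tool_outputs_stream` (same `run`/`call`
+    # assumptions as above). `until_done()` drains the stream until the run's
+    # terminal state:
+    #
+    #     async with client.beta.threads.runs.submit_tool_outputs_stream(
+    #         run_id=run.id,
+    #         thread_id=run.thread_id,
+    #         tool_outputs=[{"tool_call_id": call.id, "output": "57"}],
+    #     ) as stream:
+    #         await stream.until_done()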
+
+class RunsWithRawResponse:
+    def __init__(self, runs: Runs) -> None:
+        self._runs = runs
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            runs.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            runs.retrieve,
+        )
+        self.update = _legacy_response.to_raw_response_wrapper(
+            runs.update,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            runs.list,
+        )
+        self.cancel = _legacy_response.to_raw_response_wrapper(
+            runs.cancel,
+        )
+        self.submit_tool_outputs = _legacy_response.to_raw_response_wrapper(
+            runs.submit_tool_outputs,
+        )
+
+    @cached_property
+    def steps(self) -> StepsWithRawResponse:
+        return StepsWithRawResponse(self._runs.steps)
+
+
+class AsyncRunsWithRawResponse:
+    def __init__(self, runs: AsyncRuns) -> None:
+        self._runs = runs
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            runs.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            runs.retrieve,
+        )
+        self.update = _legacy_response.async_to_raw_response_wrapper(
+            runs.update,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            runs.list,
+        )
+        self.cancel = _legacy_response.async_to_raw_response_wrapper(
+            runs.cancel,
+        )
+        self.submit_tool_outputs = _legacy_response.async_to_raw_response_wrapper(
+            runs.submit_tool_outputs,
+        )
+
+    @cached_property
+    def steps(self) -> AsyncStepsWithRawResponse:
+        return AsyncStepsWithRawResponse(self._runs.steps)
+
+
+class RunsWithStreamingResponse:
+    def __init__(self, runs: Runs) -> None:
+        self._runs = runs
+
+        self.create = to_streamed_response_wrapper(
+            runs.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            runs.retrieve,
+        )
+        self.update = to_streamed_response_wrapper(
+            runs.update,
+        )
+        self.list = to_streamed_response_wrapper(
+            runs.list,
+        )
+        self.cancel = to_streamed_response_wrapper(
+            runs.cancel,
+        )
+        self.submit_tool_outputs = to_streamed_response_wrapper(
+            runs.submit_tool_outputs,
+        )
+
+    @cached_property
+    def steps(self) -> StepsWithStreamingResponse:
+        return StepsWithStreamingResponse(self._runs.steps)
+
+
+class AsyncRunsWithStreamingResponse:
+    def __init__(self, runs: AsyncRuns) -> None:
+        self._runs = runs
+
+        self.create = async_to_streamed_response_wrapper(
+            runs.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            runs.retrieve,
+        )
+        self.update = async_to_streamed_response_wrapper(
+            runs.update,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            runs.list,
+        )
+        self.cancel = async_to_streamed_response_wrapper(
+            runs.cancel,
+        )
+        self.submit_tool_outputs = async_to_streamed_response_wrapper(
+            runs.submit_tool_outputs,
+        )
+
+    @cached_property
+    def steps(self) -> AsyncStepsWithStreamingResponse:
+        return AsyncStepsWithStreamingResponse(self._runs.steps)
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/steps.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/steps.py
new file mode 100644
index 00000000..709c729d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/steps.py
@@ -0,0 +1,381 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Literal
+
+import httpx
+
+from ..... import _legacy_response
+from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ....._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ....._compat import cached_property
+from ....._resource import SyncAPIResource, AsyncAPIResource
+from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .....pagination import SyncCursorPage, AsyncCursorPage
+from ....._base_client import AsyncPaginator, make_request_options
+from .....types.beta.threads.runs import step_list_params, step_retrieve_params
+from .....types.beta.threads.runs.run_step import RunStep
+from .....types.beta.threads.runs.run_step_include import RunStepInclude
+
+__all__ = ["Steps", "AsyncSteps"]
+
+
+class Steps(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> StepsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return StepsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> StepsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return StepsWithStreamingResponse(self)
+
+    def retrieve(
+        self,
+        step_id: str,
+        *,
+        thread_id: str,
+        run_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> RunStep:
+        """
+        Retrieves a run step.
+
+        Args:
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        if not step_id:
+            raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get(
+            f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}",
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams),
+            ),
+            cast_to=RunStep,
+        )
+
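+    # Usage sketch for `retrieve` (hypothetical IDs; assumes a sync `OpenAI`
+    # client). The `include` value below is the single supported field named
+    # in the docstring:
+    #
+    #     step = client.beta.threads.runs.steps.retrieve(
+    #         "step_abc123",
+    #         run_id="run_abc123",
+    #         thread_id="thread_abc123",
+    #         include=["step_details.tool_calls[*].file_search.results[*].content"],
+    #     )
+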
+    def list(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[RunStep]:
+        """
+        Returns a list of run steps belonging to a run.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/threads/{thread_id}/runs/{run_id}/steps",
+            page=SyncCursorPage[RunStep],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "include": include,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    step_list_params.StepListParams,
+                ),
+            ),
+            model=RunStep,
+        )
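+
+    # Illustrative usage (a sketch, assuming hypothetical IDs): listing run steps.
+    # Iterating the returned SyncCursorPage follows the `after` cursor
+    # automatically, so the cursor parameters are only needed for manual paging.
+    #
+    #     from openai import OpenAI
+    #
+    #     client = OpenAI()
+    #     page = client.beta.threads.runs.steps.list(
+    #         "run_abc123",
+    #         thread_id="thread_abc123",
+    #         order="asc",
+    #         limit=50,
+    #     )
+    #     for step in page:  # transparently fetches subsequent pages
+    #         print(step.id, step.type)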
+
+
+class AsyncSteps(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncStepsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncStepsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncStepsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncStepsWithStreamingResponse(self)
+
+    async def retrieve(
+        self,
+        step_id: str,
+        *,
+        thread_id: str,
+        run_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> RunStep:
+        """
+        Retrieves a run step.
+
+        Args:
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        if not step_id:
+            raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._get(
+            f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}",
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=await async_maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams),
+            ),
+            cast_to=RunStep,
+        )
+
+    def list(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[RunStep, AsyncCursorPage[RunStep]]:
+        """
+        Returns a list of run steps belonging to a run.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/threads/{thread_id}/runs/{run_id}/steps",
+            page=AsyncCursorPage[RunStep],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "include": include,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    step_list_params.StepListParams,
+                ),
+            ),
+            model=RunStep,
+        )
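+
+    # Illustrative async usage (a sketch, assuming hypothetical IDs). Note that
+    # `list` is deliberately not a coroutine: it returns an AsyncPaginator that
+    # can be consumed directly with `async for`.
+    #
+    #     import asyncio
+    #     from openai import AsyncOpenAI
+    #
+    #     client = AsyncOpenAI()
+    #
+    #     async def main() -> None:
+    #         async for step in client.beta.threads.runs.steps.list(
+    #             "run_abc123", thread_id="thread_abc123"
+    #         ):
+    #             print(step.id)
+    #
+    #     asyncio.run(main())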
+
+
+class StepsWithRawResponse:
+    def __init__(self, steps: Steps) -> None:
+        self._steps = steps
+
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            steps.retrieve,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            steps.list,
+        )
+
+
+class AsyncStepsWithRawResponse:
+    def __init__(self, steps: AsyncSteps) -> None:
+        self._steps = steps
+
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            steps.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            steps.list,
+        )
+
+
+class StepsWithStreamingResponse:
+    def __init__(self, steps: Steps) -> None:
+        self._steps = steps
+
+        self.retrieve = to_streamed_response_wrapper(
+            steps.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            steps.list,
+        )
+
+
+class AsyncStepsWithStreamingResponse:
+    def __init__(self, steps: AsyncSteps) -> None:
+        self._steps = steps
+
+        self.retrieve = async_to_streamed_response_wrapper(
+            steps.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            steps.list,
+        )
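+
+# Illustrative usage of the wrapper classes above (a sketch, assuming
+# hypothetical IDs): `.with_raw_response` exposes the underlying HTTP response,
+# and `.parse()` recovers the typed object afterwards.
+#
+#     from openai import OpenAI
+#
+#     client = OpenAI()
+#     response = client.beta.threads.runs.steps.with_raw_response.list(
+#         "run_abc123", thread_id="thread_abc123"
+#     )
+#     print(response.headers.get("x-request-id"))
+#     steps = response.parse()  # same SyncCursorPage[RunStep] as a direct call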
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/threads.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/threads.py
new file mode 100644
index 00000000..d88559bd
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/threads.py
@@ -0,0 +1,1875 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from functools import partial
+from typing_extensions import Literal, overload
+
+import httpx
+
+from .... import _legacy_response
+from .messages import (
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+    required_args,
+    maybe_transform,
+    async_maybe_transform,
+)
+from .runs.runs import (
+    Runs,
+    AsyncRuns,
+    RunsWithRawResponse,
+    AsyncRunsWithRawResponse,
+    RunsWithStreamingResponse,
+    AsyncRunsWithStreamingResponse,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._streaming import Stream, AsyncStream
+from ....types.beta import (
+    thread_create_params,
+    thread_update_params,
+    thread_create_and_run_params,
+)
+from ...._base_client import make_request_options
+from ....lib.streaming import (
+    AssistantEventHandler,
+    AssistantEventHandlerT,
+    AssistantStreamManager,
+    AsyncAssistantEventHandler,
+    AsyncAssistantEventHandlerT,
+    AsyncAssistantStreamManager,
+)
+from ....types.beta.thread import Thread
+from ....types.beta.threads.run import Run
+from ....types.shared.chat_model import ChatModel
+from ....types.beta.thread_deleted import ThreadDeleted
+from ....types.shared_params.metadata import Metadata
+from ....types.beta.assistant_stream_event import AssistantStreamEvent
+from ....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
+from ....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
+
+__all__ = ["Threads", "AsyncThreads"]
+
+
+class Threads(SyncAPIResource):
+    @cached_property
+    def runs(self) -> Runs:
+        return Runs(self._client)
+
+    @cached_property
+    def messages(self) -> Messages:
+        return Messages(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> ThreadsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return ThreadsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> ThreadsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return ThreadsWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Thread:
+        """
+        Create a thread.
+
+        Args:
+          messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
+              start the thread with.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          tool_resources: A set of resources that are made available to the assistant's tools in this
+              thread. The resources are specific to the type of tool. For example, the
+              `code_interpreter` tool requires a list of file IDs, while the `file_search`
+              tool requires a list of vector store IDs.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            "/threads",
+            body=maybe_transform(
+                {
+                    "messages": messages,
+                    "metadata": metadata,
+                    "tool_resources": tool_resources,
+                },
+                thread_create_params.ThreadCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Thread,
+        )
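+
+    # Illustrative usage (a sketch; the metadata keys are arbitrary examples):
+    # creating a thread seeded with an initial user message.
+    #
+    #     from openai import OpenAI
+    #
+    #     client = OpenAI()
+    #     thread = client.beta.threads.create(
+    #         messages=[{"role": "user", "content": "Hello!"}],
+    #         metadata={"user_id": "user_123"},
+    #     )
+    #     print(thread.id)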
+
+    def retrieve(
+        self,
+        thread_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Thread:
+        """
+        Retrieves a thread.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get(
+            f"/threads/{thread_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Thread,
+        )
+
+    def update(
+        self,
+        thread_id: str,
+        *,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Thread:
+        """
+        Modifies a thread.
+
+        Args:
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          tool_resources: A set of resources that are made available to the assistant's tools in this
+              thread. The resources are specific to the type of tool. For example, the
+              `code_interpreter` tool requires a list of file IDs, while the `file_search`
+              tool requires a list of vector store IDs.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/threads/{thread_id}",
+            body=maybe_transform(
+                {
+                    "metadata": metadata,
+                    "tool_resources": tool_resources,
+                },
+                thread_update_params.ThreadUpdateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Thread,
+        )
+
+    def delete(
+        self,
+        thread_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ThreadDeleted:
+        """
+        Delete a thread.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._delete(
+            f"/threads/{thread_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ThreadDeleted,
+        )
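+
+    # Illustrative lifecycle sketch for the retrieve/update/delete methods above
+    # (hypothetical ID; `client` as in the earlier sketches):
+    #
+    #     thread = client.beta.threads.retrieve("thread_abc123")
+    #     thread = client.beta.threads.update(
+    #         "thread_abc123", metadata={"status": "archived"}
+    #     )
+    #     deleted = client.beta.threads.delete("thread_abc123")
+    #     assert deleted.deleted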
+
+    @overload
+    def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Create a thread and run it in one request.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          instructions: Override the default system message of the assistant. This is useful for
+              modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          thread: Options to create a new thread. If no thread is provided when running a request,
+              an empty thread will be created.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        stream: Literal[True],
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[AssistantStreamEvent]:
+        """
+        Create a thread and run it in one request.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          instructions: Override the default system message of the assistant. This is useful for
+              modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          thread: Options to create a new thread. If no thread is provided when running a request,
+              an empty thread will be created.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        stream: bool,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
+        """
+        Create a thread and run it in one request.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          instructions: Override the default system message of the assistant. This is useful for
+              modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          thread: Options to create a new thread. If no thread is provided when running a request,
+              an empty thread will be created.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["assistant_id"], ["assistant_id", "stream"])
+    def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            "/threads/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "response_format": response_format,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "thread": thread,
+                    "tool_choice": tool_choice,
+                    "tool_resources": tool_resources,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation_strategy": truncation_strategy,
+                },
+                thread_create_and_run_params.ThreadCreateAndRunParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
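+
+    # Illustrative usage (a sketch, assuming a hypothetical assistant ID). With
+    # the default non-streaming call this returns a `Run`; passing `stream=True`
+    # switches the return type to `Stream[AssistantStreamEvent]`, which is why
+    # the overloads above differ only in `stream`.
+    #
+    #     run = client.beta.threads.create_and_run(
+    #         assistant_id="asst_abc123",
+    #         thread={"messages": [{"role": "user", "content": "Hello"}]},
+    #     )
+    #     print(run.id, run.status)  # typically "queued"; poll or stream to finish
+    #
+    #     for event in client.beta.threads.create_and_run(
+    #         assistant_id="asst_abc123", stream=True
+    #     ):
+    #         print(event.event)  # e.g. "thread.run.created", "thread.message.delta"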
+
+    def create_and_run_poll(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to create a thread, start a run and then poll for a terminal state.
+        More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        run = self.create_and_run(
+            assistant_id=assistant_id,
+            instructions=instructions,
+            max_completion_tokens=max_completion_tokens,
+            max_prompt_tokens=max_prompt_tokens,
+            metadata=metadata,
+            model=model,
+            parallel_tool_calls=parallel_tool_calls,
+            response_format=response_format,
+            temperature=temperature,
+            stream=False,
+            thread=thread,
+            tool_resources=tool_resources,
+            tool_choice=tool_choice,
+            truncation_strategy=truncation_strategy,
+            top_p=top_p,
+            tools=tools,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        return self.runs.poll(run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms)
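+
+    # Illustrative usage (a sketch, assuming a hypothetical assistant ID): the
+    # helper blocks until the run leaves its in-progress states, so the returned
+    # run is already terminal or waiting on you (e.g. "completed",
+    # "requires_action").
+    #
+    #     run = client.beta.threads.create_and_run_poll(
+    #         assistant_id="asst_abc123",
+    #         thread={"messages": [{"role": "user", "content": "Hi"}]},
+    #         poll_interval_ms=1000,
+    #     )
+    #     print(run.status)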
+
+    @overload
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler]:
+        """Create a thread and stream the run back"""
+        ...
+
+    @overload
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        event_handler: AssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandlerT]:
+        """Create a thread and stream the run back"""
+        ...
+
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        event_handler: AssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+        """Create a thread and stream the run back"""
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v2",
+            "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        make_request = partial(
+            self._post,
+            "/threads/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                    "thread": thread,
+                    "tools": tools,
+                    "tool_resources": tool_resources,
+                    "truncation_strategy": truncation_strategy,
+                    "top_p": top_p,
+                },
+                thread_create_and_run_params.ThreadCreateAndRunParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
+        return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
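A minimal usage sketch for the synchronous stream helper above (illustrative only, not part of the generated module; the assistant ID `asst_...` is a placeholder, and `text_deltas` is the event-handler convenience iterator that yields only the text fragments):

    from openai import OpenAI

    client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

    with client.beta.threads.create_and_run_stream(
        assistant_id="asst_...",  # placeholder assistant ID
        thread={"messages": [{"role": "user", "content": "Hello!"}]},
    ) as stream:
        for text in stream.text_deltas:  # yields just the text chunks from the event stream
            print(text, end="", flush=True)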
+
+
+class AsyncThreads(AsyncAPIResource):
+    @cached_property
+    def runs(self) -> AsyncRuns:
+        return AsyncRuns(self._client)
+
+    @cached_property
+    def messages(self) -> AsyncMessages:
+        return AsyncMessages(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncThreadsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncThreadsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncThreadsWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Thread:
+        """
+        Create a thread.
+
+        Args:
+          messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
+              start the thread with.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          tool_resources: A set of resources that are made available to the assistant's tools in this
+              thread. The resources are specific to the type of tool. For example, the
+              `code_interpreter` tool requires a list of file IDs, while the `file_search`
+              tool requires a list of vector store IDs.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            "/threads",
+            body=await async_maybe_transform(
+                {
+                    "messages": messages,
+                    "metadata": metadata,
+                    "tool_resources": tool_resources,
+                },
+                thread_create_params.ThreadCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Thread,
+        )
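For context, a minimal sketch of calling the async `create` method above, assuming `OPENAI_API_KEY` is set in the environment:

    import asyncio
    from openai import AsyncOpenAI

    async def main() -> None:
        client = AsyncOpenAI()
        thread = await client.beta.threads.create(
            messages=[{"role": "user", "content": "What is 2 + 2?"}],
            metadata={"source": "example"},  # up to 16 key-value pairs
        )
        print(thread.id)

    asyncio.run(main())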
+
+    async def retrieve(
+        self,
+        thread_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Thread:
+        """
+        Retrieves a thread.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._get(
+            f"/threads/{thread_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Thread,
+        )
+
+    async def update(
+        self,
+        thread_id: str,
+        *,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Thread:
+        """
+        Modifies a thread.
+
+        Args:
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          tool_resources: A set of resources that are made available to the assistant's tools in this
+              thread. The resources are specific to the type of tool. For example, the
+              `code_interpreter` tool requires a list of file IDs, while the `file_search`
+              tool requires a list of vector store IDs.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/threads/{thread_id}",
+            body=await async_maybe_transform(
+                {
+                    "metadata": metadata,
+                    "tool_resources": tool_resources,
+                },
+                thread_update_params.ThreadUpdateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Thread,
+        )
+
+    async def delete(
+        self,
+        thread_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ThreadDeleted:
+        """
+        Delete a thread.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._delete(
+            f"/threads/{thread_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ThreadDeleted,
+        )
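The retrieve/update/delete methods compose into a simple thread lifecycle; a hedged sketch:

    from openai import AsyncOpenAI

    async def thread_lifecycle() -> None:
        client = AsyncOpenAI()
        thread = await client.beta.threads.create()  # start with an empty thread
        thread = await client.beta.threads.update(thread.id, metadata={"reviewed": "true"})
        fetched = await client.beta.threads.retrieve(thread.id)
        deleted = await client.beta.threads.delete(fetched.id)
        assert deleted.deleted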
+
+    @overload
+    async def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Create a thread and run it in one request.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          instructions: Override the default system message of the assistant. This is useful for
+              modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          thread: Options to create a new thread. If no thread is provided when running a request,
+              an empty thread will be created.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        stream: Literal[True],
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[AssistantStreamEvent]:
+        """
+        Create a thread and run it in one request.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          instructions: Override the default system message of the assistant. This is useful for
+              modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          thread: Options to create a new thread. If no thread is provided when running a request,
+              an empty thread will be created.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        stream: bool,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
+        """
+        Create a thread and run it in one request.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          instructions: Override the default system message of the assistant. This is useful for
+              modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          thread: Options to create a new thread. If no thread is provided when running a request,
+              an empty thread will be created.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["assistant_id"], ["assistant_id", "stream"])
+    async def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            "/threads/runs",
+            body=await async_maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "response_format": response_format,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "thread": thread,
+                    "tool_choice": tool_choice,
+                    "tool_resources": tool_resources,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation_strategy": truncation_strategy,
+                },
+                thread_create_and_run_params.ThreadCreateAndRunParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
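The overloads above resolve as follows in practice: without `stream` the call returns a `Run`, while `stream=True` makes it return an `AsyncStream[AssistantStreamEvent]`. A sketch, with a placeholder assistant ID:

    from openai import AsyncOpenAI

    async def create_and_run_example() -> None:
        client = AsyncOpenAI()
        # Non-streaming: the awaited call resolves to a single Run object.
        run = await client.beta.threads.create_and_run(
            assistant_id="asst_...",  # placeholder
            thread={"messages": [{"role": "user", "content": "Summarize our chat."}]},
        )
        print(run.id, run.status)

        # Streaming: the same method resolves to an async event stream.
        stream = await client.beta.threads.create_and_run(
            assistant_id="asst_...",  # placeholder
            stream=True,
            thread={"messages": [{"role": "user", "content": "Summarize our chat."}]},
        )
        async for event in stream:  # server-sent events until the run hits a terminal state
            print(event.event)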
+
+    async def create_and_run_poll(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to create a thread, start a run and then poll for a terminal state.
+        More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        run = await self.create_and_run(
+            assistant_id=assistant_id,
+            instructions=instructions,
+            max_completion_tokens=max_completion_tokens,
+            max_prompt_tokens=max_prompt_tokens,
+            metadata=metadata,
+            model=model,
+            parallel_tool_calls=parallel_tool_calls,
+            response_format=response_format,
+            temperature=temperature,
+            stream=False,
+            thread=thread,
+            tool_resources=tool_resources,
+            tool_choice=tool_choice,
+            truncation_strategy=truncation_strategy,
+            top_p=top_p,
+            tools=tools,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        return await self.runs.poll(
+            run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms
+        )
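A sketch of the polling helper above; `poll_interval_ms` overrides the SDK's default cadence, and the call returns only once the run leaves the in-progress states:

    from openai import AsyncOpenAI

    async def poll_example() -> None:
        client = AsyncOpenAI()
        run = await client.beta.threads.create_and_run_poll(
            assistant_id="asst_...",  # placeholder
            thread={"messages": [{"role": "user", "content": "Hello"}]},
            poll_interval_ms=500,  # optional; omit to use the SDK default
        )
        print(run.status)  # e.g. "completed", "failed", or "requires_action"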
+
+    @overload
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+        """Create a thread and stream the run back"""
+        ...
+
+    @overload
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        event_handler: AsyncAssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+        """Create a thread and stream the run back"""
+        ...
+
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        event_handler: AsyncAssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> (
+        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+    ):
+        """Create a thread and stream the run back"""
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v2",
+            "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        request = self._post(
+            "/threads/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                    "thread": thread,
+                    "tools": tools,
+                    "tool_resources": tool_resources,
+                    "truncation_strategy": truncation_strategy,
+                    "top_p": top_p,
+                },
+                thread_create_and_run_params.ThreadCreateAndRunParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
+        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
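When `event_handler` is supplied, the stream manager dispatches events to it instead of the default handler. A sketch with a custom handler (placeholder assistant ID; `delta.value` may be None for non-text deltas, hence the guard):

    from typing_extensions import override
    from openai import AsyncOpenAI, AsyncAssistantEventHandler

    class PrintHandler(AsyncAssistantEventHandler):
        @override
        async def on_text_delta(self, delta, snapshot) -> None:
            print(delta.value or "", end="", flush=True)

    async def stream_example() -> None:
        client = AsyncOpenAI()
        async with client.beta.threads.create_and_run_stream(
            assistant_id="asst_...",  # placeholder
            thread={"messages": [{"role": "user", "content": "Tell me a joke."}]},
            event_handler=PrintHandler(),
        ) as stream:
            await stream.until_done()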
+
+
+class ThreadsWithRawResponse:
+    def __init__(self, threads: Threads) -> None:
+        self._threads = threads
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            threads.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            threads.retrieve,
+        )
+        self.update = _legacy_response.to_raw_response_wrapper(
+            threads.update,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            threads.delete,
+        )
+        self.create_and_run = _legacy_response.to_raw_response_wrapper(
+            threads.create_and_run,
+        )
+
+    @cached_property
+    def runs(self) -> RunsWithRawResponse:
+        return RunsWithRawResponse(self._threads.runs)
+
+    @cached_property
+    def messages(self) -> MessagesWithRawResponse:
+        return MessagesWithRawResponse(self._threads.messages)
+
+
+class AsyncThreadsWithRawResponse:
+    def __init__(self, threads: AsyncThreads) -> None:
+        self._threads = threads
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            threads.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            threads.retrieve,
+        )
+        self.update = _legacy_response.async_to_raw_response_wrapper(
+            threads.update,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            threads.delete,
+        )
+        self.create_and_run = _legacy_response.async_to_raw_response_wrapper(
+            threads.create_and_run,
+        )
+
+    @cached_property
+    def runs(self) -> AsyncRunsWithRawResponse:
+        return AsyncRunsWithRawResponse(self._threads.runs)
+
+    @cached_property
+    def messages(self) -> AsyncMessagesWithRawResponse:
+        return AsyncMessagesWithRawResponse(self._threads.messages)
+
+
+class ThreadsWithStreamingResponse:
+    def __init__(self, threads: Threads) -> None:
+        self._threads = threads
+
+        self.create = to_streamed_response_wrapper(
+            threads.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            threads.retrieve,
+        )
+        self.update = to_streamed_response_wrapper(
+            threads.update,
+        )
+        self.delete = to_streamed_response_wrapper(
+            threads.delete,
+        )
+        self.create_and_run = to_streamed_response_wrapper(
+            threads.create_and_run,
+        )
+
+    @cached_property
+    def runs(self) -> RunsWithStreamingResponse:
+        return RunsWithStreamingResponse(self._threads.runs)
+
+    @cached_property
+    def messages(self) -> MessagesWithStreamingResponse:
+        return MessagesWithStreamingResponse(self._threads.messages)
+
+
+class AsyncThreadsWithStreamingResponse:
+    def __init__(self, threads: AsyncThreads) -> None:
+        self._threads = threads
+
+        self.create = async_to_streamed_response_wrapper(
+            threads.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            threads.retrieve,
+        )
+        self.update = async_to_streamed_response_wrapper(
+            threads.update,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            threads.delete,
+        )
+        self.create_and_run = async_to_streamed_response_wrapper(
+            threads.create_and_run,
+        )
+
+    @cached_property
+    def runs(self) -> AsyncRunsWithStreamingResponse:
+        return AsyncRunsWithStreamingResponse(self._threads.runs)
+
+    @cached_property
+    def messages(self) -> AsyncMessagesWithStreamingResponse:
+        return AsyncMessagesWithStreamingResponse(self._threads.messages)
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/chat/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/chat/__init__.py
new file mode 100644
index 00000000..52dfdcea
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/chat/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .chat import (
+    Chat,
+    AsyncChat,
+    ChatWithRawResponse,
+    AsyncChatWithRawResponse,
+    ChatWithStreamingResponse,
+    AsyncChatWithStreamingResponse,
+)
+from .completions import (
+    Completions,
+    AsyncCompletions,
+    CompletionsWithRawResponse,
+    AsyncCompletionsWithRawResponse,
+    CompletionsWithStreamingResponse,
+    AsyncCompletionsWithStreamingResponse,
+)
+
+__all__ = [
+    "Completions",
+    "AsyncCompletions",
+    "CompletionsWithRawResponse",
+    "AsyncCompletionsWithRawResponse",
+    "CompletionsWithStreamingResponse",
+    "AsyncCompletionsWithStreamingResponse",
+    "Chat",
+    "AsyncChat",
+    "ChatWithRawResponse",
+    "AsyncChatWithRawResponse",
+    "ChatWithStreamingResponse",
+    "AsyncChatWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/chat/chat.py b/.venv/lib/python3.12/site-packages/openai/resources/chat/chat.py
new file mode 100644
index 00000000..14f9224b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/chat/chat.py
@@ -0,0 +1,102 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from .completions.completions import (
+    Completions,
+    AsyncCompletions,
+    CompletionsWithRawResponse,
+    AsyncCompletionsWithRawResponse,
+    CompletionsWithStreamingResponse,
+    AsyncCompletionsWithStreamingResponse,
+)
+
+__all__ = ["Chat", "AsyncChat"]
+
+
+class Chat(SyncAPIResource):
+    @cached_property
+    def completions(self) -> Completions:
+        return Completions(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> ChatWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return ChatWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> ChatWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return ChatWithStreamingResponse(self)
+
+
+class AsyncChat(AsyncAPIResource):
+    @cached_property
+    def completions(self) -> AsyncCompletions:
+        return AsyncCompletions(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncChatWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncChatWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncChatWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncChatWithStreamingResponse(self)
+
+
+class ChatWithRawResponse:
+    def __init__(self, chat: Chat) -> None:
+        self._chat = chat
+
+    @cached_property
+    def completions(self) -> CompletionsWithRawResponse:
+        return CompletionsWithRawResponse(self._chat.completions)
+
+
+class AsyncChatWithRawResponse:
+    def __init__(self, chat: AsyncChat) -> None:
+        self._chat = chat
+
+    @cached_property
+    def completions(self) -> AsyncCompletionsWithRawResponse:
+        return AsyncCompletionsWithRawResponse(self._chat.completions)
+
+
+class ChatWithStreamingResponse:
+    def __init__(self, chat: Chat) -> None:
+        self._chat = chat
+
+    @cached_property
+    def completions(self) -> CompletionsWithStreamingResponse:
+        return CompletionsWithStreamingResponse(self._chat.completions)
+
+
+class AsyncChatWithStreamingResponse:
+    def __init__(self, chat: AsyncChat) -> None:
+        self._chat = chat
+
+    @cached_property
+    def completions(self) -> AsyncCompletionsWithStreamingResponse:
+        return AsyncCompletionsWithStreamingResponse(self._chat.completions)
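To show how these wrapper classes surface in practice, a sketch of reading raw response data through the `.with_raw_response` prefix; the model name here is an assumption, and any chat-capable model ID works:

    from openai import OpenAI

    client = OpenAI()
    raw = client.chat.completions.with_raw_response.create(
        model="gpt-4o-mini",  # assumed model; substitute your own
        messages=[{"role": "user", "content": "Hi"}],
    )
    print(raw.headers.get("x-request-id"))  # raw HTTP headers are accessible
    completion = raw.parse()  # parse into the usual ChatCompletion object
    print(completion.choices[0].message.content)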
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/__init__.py
new file mode 100644
index 00000000..12d3b3aa
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .messages import (
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
+from .completions import (
+    Completions,
+    AsyncCompletions,
+    CompletionsWithRawResponse,
+    AsyncCompletionsWithRawResponse,
+    CompletionsWithStreamingResponse,
+    AsyncCompletionsWithStreamingResponse,
+)
+
+__all__ = [
+    "Messages",
+    "AsyncMessages",
+    "MessagesWithRawResponse",
+    "AsyncMessagesWithRawResponse",
+    "MessagesWithStreamingResponse",
+    "AsyncMessagesWithStreamingResponse",
+    "Completions",
+    "AsyncCompletions",
+    "CompletionsWithRawResponse",
+    "AsyncCompletionsWithRawResponse",
+    "CompletionsWithStreamingResponse",
+    "AsyncCompletionsWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/completions.py b/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/completions.py
new file mode 100644
index 00000000..d28be012
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/completions.py
@@ -0,0 +1,2331 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import inspect
+from typing import Dict, List, Union, Iterable, Optional
+from typing_extensions import Literal, overload
+
+import httpx
+import pydantic
+
+from .... import _legacy_response
+from .messages import (
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+    required_args,
+    maybe_transform,
+    async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._streaming import Stream, AsyncStream
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ....types.chat import (
+    ChatCompletionAudioParam,
+    completion_list_params,
+    completion_create_params,
+    completion_update_params,
+)
+from ...._base_client import AsyncPaginator, make_request_options
+from ....types.shared.chat_model import ChatModel
+from ....types.chat.chat_completion import ChatCompletion
+from ....types.shared_params.metadata import Metadata
+from ....types.shared.reasoning_effort import ReasoningEffort
+from ....types.chat.chat_completion_chunk import ChatCompletionChunk
+from ....types.chat.chat_completion_deleted import ChatCompletionDeleted
+from ....types.chat.chat_completion_tool_param import ChatCompletionToolParam
+from ....types.chat.chat_completion_message_param import ChatCompletionMessageParam
+from ....types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
+from ....types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam
+from ....types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam
+
+__all__ = ["Completions", "AsyncCompletions"]
+
+
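Every optional parameter in the signatures below defaults to the `NOT_GIVEN` sentinel rather than `None`, so the transport layer can distinguish an omitted field from an explicit JSON `null`. A sketch of the distinction as I read `maybe_transform` (field choice illustrative):

    client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "hi"}],
        temperature=None,  # serialized as `"temperature": null` in the body
        # top_p is left at NOT_GIVEN and is omitted from the payload entirely
    )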
+class Completions(SyncAPIResource):
+    @cached_property
+    def messages(self) -> Messages:
+        return Messages(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> CompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return CompletionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return CompletionsWithStreamingResponse(self)
+
+    @overload
+    def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion:
+        """
+        **Starting a new project?** We recommend trying
+        [Responses](https://platform.openai.com/docs/api-reference/responses) to take
+        advantage of the latest OpenAI platform features. Compare
+        [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).
+
+        ---
+
+        Creates a model response for the given chat conversation. Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
+
+        Parameter support can differ depending on the model used to generate the
+        response, particularly for newer reasoning models. Parameters that are only
+        supported for reasoning models are noted below. For the current state of
+        unsupported parameters in reasoning models,
+        [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
+        Args:
+          messages: A list of messages comprising the conversation so far. Depending on the
+              [model](https://platform.openai.com/docs/models) you use, different message
+              types (modalities) are supported, like
+              [text](https://platform.openai.com/docs/guides/text-generation),
+              [images](https://platform.openai.com/docs/guides/vision), and
+              [audio](https://platform.openai.com/docs/guides/audio).
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+          function_call: Deprecated in favor of `tool_choice`.
+
+              Controls which (if any) function is called by the model.
+
+              `none` means the model will not call a function and instead generates a message.
+
+              `auto` means the model can pick between generating a message or calling a
+              function.
+
+              Specifying a particular function via `{"name": "my_function"}` forces the model
+              to call that function.
+
+              `none` is the default when no functions are present. `auto` is the default if
+              functions are present.
+
+          functions: Deprecated in favor of `tools`.
+
+              A list of functions the model may generate JSON inputs for.
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the
+              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
+              bias is added to the logits generated by the model prior to sampling. The exact
+              effect will vary per model, but values between -1 and 1 should decrease or
+              increase likelihood of selection; values like -100 or 100 should result in a ban
+              or exclusive selection of the relevant token.
+
+          logprobs: Whether to return log probabilities of the output tokens or not. If true,
+              returns the log probabilities of each output token returned in the `content` of
+              `message`.
+
+          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
+              completion. This value can be used to control
+              [costs](https://openai.com/api/pricing/) for text generated via API.
+
+              This value is now deprecated in favor of `max_completion_tokens`, and is not
+              compatible with
+              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          modalities: Output types that you would like the model to generate. Most models are capable
+              of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
+
+          n: How many chat completion choices to generate for each input message. Note that
+              you will be charged based on the number of generated tokens across all of the
+              choices. Keep `n` as `1` to minimize costs.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          prediction: Static predicted output content, such as the content of a text file that is
+              being regenerated.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+          reasoning_effort: **o-series models only**
+
+              Constrains effort on reasoning for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+              result in faster responses and fewer tokens used on reasoning in a response.
+
+          response_format: An object specifying the format that the model must output.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables the older JSON mode, which
+              ensures the message the model generates is valid JSON. Using `json_schema` is
+              preferred for models that support it.
+
+          seed: This feature is in Beta. If specified, our system will make a best effort to
+              sample deterministically, such that repeated requests with the same `seed` and
+              parameters should return the same result. Determinism is not guaranteed, and you
+              should refer to the `system_fingerprint` response parameter to monitor changes
+              in the backend.
+
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarantee.
+              - If set to 'default', the request will be processed using the default service
+                tier with a lower uptime SLA and no latency guarantee.
+              - When not set, the default behavior is 'auto'.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          store: Whether or not to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
+              for more information, along with the
+              [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
+              guide for more information on how to handle the streaming events.
+
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tool and instead generates a message. `auto` means the model can
+              pick between generating a message or calling one or more tools. `required` means
+              the model must call one or more tools. Specifying a particular tool via
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+              `none` is the default when no tools are present. `auto` is the default if tools
+              are present.
+
+          tools: A list of tools the model may call. Currently, only functions are supported as a
+              tool. Use this to provide a list of functions the model may generate JSON inputs
+              for. A maximum of 128 functions is supported.
+
+          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+              return at each token position, each with an associated log probability.
+              `logprobs` must be set to `true` if this parameter is used.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          web_search_options: This tool searches the web for relevant results to use in a response. Learn more
+              about the
+              [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
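A minimal call matching this default, non-streaming overload (assuming a configured client; model name illustrative):

    from openai import OpenAI

    client = OpenAI()
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a terse assistant."},
            {"role": "user", "content": "Name three prime numbers."},
        ],
        temperature=0.2,
        max_completion_tokens=64,
    )
    print(completion.choices[0].message.content)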
+    @overload
+    def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, ChatModel],
+        stream: Literal[True],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[ChatCompletionChunk]:
+        """
+        **Starting a new project?** We recommend trying
+        [Responses](https://platform.openai.com/docs/api-reference/responses) to take
+        advantage of the latest OpenAI platform features. Compare
+        [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).
+
+        ---
+
+        Creates a model response for the given chat conversation. Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
+
+        Parameter support can differ depending on the model used to generate the
+        response, particularly for newer reasoning models. Parameters that are only
+        supported for reasoning models are noted below. For the current state of
+        unsupported parameters in reasoning models,
+        [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
+        Args:
+          messages: A list of messages comprising the conversation so far. Depending on the
+              [model](https://platform.openai.com/docs/models) you use, different message
+              types (modalities) are supported, like
+              [text](https://platform.openai.com/docs/guides/text-generation),
+              [images](https://platform.openai.com/docs/guides/vision), and
+              [audio](https://platform.openai.com/docs/guides/audio).
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
+              for more information, along with the
+              [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
+              guide for more information on how to handle the streaming events.
+
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+          function_call: Deprecated in favor of `tool_choice`.
+
+              Controls which (if any) function is called by the model.
+
+              `none` means the model will not call a function and instead generates a message.
+
+              `auto` means the model can pick between generating a message or calling a
+              function.
+
+              Specifying a particular function via `{"name": "my_function"}` forces the model
+              to call that function.
+
+              `none` is the default when no functions are present. `auto` is the default if
+              functions are present.
+
+          functions: Deprecated in favor of `tools`.
+
+              A list of functions the model may generate JSON inputs for.
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the
+              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
+              bias is added to the logits generated by the model prior to sampling. The exact
+              effect will vary per model, but values between -1 and 1 should decrease or
+              increase likelihood of selection; values like -100 or 100 should result in a ban
+              or exclusive selection of the relevant token.
+
+          logprobs: Whether to return log probabilities of the output tokens or not. If true,
+              returns the log probabilities of each output token returned in the `content` of
+              `message`.
+
+          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
+              completion. This value can be used to control
+              [costs](https://openai.com/api/pricing/) for text generated via API.
+
+              This value is now deprecated in favor of `max_completion_tokens`, and is not
+              compatible with
+              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          modalities: Output types that you would like the model to generate. Most models are capable
+              of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
+
+          n: How many chat completion choices to generate for each input message. Note that
+              you will be charged based on the number of generated tokens across all of the
+              choices. Keep `n` as `1` to minimize costs.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          prediction: Static predicted output content, such as the content of a text file that is
+              being regenerated.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+          reasoning_effort: **o-series models only**
+
+              Constrains effort on reasoning for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+              result in faster responses and fewer tokens used on reasoning in a response.
+
+          response_format: An object specifying the format that the model must output.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables the older JSON mode, which
+              ensures the message the model generates is valid JSON. Using `json_schema` is
+              preferred for models that support it.
+
+          seed: This feature is in Beta. If specified, our system will make a best effort to
+              sample deterministically, such that repeated requests with the same `seed` and
+              parameters should return the same result. Determinism is not guaranteed, and you
+              should refer to the `system_fingerprint` response parameter to monitor changes
+              in the backend.
+
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarantee.
+              - If set to 'default', the request will be processed using the default service
+                tier with a lower uptime SLA and no latency guarantee.
+              - When not set, the default behavior is 'auto'.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          store: Whether or not to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
+
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tool and instead generates a message. `auto` means the model can
+              pick between generating a message or calling one or more tools. `required` means
+              the model must call one or more tools. Specifying a particular tool via
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+              `none` is the default when no tools are present. `auto` is the default if tools
+              are present.
+
+          tools: A list of tools the model may call. Currently, only functions are supported as a
+              tool. Use this to provide a list of functions the model may generate JSON inputs
+              for. A maximum of 128 functions is supported.
+
+          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+              return at each token position, each with an associated log probability.
+              `logprobs` must be set to `true` if this parameter is used.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          web_search_options: This tool searches the web for relevant results to use in a response. Learn more
+              about the
+              [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
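With `stream=True` this overload returns `Stream[ChatCompletionChunk]`. A consumption sketch; note that with `stream_options={"include_usage": True}` the final chunk carries `usage` with an empty `choices` list, hence the guard:

    stream = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Write a haiku."}],
        stream=True,
        stream_options={"include_usage": True},
    )
    for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")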
+    @overload
+    def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, ChatModel],
+        stream: bool,
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
+        """
+        **Starting a new project?** We recommend trying
+        [Responses](https://platform.openai.com/docs/api-reference/responses) to take
+        advantage of the latest OpenAI platform features. Compare
+        [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).
+
+        ---
+
+        Creates a model response for the given chat conversation. Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
+
+        Parameter support can differ depending on the model used to generate the
+        response, particularly for newer reasoning models. Parameters that are only
+        supported for reasoning models are noted below. For the current state of
+        unsupported parameters in reasoning models,
+        [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
+        Args:
+          messages: A list of messages comprising the conversation so far. Depending on the
+              [model](https://platform.openai.com/docs/models) you use, different message
+              types (modalities) are supported, like
+              [text](https://platform.openai.com/docs/guides/text-generation),
+              [images](https://platform.openai.com/docs/guides/vision), and
+              [audio](https://platform.openai.com/docs/guides/audio).
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
+              for more information, along with the
+              [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
+              guide for more information on how to handle the streaming events.
+
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+          function_call: Deprecated in favor of `tool_choice`.
+
+              Controls which (if any) function is called by the model.
+
+              `none` means the model will not call a function and instead generates a message.
+
+              `auto` means the model can pick between generating a message or calling a
+              function.
+
+              Specifying a particular function via `{"name": "my_function"}` forces the model
+              to call that function.
+
+              `none` is the default when no functions are present. `auto` is the default if
+              functions are present.
+
+          functions: Deprecated in favor of `tools`.
+
+              A list of functions the model may generate JSON inputs for.
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the
+              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
+              bias is added to the logits generated by the model prior to sampling. The exact
+              effect will vary per model, but values between -1 and 1 should decrease or
+              increase likelihood of selection; values like -100 or 100 should result in a ban
+              or exclusive selection of the relevant token.
+
+          logprobs: Whether to return log probabilities of the output tokens or not. If true,
+              returns the log probabilities of each output token returned in the `content` of
+              `message`.
+
+          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
+              completion. This value can be used to control
+              [costs](https://openai.com/api/pricing/) for text generated via API.
+
+              This value is now deprecated in favor of `max_completion_tokens`, and is not
+              compatible with
+              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          modalities: Output types that you would like the model to generate. Most models are capable
+              of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
+
+          n: How many chat completion choices to generate for each input message. Note that
+              you will be charged based on the number of generated tokens across all of the
+              choices. Keep `n` as `1` to minimize costs.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          prediction: Static predicted output content, such as the content of a text file that is
+              being regenerated.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+          reasoning_effort: **o-series models only**
+
+              Constrains effort on reasoning for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+              result in faster responses and fewer tokens used on reasoning in a response.
+
+          response_format: An object specifying the format that the model must output.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables the older JSON mode, which
+              ensures the message the model generates is valid JSON. Using `json_schema` is
+              preferred for models that support it.
+
+          seed: This feature is in Beta. If specified, our system will make a best effort to
+              sample deterministically, such that repeated requests with the same `seed` and
+              parameters should return the same result. Determinism is not guaranteed, and you
+              should refer to the `system_fingerprint` response parameter to monitor changes
+              in the backend.
+
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarantee.
+              - If set to 'default', the request will be processed using the default service
+                tier with a lower uptime SLA and no latency guarantee.
+              - When not set, the default behavior is 'auto'.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          store: Whether or not to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
+
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tool and instead generates a message. `auto` means the model can
+              pick between generating a message or calling one or more tools. `required` means
+              the model must call one or more tools. Specifying a particular tool via
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+              `none` is the default when no tools are present. `auto` is the default if tools
+              are present.
+
+          tools: A list of tools the model may call. Currently, only functions are supported as a
+              tool. Use this to provide a list of functions the model may generate JSON inputs
+              for. A maximum of 128 functions is supported.
+
+          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+              return at each token position, each with an associated log probability.
+              `logprobs` must be set to `true` if this parameter is used.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          web_search_options: This tool searches the web for relevant results to use in a response. Learn more
+              about the
+              [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
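The `stream: bool` overload types the result as `ChatCompletion | Stream[ChatCompletionChunk]` for callers that only know at runtime whether to stream. One way to narrow the union (a sketch; `ask` is a hypothetical helper):

    from openai import OpenAI, Stream

    def ask(client: OpenAI, prompt: str, *, stream: bool) -> str:
        result = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            stream=stream,
        )
        if isinstance(result, Stream):  # streaming branch of the union
            return "".join(
                chunk.choices[0].delta.content or ""
                for chunk in result
                if chunk.choices
            )
        return result.choices[0].message.content or ""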
+    @required_args(["messages", "model"], ["messages", "model", "stream"])
+    def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
+        validate_response_format(response_format)
+        return self._post(
+            "/chat/completions",
+            body=maybe_transform(
+                {
+                    "messages": messages,
+                    "model": model,
+                    "audio": audio,
+                    "frequency_penalty": frequency_penalty,
+                    "function_call": function_call,
+                    "functions": functions,
+                    "logit_bias": logit_bias,
+                    "logprobs": logprobs,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_tokens": max_tokens,
+                    "metadata": metadata,
+                    "modalities": modalities,
+                    "n": n,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "prediction": prediction,
+                    "presence_penalty": presence_penalty,
+                    "reasoning_effort": reasoning_effort,
+                    "response_format": response_format,
+                    "seed": seed,
+                    "service_tier": service_tier,
+                    "stop": stop,
+                    "store": store,
+                    "stream": stream,
+                    "stream_options": stream_options,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_logprobs": top_logprobs,
+                    "top_p": top_p,
+                    "user": user,
+                    "web_search_options": web_search_options,
+                },
+                completion_create_params.CompletionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ChatCompletion,
+            stream=stream or False,
+            stream_cls=Stream[ChatCompletionChunk],
+        )
+
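`validate_response_format` is called here but not imported, so it is presumably defined later in this file; upstream it rejects a pydantic model class passed as `response_format` (model classes belong to `client.beta.chat.completions.parse(...)`). The dict form this method expects, e.g. for Structured Outputs (schema illustrative):

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Return a JSON object with a name."}],
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "person",
                "schema": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                    "required": ["name"],
                    "additionalProperties": False,
                },
            },
        },
    )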
+    def retrieve(
+        self,
+        completion_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion:
+        """Get a stored chat completion.
+
+        Only Chat Completions that have been created with
+        the `store` parameter set to `true` will be returned.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not completion_id:
+            raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
+        return self._get(
+            f"/chat/completions/{completion_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ChatCompletion,
+        )
+
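Retrieval applies only to completions persisted with `store=True`; a round-trip sketch:

    stored = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello"}],
        store=True,
    )
    fetched = client.chat.completions.retrieve(stored.id)
    assert fetched.id == stored.id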
+    def update(
+        self,
+        completion_id: str,
+        *,
+        metadata: Optional[Metadata],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion:
+        """Modify a stored chat completion.
+
+        Only Chat Completions that have been created
+        with the `store` parameter set to `true` can be modified. Currently, the only
+        supported modification is to update the `metadata` field.
+
+        Args:
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not completion_id:
+            raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
+        return self._post(
+            f"/chat/completions/{completion_id}",
+            body=maybe_transform({"metadata": metadata}, completion_update_params.CompletionUpdateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ChatCompletion,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: str | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[ChatCompletion]:
+        """List stored Chat Completions.
+
+        Only Chat Completions that have been stored with
+        the `store` parameter set to `true` will be returned.
+
+        Args:
+          after: Identifier for the last chat completion from the previous pagination request.
+
+          limit: Number of Chat Completions to retrieve.
+
+          metadata: A set of metadata key-value pairs to filter the Chat Completions by.
+              Example:
+
+              `metadata[key1]=value1&metadata[key2]=value2`
+
+          model: The model used to generate the Chat Completions.
+
+          order: Sort order for Chat Completions by timestamp. Use `asc` for ascending order or
+              `desc` for descending order. Defaults to `asc`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
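+
+        Example (an illustrative sketch; the returned page object is iterable and
+        fetches subsequent pages on demand):
+
+        ```py
+        from openai import OpenAI
+
+        client = OpenAI()
+        for completion in client.chat.completions.list(limit=20, order="desc"):
+            print(completion.id)
+        ```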
+        """
+        return self._get_api_list(
+            "/chat/completions",
+            page=SyncCursorPage[ChatCompletion],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                        "metadata": metadata,
+                        "model": model,
+                        "order": order,
+                    },
+                    completion_list_params.CompletionListParams,
+                ),
+            ),
+            model=ChatCompletion,
+        )
+
+    def delete(
+        self,
+        completion_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletionDeleted:
+        """Delete a stored chat completion.
+
+        Only Chat Completions that have been created
+        with the `store` parameter set to `true` can be deleted.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
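+
+        Example (an illustrative sketch; `"chatcmpl-abc123"` is a placeholder for
+        the ID of a completion created with `store=True`):
+
+        ```py
+        from openai import OpenAI
+
+        client = OpenAI()
+        deleted = client.chat.completions.delete("chatcmpl-abc123")
+        print(deleted.deleted)  # True once the stored completion is removed
+        ```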
+        """
+        if not completion_id:
+            raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
+        return self._delete(
+            f"/chat/completions/{completion_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ChatCompletionDeleted,
+        )
+
+
+class AsyncCompletions(AsyncAPIResource):
+    @cached_property
+    def messages(self) -> AsyncMessages:
+        return AsyncMessages(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncCompletionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncCompletionsWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion:
+        """
+        **Starting a new project?** We recommend trying
+        [Responses](https://platform.openai.com/docs/api-reference/responses) to take
+        advantage of the latest OpenAI platform features. Compare
+        [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).
+
+        ---
+
+        Creates a model response for the given chat conversation. Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
+
+        Parameter support can differ depending on the model used to generate the
+        response, particularly for newer reasoning models. Parameters that are only
+        supported for reasoning models are noted below. For the current state of
+        unsupported parameters in reasoning models,
+        [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
+        Args:
+          messages: A list of messages comprising the conversation so far. Depending on the
+              [model](https://platform.openai.com/docs/models) you use, different message
+              types (modalities) are supported, like
+              [text](https://platform.openai.com/docs/guides/text-generation),
+              [images](https://platform.openai.com/docs/guides/vision), and
+              [audio](https://platform.openai.com/docs/guides/audio).
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+          function_call: Deprecated in favor of `tool_choice`.
+
+              Controls which (if any) function is called by the model.
+
+              `none` means the model will not call a function and instead generates a message.
+
+              `auto` means the model can pick between generating a message or calling a
+              function.
+
+              Specifying a particular function via `{"name": "my_function"}` forces the model
+              to call that function.
+
+              `none` is the default when no functions are present. `auto` is the default if
+              functions are present.
+
+          functions: Deprecated in favor of `tools`.
+
+              A list of functions the model may generate JSON inputs for.
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the
+              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
+              bias is added to the logits generated by the model prior to sampling. The exact
+              effect will vary per model, but values between -1 and 1 should decrease or
+              increase likelihood of selection; values like -100 or 100 should result in a ban
+              or exclusive selection of the relevant token.
+
+          logprobs: Whether to return log probabilities of the output tokens. If true,
+              returns the log probabilities of each output token returned in the `content` of
+              `message`.
+
+          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
+              completion. This value can be used to control
+              [costs](https://openai.com/api/pricing/) for text generated via API.
+
+              This value is now deprecated in favor of `max_completion_tokens`, and is not
+              compatible with
+              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          modalities: Output types that you would like the model to generate. Most models are capable
+              of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
+
+          n: How many chat completion choices to generate for each input message. Note that
+              you will be charged based on the number of generated tokens across all of the
+              choices. Keep `n` as `1` to minimize costs.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          prediction: Static predicted output content, such as the content of a text file that is
+              being regenerated.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+          reasoning_effort: **o-series models only**
+
+              Constrains effort on reasoning for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+              result in faster responses and fewer tokens used on reasoning in a response.
+
+          response_format: An object specifying the format that the model must output.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables the older JSON mode, which
+              ensures the message the model generates is valid JSON. Using `json_schema` is
+              preferred for models that support it.
+
+          seed: This feature is in Beta. If specified, our system will make a best effort to
+              sample deterministically, such that repeated requests with the same `seed` and
+              parameters should return the same result. Determinism is not guaranteed, and you
+              should refer to the `system_fingerprint` response parameter to monitor changes
+              in the backend.
+
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarantee.
+              - If set to 'default', the request will be processed using the default service
+                tier with a lower uptime SLA and no latency guarantee.
+              - When not set, the default behavior is 'auto'.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          store: Whether to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
+              for more information, along with the
+              [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
+              guide for details on how to handle the streaming events.
+
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tool and instead generates a message. `auto` means the model can
+              pick between generating a message or calling one or more tools. `required` means
+              the model must call one or more tools. Specifying a particular tool via
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+              `none` is the default when no tools are present. `auto` is the default if tools
+              are present.
+
+          tools: A list of tools the model may call. Currently, only functions are supported as a
+              tool. Use this to provide a list of functions the model may generate JSON inputs
+              for. A maximum of 128 functions is supported.
+
+          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+              return at each token position, each with an associated log probability.
+              `logprobs` must be set to `true` if this parameter is used.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          web_search_options: This tool searches the web for relevant results to use in a response. Learn more
+              about the
+              [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
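+
+        Example (an illustrative sketch of a non-streaming async call, assuming a
+        configured `AsyncOpenAI()` client):
+
+        ```py
+        import asyncio
+
+        from openai import AsyncOpenAI
+
+        client = AsyncOpenAI()
+
+        async def main() -> None:
+            completion = await client.chat.completions.create(
+                model="gpt-4o",
+                messages=[{"role": "user", "content": "Say hello"}],
+            )
+            print(completion.choices[0].message.content)
+
+        asyncio.run(main())
+        ```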
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, ChatModel],
+        stream: Literal[True],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[ChatCompletionChunk]:
+        """
+        **Starting a new project?** We recommend trying
+        [Responses](https://platform.openai.com/docs/api-reference/responses) to take
+        advantage of the latest OpenAI platform features. Compare
+        [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).
+
+        ---
+
+        Creates a model response for the given chat conversation. Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
+
+        Parameter support can differ depending on the model used to generate the
+        response, particularly for newer reasoning models. Parameters that are only
+        supported for reasoning models are noted below. For the current state of
+        unsupported parameters in reasoning models,
+        [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
+        Args:
+          messages: A list of messages comprising the conversation so far. Depending on the
+              [model](https://platform.openai.com/docs/models) you use, different message
+              types (modalities) are supported, like
+              [text](https://platform.openai.com/docs/guides/text-generation),
+              [images](https://platform.openai.com/docs/guides/vision), and
+              [audio](https://platform.openai.com/docs/guides/audio).
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
+              for more information, along with the
+              [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
+              guide for details on how to handle the streaming events.
+
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+          function_call: Deprecated in favor of `tool_choice`.
+
+              Controls which (if any) function is called by the model.
+
+              `none` means the model will not call a function and instead generates a message.
+
+              `auto` means the model can pick between generating a message or calling a
+              function.
+
+              Specifying a particular function via `{"name": "my_function"}` forces the model
+              to call that function.
+
+              `none` is the default when no functions are present. `auto` is the default if
+              functions are present.
+
+          functions: Deprecated in favor of `tools`.
+
+              A list of functions the model may generate JSON inputs for.
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the
+              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
+              bias is added to the logits generated by the model prior to sampling. The exact
+              effect will vary per model, but values between -1 and 1 should decrease or
+              increase likelihood of selection; values like -100 or 100 should result in a ban
+              or exclusive selection of the relevant token.
+
+          logprobs: Whether to return log probabilities of the output tokens. If true,
+              returns the log probabilities of each output token returned in the `content` of
+              `message`.
+
+          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
+              completion. This value can be used to control
+              [costs](https://openai.com/api/pricing/) for text generated via API.
+
+              This value is now deprecated in favor of `max_completion_tokens`, and is not
+              compatible with
+              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          modalities: Output types that you would like the model to generate. Most models are capable
+              of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
+
+          n: How many chat completion choices to generate for each input message. Note that
+              you will be charged based on the number of generated tokens across all of the
+              choices. Keep `n` as `1` to minimize costs.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          prediction: Static predicted output content, such as the content of a text file that is
+              being regenerated.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+          reasoning_effort: **o-series models only**
+
+              Constrains effort on reasoning for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+              result in faster responses and fewer tokens used on reasoning in a response.
+
+          response_format: An object specifying the format that the model must output.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables the older JSON mode, which
+              ensures the message the model generates is valid JSON. Using `json_schema` is
+              preferred for models that support it.
+
+          seed: This feature is in Beta. If specified, our system will make a best effort to
+              sample deterministically, such that repeated requests with the same `seed` and
+              parameters should return the same result. Determinism is not guaranteed, and you
+              should refer to the `system_fingerprint` response parameter to monitor changes
+              in the backend.
+
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarantee.
+              - If set to 'default', the request will be processed using the default service
+                tier with a lower uptime SLA and no latency guarantee.
+              - When not set, the default behavior is 'auto'.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          store: Whether to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
+
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tool and instead generates a message. `auto` means the model can
+              pick between generating a message or calling one or more tools. `required` means
+              the model must call one or more tools. Specifying a particular tool via
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+              `none` is the default when no tools are present. `auto` is the default if tools
+              are present.
+
+          tools: A list of tools the model may call. Currently, only functions are supported as a
+              tool. Use this to provide a list of functions the model may generate JSON inputs
+              for. A maximum of 128 functions is supported.
+
+          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+              return at each token position, each with an associated log probability.
+              `logprobs` must be set to `true` if this parameter is used.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          web_search_options: This tool searches the web for relevant results to use in a response. Learn more
+              about the
+              [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
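+
+        Example (an illustrative sketch; with `stream=True` the awaited call
+        returns an `AsyncStream[ChatCompletionChunk]` to iterate over):
+
+        ```py
+        import asyncio
+
+        from openai import AsyncOpenAI
+
+        client = AsyncOpenAI()
+
+        async def main() -> None:
+            stream = await client.chat.completions.create(
+                model="gpt-4o",
+                messages=[{"role": "user", "content": "Count to five"}],
+                stream=True,
+            )
+            async for chunk in stream:
+                if chunk.choices and chunk.choices[0].delta.content is not None:
+                    print(chunk.choices[0].delta.content, end="")
+
+        asyncio.run(main())
+        ```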
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, ChatModel],
+        stream: bool,
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
+        """
+        **Starting a new project?** We recommend trying
+        [Responses](https://platform.openai.com/docs/api-reference/responses) to take
+        advantage of the latest OpenAI platform features. Compare
+        [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).
+
+        ---
+
+        Creates a model response for the given chat conversation. Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
+
+        Parameter support can differ depending on the model used to generate the
+        response, particularly for newer reasoning models. Parameters that are only
+        supported for reasoning models are noted below. For the current state of
+        unsupported parameters in reasoning models,
+        [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
+        Args:
+          messages: A list of messages comprising the conversation so far. Depending on the
+              [model](https://platform.openai.com/docs/models) you use, different message
+              types (modalities) are supported, like
+              [text](https://platform.openai.com/docs/guides/text-generation),
+              [images](https://platform.openai.com/docs/guides/vision), and
+              [audio](https://platform.openai.com/docs/guides/audio).
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
+              for more information, along with the
+              [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
+              guide for details on how to handle the streaming events.
+
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+          function_call: Deprecated in favor of `tool_choice`.
+
+              Controls which (if any) function is called by the model.
+
+              `none` means the model will not call a function and instead generates a message.
+
+              `auto` means the model can pick between generating a message or calling a
+              function.
+
+              Specifying a particular function via `{"name": "my_function"}` forces the model
+              to call that function.
+
+              `none` is the default when no functions are present. `auto` is the default if
+              functions are present.
+
+          functions: Deprecated in favor of `tools`.
+
+              A list of functions the model may generate JSON inputs for.
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the
+              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
+              bias is added to the logits generated by the model prior to sampling. The exact
+              effect will vary per model, but values between -1 and 1 should decrease or
+              increase likelihood of selection; values like -100 or 100 should result in a ban
+              or exclusive selection of the relevant token.
+
+          logprobs: Whether to return log probabilities of the output tokens. If true,
+              returns the log probabilities of each output token returned in the `content` of
+              `message`.
+
+          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
+              completion. This value can be used to control
+              [costs](https://openai.com/api/pricing/) for text generated via API.
+
+              This value is now deprecated in favor of `max_completion_tokens`, and is not
+              compatible with
+              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          modalities: Output types that you would like the model to generate. Most models are capable
+              of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
+
+          n: How many chat completion choices to generate for each input message. Note that
+              you will be charged based on the number of generated tokens across all of the
+              choices. Keep `n` as `1` to minimize costs.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          prediction: Static predicted output content, such as the content of a text file that is
+              being regenerated.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+          reasoning_effort: **o-series models only**
+
+              Constrains effort on reasoning for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+              result in faster responses and fewer tokens used on reasoning in a response.
+
+          response_format: An object specifying the format that the model must output.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables the older JSON mode, which
+              ensures the message the model generates is valid JSON. Using `json_schema` is
+              preferred for models that support it.
+
+          seed: This feature is in Beta. If specified, our system will make a best effort to
+              sample deterministically, such that repeated requests with the same `seed` and
+              parameters should return the same result. Determinism is not guaranteed, and you
+              should refer to the `system_fingerprint` response parameter to monitor changes
+              in the backend.
+
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarantee.
+              - If set to 'default', the request will be processed using the default service
+                tier with a lower uptime SLA and no latency guarantee.
+              - When not set, the default behavior is 'auto'.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          store: Whether to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
+
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tool and instead generates a message. `auto` means the model can
+              pick between generating a message or calling one or more tools. `required` means
+              the model must call one or more tools. Specifying a particular tool via
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+              `none` is the default when no tools are present. `auto` is the default if tools
+              are present.
+
+          tools: A list of tools the model may call. Currently, only functions are supported as a
+              tool. Use this to provide a list of functions the model may generate JSON inputs
+              for. A maximum of 128 functions is supported.
+
+          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+              return at each token position, each with an associated log probability.
+              `logprobs` must be set to `true` if this parameter is used.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          web_search_options: This tool searches the web for relevant results to use in a response. Learn more
+              about the
+              [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
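+
+        Example (an illustrative sketch for the case where the streaming flag is
+        only known at runtime; the return type is the union of both shapes):
+
+        ```py
+        from openai import AsyncOpenAI, AsyncStream
+        from openai.types.chat import ChatCompletion, ChatCompletionChunk
+
+        client = AsyncOpenAI()
+
+        async def ask(prompt: str, stream: bool) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
+            # Callers branch on the same flag they passed in.
+            return await client.chat.completions.create(
+                model="gpt-4o",
+                messages=[{"role": "user", "content": prompt}],
+                stream=stream,
+            )
+        ```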
+        """
+        ...
+
+    @required_args(["messages", "model"], ["messages", "model", "stream"])
+    async def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
+        validate_response_format(response_format)
+        return await self._post(
+            "/chat/completions",
+            body=await async_maybe_transform(
+                {
+                    "messages": messages,
+                    "model": model,
+                    "audio": audio,
+                    "frequency_penalty": frequency_penalty,
+                    "function_call": function_call,
+                    "functions": functions,
+                    "logit_bias": logit_bias,
+                    "logprobs": logprobs,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_tokens": max_tokens,
+                    "metadata": metadata,
+                    "modalities": modalities,
+                    "n": n,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "prediction": prediction,
+                    "presence_penalty": presence_penalty,
+                    "reasoning_effort": reasoning_effort,
+                    "response_format": response_format,
+                    "seed": seed,
+                    "service_tier": service_tier,
+                    "stop": stop,
+                    "store": store,
+                    "stream": stream,
+                    "stream_options": stream_options,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_logprobs": top_logprobs,
+                    "top_p": top_p,
+                    "user": user,
+                    "web_search_options": web_search_options,
+                },
+                completion_create_params.CompletionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ChatCompletion,
+            stream=stream or False,
+            stream_cls=AsyncStream[ChatCompletionChunk],
+        )
+
+    async def retrieve(
+        self,
+        completion_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion:
+        """Get a stored chat completion.
+
+        Only Chat Completions that have been created with
+        the `store` parameter set to `true` will be returned.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not completion_id:
+            raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
+        return await self._get(
+            f"/chat/completions/{completion_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ChatCompletion,
+        )
+
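Editor's note: `retrieve` only succeeds for completions created with `store=True`, so a round trip looks like the sketch below (model name illustrative):

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def round_trip() -> None:
    created = await client.chat.completions.create(
        model="gpt-4o",  # illustrative
        messages=[{"role": "user", "content": "ping"}],
        store=True,  # retrieval only works for stored completions
    )
    # Fetch the stored completion back by its ID.
    fetched = await client.chat.completions.retrieve(created.id)
    assert fetched.id == created.id
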
+    async def update(
+        self,
+        completion_id: str,
+        *,
+        metadata: Optional[Metadata],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion:
+        """Modify a stored chat completion.
+
+        Only Chat Completions that have been created
+        with the `store` parameter set to `true` can be modified. Currently, the only
+        supported modification is to update the `metadata` field.
+
+        Args:
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not completion_id:
+            raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
+        return await self._post(
+            f"/chat/completions/{completion_id}",
+            body=await async_maybe_transform({"metadata": metadata}, completion_update_params.CompletionUpdateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ChatCompletion,
+        )
+
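Editor's note: a sketch of tagging a stored completion via `update`; the keys and values here are arbitrary examples and must be strings within the documented length limits:

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def tag(completion_id: str) -> None:
    updated = await client.chat.completions.update(
        completion_id,
        metadata={"project": "demo", "reviewed": "true"},  # string keys and values only
    )
    print(updated.id)
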
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: str | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[ChatCompletion, AsyncCursorPage[ChatCompletion]]:
+        """List stored Chat Completions.
+
+        Only Chat Completions that have been stored with
+        the `store` parameter set to `true` will be returned.
+
+        Args:
+          after: Identifier for the last chat completion from the previous pagination request.
+
+          limit: Number of Chat Completions to retrieve.
+
+          metadata:
+              A list of metadata keys to filter the Chat Completions by. Example:
+
+              `metadata[key1]=value1&metadata[key2]=value2`
+
+          model: The model used to generate the Chat Completions.
+
+          order: Sort order for Chat Completions by timestamp. Use `asc` for ascending order or
+              `desc` for descending order. Defaults to `asc`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/chat/completions",
+            page=AsyncCursorPage[ChatCompletion],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                        "metadata": metadata,
+                        "model": model,
+                        "order": order,
+                    },
+                    completion_list_params.CompletionListParams,
+                ),
+            ),
+            model=ChatCompletion,
+        )
+
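Editor's note: `list` returns an `AsyncPaginator`, so results can be consumed with `async for`, which follows cursors transparently. A sketch:

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def newest_first() -> None:
    # Iterates across pages; the `after` cursor is handled internally.
    async for completion in client.chat.completions.list(limit=20, order="desc"):
        print(completion.id, completion.model)
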
+    async def delete(
+        self,
+        completion_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletionDeleted:
+        """Delete a stored chat completion.
+
+        Only Chat Completions that have been created
+        with the `store` parameter set to `true` can be deleted.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not completion_id:
+            raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
+        return await self._delete(
+            f"/chat/completions/{completion_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ChatCompletionDeleted,
+        )
+
+
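Editor's note: deletion follows the same pattern; `ChatCompletionDeleted` reports the outcome. A sketch:

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def cleanup(completion_id: str) -> None:
    deleted = await client.chat.completions.delete(completion_id)
    print(deleted.id, deleted.deleted)  # ID plus a boolean `deleted` flag
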
+class CompletionsWithRawResponse:
+    def __init__(self, completions: Completions) -> None:
+        self._completions = completions
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            completions.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            completions.retrieve,
+        )
+        self.update = _legacy_response.to_raw_response_wrapper(
+            completions.update,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            completions.list,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            completions.delete,
+        )
+
+    @cached_property
+    def messages(self) -> MessagesWithRawResponse:
+        return MessagesWithRawResponse(self._completions.messages)
+
+
+class AsyncCompletionsWithRawResponse:
+    def __init__(self, completions: AsyncCompletions) -> None:
+        self._completions = completions
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            completions.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            completions.retrieve,
+        )
+        self.update = _legacy_response.async_to_raw_response_wrapper(
+            completions.update,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            completions.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            completions.delete,
+        )
+
+    @cached_property
+    def messages(self) -> AsyncMessagesWithRawResponse:
+        return AsyncMessagesWithRawResponse(self._completions.messages)
+
+
+class CompletionsWithStreamingResponse:
+    def __init__(self, completions: Completions) -> None:
+        self._completions = completions
+
+        self.create = to_streamed_response_wrapper(
+            completions.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            completions.retrieve,
+        )
+        self.update = to_streamed_response_wrapper(
+            completions.update,
+        )
+        self.list = to_streamed_response_wrapper(
+            completions.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            completions.delete,
+        )
+
+    @cached_property
+    def messages(self) -> MessagesWithStreamingResponse:
+        return MessagesWithStreamingResponse(self._completions.messages)
+
+
+class AsyncCompletionsWithStreamingResponse:
+    def __init__(self, completions: AsyncCompletions) -> None:
+        self._completions = completions
+
+        self.create = async_to_streamed_response_wrapper(
+            completions.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            completions.retrieve,
+        )
+        self.update = async_to_streamed_response_wrapper(
+            completions.update,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            completions.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            completions.delete,
+        )
+
+    @cached_property
+    def messages(self) -> AsyncMessagesWithStreamingResponse:
+        return AsyncMessagesWithStreamingResponse(self._completions.messages)
+
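Editor's note: the four wrapper classes above power `.with_raw_response` and `.with_streaming_response`. A sketch of the raw-response path, assuming the usual environment-based client setup:

from openai import OpenAI

client = OpenAI()
# with_raw_response returns the HTTP response; .parse() yields the typed object.
response = client.chat.completions.with_raw_response.create(
    model="gpt-4o",  # illustrative
    messages=[{"role": "user", "content": "hi"}],
)
print(response.headers.get("x-request-id"))
completion = response.parse()  # -> ChatCompletion
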
+
+def validate_response_format(response_format: object) -> None:
+    if inspect.isclass(response_format) and issubclass(response_format, pydantic.BaseModel):
+        raise TypeError(
+            "You tried to pass a `BaseModel` class to `chat.completions.create()`; You must use `beta.chat.completions.parse()` instead"
+        )
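Editor's note: `validate_response_format` guards against a common mistake: passing a Pydantic model class where a response-format dict is expected. A sketch of the failure and the intended path (`MathReply` is a hypothetical model):

import pydantic

from openai import OpenAI


class MathReply(pydantic.BaseModel):
    answer: int


client = OpenAI()
try:
    client.chat.completions.create(
        model="gpt-4o",  # illustrative
        messages=[{"role": "user", "content": "2+2?"}],
        response_format=MathReply,  # type: ignore[arg-type]  # raises before any request is sent
    )
except TypeError:
    # Structured outputs from a BaseModel go through the parse() helper instead.
    parsed = client.beta.chat.completions.parse(
        model="gpt-4o",
        messages=[{"role": "user", "content": "2+2?"}],
        response_format=MathReply,
    )
    print(parsed.choices[0].message.parsed)
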
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/messages.py b/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/messages.py
new file mode 100644
index 00000000..fac15fba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/messages.py
@@ -0,0 +1,212 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import AsyncPaginator, make_request_options
+from ....types.chat.completions import message_list_params
+from ....types.chat.chat_completion_store_message import ChatCompletionStoreMessage
+
+__all__ = ["Messages", "AsyncMessages"]
+
+
+class Messages(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> MessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return MessagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> MessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return MessagesWithStreamingResponse(self)
+
+    def list(
+        self,
+        completion_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[ChatCompletionStoreMessage]:
+        """Get the messages in a stored chat completion.
+
+        Only Chat Completions that have
+        been created with the `store` parameter set to `true` will be returned.
+
+        Args:
+          after: Identifier for the last message from the previous pagination request.
+
+          limit: Number of messages to retrieve.
+
+          order: Sort order for messages by timestamp. Use `asc` for ascending order or `desc`
+              for descending order. Defaults to `asc`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not completion_id:
+            raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
+        return self._get_api_list(
+            f"/chat/completions/{completion_id}/messages",
+            page=SyncCursorPage[ChatCompletionStoreMessage],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    message_list_params.MessageListParams,
+                ),
+            ),
+            model=ChatCompletionStoreMessage,
+        )
+
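Editor's note: a `SyncCursorPage` is directly iterable, so walking every stored message of a completion is one loop. A sketch (the completion ID is a placeholder):

from openai import OpenAI

client = OpenAI()
# Extra pages are fetched lazily as iteration crosses page boundaries.
for message in client.chat.completions.messages.list("chatcmpl-abc123", limit=50):
    print(message.id, message.role)
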
+
+class AsyncMessages(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncMessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncMessagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncMessagesWithStreamingResponse(self)
+
+    def list(
+        self,
+        completion_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[ChatCompletionStoreMessage, AsyncCursorPage[ChatCompletionStoreMessage]]:
+        """Get the messages in a stored chat completion.
+
+        Only Chat Completions that have
+        been created with the `store` parameter set to `true` will be returned.
+
+        Args:
+          after: Identifier for the last message from the previous pagination request.
+
+          limit: Number of messages to retrieve.
+
+          order: Sort order for messages by timestamp. Use `asc` for ascending order or `desc`
+              for descending order. Defaults to `asc`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not completion_id:
+            raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
+        return self._get_api_list(
+            f"/chat/completions/{completion_id}/messages",
+            page=AsyncCursorPage[ChatCompletionStoreMessage],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    message_list_params.MessageListParams,
+                ),
+            ),
+            model=ChatCompletionStoreMessage,
+        )
+
+
+class MessagesWithRawResponse:
+    def __init__(self, messages: Messages) -> None:
+        self._messages = messages
+
+        self.list = _legacy_response.to_raw_response_wrapper(
+            messages.list,
+        )
+
+
+class AsyncMessagesWithRawResponse:
+    def __init__(self, messages: AsyncMessages) -> None:
+        self._messages = messages
+
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            messages.list,
+        )
+
+
+class MessagesWithStreamingResponse:
+    def __init__(self, messages: Messages) -> None:
+        self._messages = messages
+
+        self.list = to_streamed_response_wrapper(
+            messages.list,
+        )
+
+
+class AsyncMessagesWithStreamingResponse:
+    def __init__(self, messages: AsyncMessages) -> None:
+        self._messages = messages
+
+        self.list = async_to_streamed_response_wrapper(
+            messages.list,
+        )
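Editor's note: for the async resource, awaiting `list` yields the first page, and pages can also be walked manually. A sketch, assuming the standard `has_next_page()`/`get_next_page()` helpers on page objects:

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def first_two_pages(completion_id: str) -> None:
    page = await client.chat.completions.messages.list(completion_id, limit=10)
    for message in page.data:  # only the current page
        print(message.id)
    if page.has_next_page():
        page = await page.get_next_page()
        print("next page size:", len(page.data))
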
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/completions.py b/.venv/lib/python3.12/site-packages/openai/resources/completions.py
new file mode 100644
index 00000000..171f5093
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/completions.py
@@ -0,0 +1,1148 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union, Iterable, Optional
+from typing_extensions import Literal, overload
+
+import httpx
+
+from .. import _legacy_response
+from ..types import completion_create_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import (
+    required_args,
+    maybe_transform,
+    async_maybe_transform,
+)
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .._streaming import Stream, AsyncStream
+from .._base_client import (
+    make_request_options,
+)
+from ..types.completion import Completion
+from ..types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
+
+__all__ = ["Completions", "AsyncCompletions"]
+
+
+class Completions(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> CompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return CompletionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return CompletionsWithStreamingResponse(self)
+
+    @overload
+    def create(
+        self,
+        *,
+        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
+        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
+        best_of: Optional[int] | NotGiven = NOT_GIVEN,
+        echo: Optional[bool] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion:
+        """
+        Creates a completion for the provided prompt and parameters.
+
+        Args:
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          prompt: The prompt(s) to generate completions for, encoded as a string, array of
+              strings, array of tokens, or array of token arrays.
+
+              Note that <|endoftext|> is the document separator that the model sees during
+              training, so if a prompt is not specified the model will generate as if from the
+              beginning of a new document.
+
+          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
+              the highest log probability per token). Results cannot be streamed.
+
+              When used with `n`, `best_of` controls the number of candidate completions and
+              `n` specifies how many to return – `best_of` must be greater than `n`.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          echo: Echo back the prompt in addition to the completion.
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
+              tokenizer) to an associated bias value from -100 to 100. You can use this
+              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
+              Mathematically, the bias is added to the logits generated by the model prior to
+              sampling. The exact effect will vary per model, but values between -1 and 1
+              should decrease or increase likelihood of selection; values like -100 or 100
+              should result in a ban or exclusive selection of the relevant token.
+
+              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
+              from being generated.
+
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
+
+              The maximum value for `logprobs` is 5.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
+
+              The token count of your prompt plus `max_tokens` cannot exceed the model's
+              context length.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens.
+
+          n: How many completions to generate for each prompt.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          seed: If specified, our system will make a best effort to sample deterministically,
+              such that repeated requests with the same `seed` and parameters should return
+              the same result.
+
+              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
+              response parameter to monitor changes in the backend.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          stream: Whether to stream back partial progress. If set, tokens will be sent as
+              data-only
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+              as they become available, with the stream terminated by a `data: [DONE]`
+              message.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
+
+          stream_options: Options for the streaming response. Only set this when you set `stream: true`.
+
+          suffix: The suffix that comes after a completion of inserted text.
+
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+              We generally recommend altering this or `top_p` but not both.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
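Editor's note: to make the `logit_bias` discussion above concrete, a sketch that requests a short legacy completion while banning the <|endoftext|> token, as in the docstring's own example:

from openai import OpenAI

client = OpenAI()
completion = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="Write a one-line haiku about diffs.",
    max_tokens=32,
    logit_bias={"50256": -100},  # ban <|endoftext|>, per the example above
)
print(completion.choices[0].text)
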
+    @overload
+    def create(
+        self,
+        *,
+        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
+        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
+        stream: Literal[True],
+        best_of: Optional[int] | NotGiven = NOT_GIVEN,
+        echo: Optional[bool] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[Completion]:
+        """
+        Creates a completion for the provided prompt and parameters.
+
+        Args:
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          prompt: The prompt(s) to generate completions for, encoded as a string, array of
+              strings, array of tokens, or array of token arrays.
+
+              Note that <|endoftext|> is the document separator that the model sees during
+              training, so if a prompt is not specified the model will generate as if from the
+              beginning of a new document.
+
+          stream: Whether to stream back partial progress. If set, tokens will be sent as
+              data-only
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+              as they become available, with the stream terminated by a `data: [DONE]`
+              message.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
+
+          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
+              the highest log probability per token). Results cannot be streamed.
+
+              When used with `n`, `best_of` controls the number of candidate completions and
+              `n` specifies how many to return – `best_of` must be greater than `n`.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          echo: Echo back the prompt in addition to the completion.
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
+              tokenizer) to an associated bias value from -100 to 100. You can use this
+              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
+              Mathematically, the bias is added to the logits generated by the model prior to
+              sampling. The exact effect will vary per model, but values between -1 and 1
+              should decrease or increase likelihood of selection; values like -100 or 100
+              should result in a ban or exclusive selection of the relevant token.
+
+              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
+              from being generated.
+
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
+
+              The maximum value for `logprobs` is 5.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
+
+              The token count of your prompt plus `max_tokens` cannot exceed the model's
+              context length.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens.
+
+          n: How many completions to generate for each prompt.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          seed: If specified, our system will make a best effort to sample deterministically,
+              such that repeated requests with the same `seed` and parameters should return
+              the same result.
+
+              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
+              response parameter to monitor changes in the backend.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          stream_options: Options for the streaming response. Only set this when you set `stream: true`.
+
+          suffix: The suffix that comes after a completion of inserted text.
+
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+              We generally recommend altering this or `top_p` but not both.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
+        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
+        stream: bool,
+        best_of: Optional[int] | NotGiven = NOT_GIVEN,
+        echo: Optional[bool] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion | Stream[Completion]:
+        """
+        Creates a completion for the provided prompt and parameters.
+
+        Args:
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          prompt: The prompt(s) to generate completions for, encoded as a string, array of
+              strings, array of tokens, or array of token arrays.
+
+              Note that <|endoftext|> is the document separator that the model sees during
+              training, so if a prompt is not specified the model will generate as if from the
+              beginning of a new document.
+
+          stream: Whether to stream back partial progress. If set, tokens will be sent as
+              data-only
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+              as they become available, with the stream terminated by a `data: [DONE]`
+              message.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
+
+          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
+              the highest log probability per token). Results cannot be streamed.
+
+              When used with `n`, `best_of` controls the number of candidate completions and
+              `n` specifies how many to return – `best_of` must be greater than `n`.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          echo: Echo back the prompt in addition to the completion.
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
+              tokenizer) to an associated bias value from -100 to 100. You can use this
+              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
+              Mathematically, the bias is added to the logits generated by the model prior to
+              sampling. The exact effect will vary per model, but values between -1 and 1
+              should decrease or increase likelihood of selection; values like -100 or 100
+              should result in a ban or exclusive selection of the relevant token.
+
+              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
+              from being generated.
+
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
+
+              The maximum value for `logprobs` is 5.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
+
+              The token count of your prompt plus `max_tokens` cannot exceed the model's
+              context length.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens.
+
+          n: How many completions to generate for each prompt.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          seed: If specified, our system will make a best effort to sample deterministically,
+              such that repeated requests with the same `seed` and parameters should return
+              the same result.
+
+              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
+              response parameter to monitor changes in the backend.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          stream_options: Options for the streaming response. Only set this when you set `stream: true`.
+
+          suffix: The suffix that comes after a completion of inserted text.
+
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+              We generally recommend altering this or `top_p` but not both.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["model", "prompt"], ["model", "prompt", "stream"])
+    def create(
+        self,
+        *,
+        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
+        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
+        best_of: Optional[int] | NotGiven = NOT_GIVEN,
+        echo: Optional[bool] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion | Stream[Completion]:
+        return self._post(
+            "/completions",
+            body=maybe_transform(
+                {
+                    "model": model,
+                    "prompt": prompt,
+                    "best_of": best_of,
+                    "echo": echo,
+                    "frequency_penalty": frequency_penalty,
+                    "logit_bias": logit_bias,
+                    "logprobs": logprobs,
+                    "max_tokens": max_tokens,
+                    "n": n,
+                    "presence_penalty": presence_penalty,
+                    "seed": seed,
+                    "stop": stop,
+                    "stream": stream,
+                    "stream_options": stream_options,
+                    "suffix": suffix,
+                    "temperature": temperature,
+                    "top_p": top_p,
+                    "user": user,
+                },
+                completion_create_params.CompletionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Completion,
+            stream=stream or False,
+            stream_cls=Stream[Completion],
+        )
+
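Editor's note: as the overloads above encode, passing `stream=True` switches the sync return type to `Stream[Completion]`. A brief sketch:

from openai import OpenAI

client = OpenAI()
stream = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="Count to five:",
    stream=True,
)
# Each event is a Completion object carrying a partial `text`.
for part in stream:
    print(part.choices[0].text, end="")
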
+
+class AsyncCompletions(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncCompletionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncCompletionsWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        *,
+        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
+        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
+        best_of: Optional[int] | NotGiven = NOT_GIVEN,
+        echo: Optional[bool] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion:
+        """
+        Creates a completion for the provided prompt and parameters.
+
+        Args:
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          prompt: The prompt(s) to generate completions for, encoded as a string, array of
+              strings, array of tokens, or array of token arrays.
+
+              Note that <|endoftext|> is the document separator that the model sees during
+              training, so if a prompt is not specified the model will generate as if from the
+              beginning of a new document.
+
+          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
+              the highest log probability per token). Results cannot be streamed.
+
+              When used with `n`, `best_of` controls the number of candidate completions and
+              `n` specifies how many to return – `best_of` must be greater than `n`.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          echo: Echo back the prompt in addition to the completion.
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
+              tokenizer) to an associated bias value from -100 to 100. You can use this
+              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
+              Mathematically, the bias is added to the logits generated by the model prior to
+              sampling. The exact effect will vary per model, but values between -1 and 1
+              should decrease or increase likelihood of selection; values like -100 or 100
+              should result in a ban or exclusive selection of the relevant token.
+
+              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
+              from being generated.
+
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
+
+              The maximum value for `logprobs` is 5.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
+
+              The token count of your prompt plus `max_tokens` cannot exceed the model's
+              context length.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens.
+
+          n: How many completions to generate for each prompt.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          seed: If specified, our system will make a best effort to sample deterministically,
+              such that repeated requests with the same `seed` and parameters should return
+              the same result.
+
+              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
+              response parameter to monitor changes in the backend.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          stream: Whether to stream back partial progress. If set, tokens will be sent as
+              data-only
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+              as they become available, with the stream terminated by a `data: [DONE]`
+              message.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
+
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+          suffix: The suffix that comes after a completion of inserted text.
+
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+              We generally recommend altering this or `top_p` but not both.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
+        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
+        stream: Literal[True],
+        best_of: Optional[int] | NotGiven = NOT_GIVEN,
+        echo: Optional[bool] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[Completion]:
+        """
+        Creates a completion for the provided prompt and parameters.
+
+        Args:
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          prompt: The prompt(s) to generate completions for, encoded as a string, array of
+              strings, array of tokens, or array of token arrays.
+
+              Note that <|endoftext|> is the document separator that the model sees during
+              training, so if a prompt is not specified the model will generate as if from the
+              beginning of a new document.
+
+          stream: Whether to stream back partial progress. If set, tokens will be sent as
+              data-only
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+              as they become available, with the stream terminated by a `data: [DONE]`
+              message.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
+
+          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
+              the highest log probability per token). Results cannot be streamed.
+
+              When used with `n`, `best_of` controls the number of candidate completions and
+              `n` specifies how many to return – `best_of` must be greater than `n`.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          echo: Echo back the prompt in addition to the completion.
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
+              tokenizer) to an associated bias value from -100 to 100. You can use this
+              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
+              Mathematically, the bias is added to the logits generated by the model prior to
+              sampling. The exact effect will vary per model, but values between -1 and 1
+              should decrease or increase likelihood of selection; values like -100 or 100
+              should result in a ban or exclusive selection of the relevant token.
+
+              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
+              from being generated.
+
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
+
+              The maximum value for `logprobs` is 5.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
+
+              The token count of your prompt plus `max_tokens` cannot exceed the model's
+              context length.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens.
+
+          n: How many completions to generate for each prompt.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          seed: If specified, our system will make a best effort to sample deterministically,
+              such that repeated requests with the same `seed` and parameters should return
+              the same result.
+
+              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
+              response parameter to monitor changes in the backend.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+          suffix: The suffix that comes after a completion of inserted text.
+
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+              We generally recommend altering this or `top_p` but not both.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
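+    # Illustrative usage (a sketch, not part of the generated file): consuming
+    # the streaming overload with the async client. Assumes `OPENAI_API_KEY`
+    # is set in the environment.
+    #
+    #   import asyncio
+    #   from openai import AsyncOpenAI
+    #
+    #   async def main() -> None:
+    #       client = AsyncOpenAI()
+    #       stream = await client.completions.create(
+    #           model="gpt-3.5-turbo-instruct",
+    #           prompt="Say this is a test.",
+    #           stream=True,
+    #       )
+    #       async for chunk in stream:
+    #           print(chunk.choices[0].text, end="", flush=True)
+    #
+    #   asyncio.run(main())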
+
+    @overload
+    async def create(
+        self,
+        *,
+        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
+        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
+        stream: bool,
+        best_of: Optional[int] | NotGiven = NOT_GIVEN,
+        echo: Optional[bool] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion | AsyncStream[Completion]:
+        """
+        Creates a completion for the provided prompt and parameters.
+
+        Args:
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          prompt: The prompt(s) to generate completions for, encoded as a string, array of
+              strings, array of tokens, or array of token arrays.
+
+              Note that <|endoftext|> is the document separator that the model sees during
+              training, so if a prompt is not specified the model will generate as if from the
+              beginning of a new document.
+
+          stream: Whether to stream back partial progress. If set, tokens will be sent as
+              data-only
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+              as they become available, with the stream terminated by a `data: [DONE]`
+              message.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
+
+          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
+              the highest log probability per token). Results cannot be streamed.
+
+              When used with `n`, `best_of` controls the number of candidate completions and
+              `n` specifies how many to return – `best_of` must be greater than `n`.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          echo: Echo back the prompt in addition to the completion.
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
+              tokenizer) to an associated bias value from -100 to 100. You can use this
+              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
+              Mathematically, the bias is added to the logits generated by the model prior to
+              sampling. The exact effect will vary per model, but values between -1 and 1
+              should decrease or increase likelihood of selection; values like -100 or 100
+              should result in a ban or exclusive selection of the relevant token.
+
+              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
+              from being generated.
+
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
+
+              The maximum value for `logprobs` is 5.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
+
+              The token count of your prompt plus `max_tokens` cannot exceed the model's
+              context length.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens.
+
+          n: How many completions to generate for each prompt.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          seed: If specified, our system will make a best effort to sample deterministically,
+              such that repeated requests with the same `seed` and parameters should return
+              the same result.
+
+              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
+              response parameter to monitor changes in the backend.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+          suffix: The suffix that comes after a completion of inserted text.
+
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+              We generally recommend altering this or `top_p` but not both.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["model", "prompt"], ["model", "prompt", "stream"])
+    async def create(
+        self,
+        *,
+        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
+        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
+        best_of: Optional[int] | NotGiven = NOT_GIVEN,
+        echo: Optional[bool] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion | AsyncStream[Completion]:
+        return await self._post(
+            "/completions",
+            body=await async_maybe_transform(
+                {
+                    "model": model,
+                    "prompt": prompt,
+                    "best_of": best_of,
+                    "echo": echo,
+                    "frequency_penalty": frequency_penalty,
+                    "logit_bias": logit_bias,
+                    "logprobs": logprobs,
+                    "max_tokens": max_tokens,
+                    "n": n,
+                    "presence_penalty": presence_penalty,
+                    "seed": seed,
+                    "stop": stop,
+                    "stream": stream,
+                    "stream_options": stream_options,
+                    "suffix": suffix,
+                    "temperature": temperature,
+                    "top_p": top_p,
+                    "user": user,
+                },
+                completion_create_params.CompletionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Completion,
+            stream=stream or False,
+            stream_cls=AsyncStream[Completion],
+        )
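+    # Illustrative usage (a sketch, not part of the generated file): a
+    # non-streaming call that bans the <|endoftext|> token via `logit_bias`
+    # and requests best-effort reproducibility via `seed`.
+    #
+    #   completion = await client.completions.create(
+    #       model="gpt-3.5-turbo-instruct",
+    #       prompt="Q: What is 2 + 2?\nA:",
+    #       logit_bias={"50256": -100},  # -100 effectively bans the token
+    #       max_tokens=16,
+    #       seed=1234,
+    #   )
+    #   print(completion.choices[0].text)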
+
+
+class CompletionsWithRawResponse:
+    def __init__(self, completions: Completions) -> None:
+        self._completions = completions
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            completions.create,
+        )
+
+
+class AsyncCompletionsWithRawResponse:
+    def __init__(self, completions: AsyncCompletions) -> None:
+        self._completions = completions
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            completions.create,
+        )
+
+
+class CompletionsWithStreamingResponse:
+    def __init__(self, completions: Completions) -> None:
+        self._completions = completions
+
+        self.create = to_streamed_response_wrapper(
+            completions.create,
+        )
+
+
+class AsyncCompletionsWithStreamingResponse:
+    def __init__(self, completions: AsyncCompletions) -> None:
+        self._completions = completions
+
+        self.create = async_to_streamed_response_wrapper(
+            completions.create,
+        )
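+# Illustrative usage (a sketch, not part of the generated file): the wrapper
+# classes above back the `.with_raw_response` accessor, which exposes response
+# headers before parsing:
+#
+#   response = client.completions.with_raw_response.create(
+#       model="gpt-3.5-turbo-instruct",
+#       prompt="Say this is a test.",
+#   )
+#   print(response.headers.get("x-request-id"))
+#   completion = response.parse()  # -> Completion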
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/embeddings.py b/.venv/lib/python3.12/site-packages/openai/resources/embeddings.py
new file mode 100644
index 00000000..a392d5eb
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/embeddings.py
@@ -0,0 +1,290 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import array
+import base64
+from typing import List, Union, Iterable, cast
+from typing_extensions import Literal
+
+import httpx
+
+from .. import _legacy_response
+from ..types import embedding_create_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import is_given, maybe_transform
+from .._compat import cached_property
+from .._extras import numpy as np, has_numpy
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .._base_client import make_request_options
+from ..types.embedding_model import EmbeddingModel
+from ..types.create_embedding_response import CreateEmbeddingResponse
+
+__all__ = ["Embeddings", "AsyncEmbeddings"]
+
+
+class Embeddings(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> EmbeddingsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return EmbeddingsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> EmbeddingsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return EmbeddingsWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]],
+        model: Union[str, EmbeddingModel],
+        dimensions: int | NotGiven = NOT_GIVEN,
+        encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> CreateEmbeddingResponse:
+        """
+        Creates an embedding vector representing the input text.
+
+        Args:
+          input: Input text to embed, encoded as a string or array of tokens. To embed multiple
+              inputs in a single request, pass an array of strings or array of token arrays.
+              The input must not exceed the max input tokens for the model (8192 tokens for
+              `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
+              dimensions or less.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens. Some models may also impose a limit on the total number of
+              tokens summed across inputs.
+
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          dimensions: The number of dimensions the resulting output embeddings should have. Only
+              supported in `text-embedding-3` and later models.
+
+          encoding_format: The format to return the embeddings in. Can be either `float` or
+              [`base64`](https://pypi.org/project/pybase64/).
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        params = {
+            "input": input,
+            "model": model,
+            "user": user,
+            "dimensions": dimensions,
+            "encoding_format": encoding_format,
+        }
+        if not is_given(encoding_format):
+            params["encoding_format"] = "base64"
+
+        def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse:
+            if is_given(encoding_format):
+                # don't modify the response object if a user explicitly asked for a format
+                return obj
+
+            for embedding in obj.data:
+                data = cast(object, embedding.embedding)
+                if not isinstance(data, str):
+                    continue
+                if not has_numpy():
+                    # use array for base64 optimisation
+                    embedding.embedding = array.array("f", base64.b64decode(data)).tolist()
+                else:
+                    embedding.embedding = np.frombuffer(  # type: ignore[no-untyped-call]
+                        base64.b64decode(data), dtype="float32"
+                    ).tolist()
+
+            return obj
+
+        return self._post(
+            "/embeddings",
+            body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                post_parser=parser,
+            ),
+            cast_to=CreateEmbeddingResponse,
+        )
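+    # Illustrative usage (a sketch, not part of the generated file): with no
+    # explicit `encoding_format`, the parser above transparently decodes the
+    # base64 payload into float lists; passing `encoding_format="base64"`
+    # returns the raw string for the caller to decode:
+    #
+    #   import array, base64
+    #
+    #   resp = client.embeddings.create(
+    #       model="text-embedding-3-small",
+    #       input="hello world",
+    #       encoding_format="base64",
+    #   )
+    #   raw = resp.data[0].embedding  # a base64 string for this request
+    #   floats = array.array("f", base64.b64decode(raw)).tolist()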
+
+
+class AsyncEmbeddings(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncEmbeddingsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncEmbeddingsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncEmbeddingsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncEmbeddingsWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]],
+        model: Union[str, EmbeddingModel],
+        dimensions: int | NotGiven = NOT_GIVEN,
+        encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> CreateEmbeddingResponse:
+        """
+        Creates an embedding vector representing the input text.
+
+        Args:
+          input: Input text to embed, encoded as a string or array of tokens. To embed multiple
+              inputs in a single request, pass an array of strings or array of token arrays.
+              The input must not exceed the max input tokens for the model (8192 tokens for
+              `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
+              dimensions or less.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens. Some models may also impose a limit on the total number of
+              tokens summed across inputs.
+
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          dimensions: The number of dimensions the resulting output embeddings should have. Only
+              supported in `text-embedding-3` and later models.
+
+          encoding_format: The format to return the embeddings in. Can be either `float` or
+              [`base64`](https://pypi.org/project/pybase64/).
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        params = {
+            "input": input,
+            "model": model,
+            "user": user,
+            "dimensions": dimensions,
+            "encoding_format": encoding_format,
+        }
+        if not is_given(encoding_format):
+            params["encoding_format"] = "base64"
+
+        def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse:
+            if is_given(encoding_format):
+                # don't modify the response object if a user explicitly asked for a format
+                return obj
+
+            for embedding in obj.data:
+                data = cast(object, embedding.embedding)
+                if not isinstance(data, str):
+                    continue
+                if not has_numpy():
+                    # use array for base64 optimisation
+                    embedding.embedding = array.array("f", base64.b64decode(data)).tolist()
+                else:
+                    embedding.embedding = np.frombuffer(  # type: ignore[no-untyped-call]
+                        base64.b64decode(data), dtype="float32"
+                    ).tolist()
+
+            return obj
+
+        return await self._post(
+            "/embeddings",
+            body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                post_parser=parser,
+            ),
+            cast_to=CreateEmbeddingResponse,
+        )
+
+
+class EmbeddingsWithRawResponse:
+    def __init__(self, embeddings: Embeddings) -> None:
+        self._embeddings = embeddings
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            embeddings.create,
+        )
+
+
+class AsyncEmbeddingsWithRawResponse:
+    def __init__(self, embeddings: AsyncEmbeddings) -> None:
+        self._embeddings = embeddings
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            embeddings.create,
+        )
+
+
+class EmbeddingsWithStreamingResponse:
+    def __init__(self, embeddings: Embeddings) -> None:
+        self._embeddings = embeddings
+
+        self.create = to_streamed_response_wrapper(
+            embeddings.create,
+        )
+
+
+class AsyncEmbeddingsWithStreamingResponse:
+    def __init__(self, embeddings: AsyncEmbeddings) -> None:
+        self._embeddings = embeddings
+
+        self.create = async_to_streamed_response_wrapper(
+            embeddings.create,
+        )
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/files.py b/.venv/lib/python3.12/site-packages/openai/resources/files.py
new file mode 100644
index 00000000..2eaa4a64
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/files.py
@@ -0,0 +1,767 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import time
+import typing_extensions
+from typing import Mapping, cast
+from typing_extensions import Literal
+
+import httpx
+
+from .. import _legacy_response
+from ..types import FilePurpose, file_list_params, file_create_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
+from .._utils import (
+    extract_files,
+    maybe_transform,
+    deepcopy_minimal,
+    async_maybe_transform,
+)
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+    StreamedBinaryAPIResponse,
+    AsyncStreamedBinaryAPIResponse,
+    to_streamed_response_wrapper,
+    async_to_streamed_response_wrapper,
+    to_custom_streamed_response_wrapper,
+    async_to_custom_streamed_response_wrapper,
+)
+from ..pagination import SyncCursorPage, AsyncCursorPage
+from .._base_client import AsyncPaginator, make_request_options
+from ..types.file_object import FileObject
+from ..types.file_deleted import FileDeleted
+from ..types.file_purpose import FilePurpose
+
+__all__ = ["Files", "AsyncFiles"]
+
+
+class Files(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> FilesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return FilesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> FilesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return FilesWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        purpose: FilePurpose,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileObject:
+        """Upload a file that can be used across various endpoints.
+
+        Individual files can be
+        up to 512 MB, and the size of all files uploaded by one organization can be up
+        to 100 GB.
+
+        The Assistants API supports files up to 2 million tokens and of specific file
+        types. See the
+        [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) for
+        details.
+
+        The Fine-tuning API only supports `.jsonl` files. The input also has certain
+        required formats for fine-tuning
+        [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or
+        [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input)
+        models.
+
+        The Batch API only supports `.jsonl` files up to 200 MB in size. The input also
+        has a specific required
+        [format](https://platform.openai.com/docs/api-reference/batch/request-input).
+
+        Please [contact us](https://help.openai.com/) if you need to increase these
+        storage limits.
+
+        Args:
+          file: The File object (not file name) to be uploaded.
+
+          purpose: The intended purpose of the uploaded file. One of:
+              - `assistants`: Used in the Assistants API
+              - `batch`: Used in the Batch API
+              - `fine-tune`: Used for fine-tuning
+              - `vision`: Images used for vision fine-tuning
+              - `user_data`: Flexible file type for any purpose
+              - `evals`: Used for eval data sets
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "purpose": purpose,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return self._post(
+            "/files",
+            body=maybe_transform(body, file_create_params.FileCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileObject,
+        )
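+    # Illustrative usage (a sketch, not part of the generated file): uploading
+    # a JSONL training file for fine-tuning. `file` accepts an open binary
+    # handle or a `(filename, contents)` tuple:
+    #
+    #   file_obj = client.files.create(
+    #       file=open("training_data.jsonl", "rb"),
+    #       purpose="fine-tune",
+    #   )
+    #   print(file_obj.id, file_obj.status)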
+
+    def retrieve(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileObject:
+        """
+        Returns information about a specific file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        return self._get(
+            f"/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileObject,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        purpose: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[FileObject]:
+        """Returns a list of files.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              10,000, and the default is 10,000.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          purpose: Only return files with the given purpose.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/files",
+            page=SyncCursorPage[FileObject],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                        "order": order,
+                        "purpose": purpose,
+                    },
+                    file_list_params.FileListParams,
+                ),
+            ),
+            model=FileObject,
+        )
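+    # Illustrative usage (a sketch, not part of the generated file): the
+    # returned cursor page auto-paginates when iterated directly:
+    #
+    #   for file in client.files.list(purpose="fine-tune", limit=100):
+    #       print(file.id, file.filename)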
+
+    def delete(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileDeleted:
+        """
+        Delete a file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        return self._delete(
+            f"/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileDeleted,
+        )
+
+    def content(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> _legacy_response.HttpxBinaryResponseContent:
+        """
+        Returns the contents of the specified file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
+        return self._get(
+            f"/files/{file_id}/content",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=_legacy_response.HttpxBinaryResponseContent,
+        )
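+    # Illustrative usage (a sketch, not part of the generated file): the
+    # binary response wrapper exposes `.content` and `.write_to_file(...)`:
+    #
+    #   data = client.files.content("file-abc123")  # hypothetical file ID
+    #   data.write_to_file("batch_output.jsonl")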
+
+    @typing_extensions.deprecated("The `.content()` method should be used instead")
+    def retrieve_content(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> str:
+        """
+        Returns the contents of the specified file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        return self._get(
+            f"/files/{file_id}/content",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=str,
+        )
+
+    def wait_for_processing(
+        self,
+        id: str,
+        *,
+        poll_interval: float = 5.0,
+        max_wait_seconds: float = 30 * 60,
+    ) -> FileObject:
+        """Waits for the given file to be processed, default timeout is 30 mins."""
+        TERMINAL_STATES = {"processed", "error", "deleted"}
+
+        start = time.time()
+        file = self.retrieve(id)
+        while file.status not in TERMINAL_STATES:
+            self._sleep(poll_interval)
+
+            file = self.retrieve(id)
+            if time.time() - start > max_wait_seconds:
+                raise RuntimeError(
+                    f"Giving up on waiting for file {id} to finish processing after {max_wait_seconds} seconds."
+                )
+
+        return file
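+    # Illustrative usage (a sketch, not part of the generated file): upload a
+    # file and block until it reaches a terminal state before using it:
+    #
+    #   file_obj = client.files.create(
+    #       file=open("training_data.jsonl", "rb"), purpose="fine-tune"
+    #   )
+    #   file_obj = client.files.wait_for_processing(file_obj.id, poll_interval=2.0)
+    #   assert file_obj.status == "processed"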
+
+
+class AsyncFiles(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncFilesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncFilesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncFilesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncFilesWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        purpose: FilePurpose,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileObject:
+        """Upload a file that can be used across various endpoints.
+
+        Individual files can be
+        up to 512 MB, and the size of all files uploaded by one organization can be up
+        to 100 GB.
+
+        The Assistants API supports files up to 2 million tokens and of specific file
+        types. See the
+        [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) for
+        details.
+
+        The Fine-tuning API only supports `.jsonl` files. The input also has certain
+        required formats for fine-tuning
+        [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or
+        [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input)
+        models.
+
+        The Batch API only supports `.jsonl` files up to 200 MB in size. The input also
+        has a specific required
+        [format](https://platform.openai.com/docs/api-reference/batch/request-input).
+
+        Please [contact us](https://help.openai.com/) if you need to increase these
+        storage limits.
+
+        Args:
+          file: The File object (not file name) to be uploaded.
+
+          purpose: The intended purpose of the uploaded file. One of:
+              - `assistants`: Used in the Assistants API
+              - `batch`: Used in the Batch API
+              - `fine-tune`: Used for fine-tuning
+              - `vision`: Images used for vision fine-tuning
+              - `user_data`: Flexible file type for any purpose
+              - `evals`: Used for eval data sets
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "purpose": purpose,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return await self._post(
+            "/files",
+            body=await async_maybe_transform(body, file_create_params.FileCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileObject,
+        )
+
+    async def retrieve(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileObject:
+        """
+        Returns information about a specific file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        return await self._get(
+            f"/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileObject,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        purpose: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[FileObject, AsyncCursorPage[FileObject]]:
+        """Returns a list of files.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              10,000, and the default is 10,000.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          purpose: Only return files with the given purpose.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/files",
+            page=AsyncCursorPage[FileObject],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                        "order": order,
+                        "purpose": purpose,
+                    },
+                    file_list_params.FileListParams,
+                ),
+            ),
+            model=FileObject,
+        )
+
+    async def delete(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileDeleted:
+        """
+        Delete a file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        return await self._delete(
+            f"/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileDeleted,
+        )
+
+    async def content(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> _legacy_response.HttpxBinaryResponseContent:
+        """
+        Returns the contents of the specified file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
+        return await self._get(
+            f"/files/{file_id}/content",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=_legacy_response.HttpxBinaryResponseContent,
+        )
+
+    @typing_extensions.deprecated("The `.content()` method should be used instead")
+    async def retrieve_content(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> str:
+        """
+        Returns the contents of the specified file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        return await self._get(
+            f"/files/{file_id}/content",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=str,
+        )
+
+    async def wait_for_processing(
+        self,
+        id: str,
+        *,
+        poll_interval: float = 5.0,
+        max_wait_seconds: float = 30 * 60,
+    ) -> FileObject:
+        """Waits for the given file to be processed, default timeout is 30 mins."""
+        TERMINAL_STATES = {"processed", "error", "deleted"}
+
+        start = time.time()
+        file = await self.retrieve(id)
+        while file.status not in TERMINAL_STATES:
+            await self._sleep(poll_interval)
+
+            file = await self.retrieve(id)
+            if time.time() - start > max_wait_seconds:
+                raise RuntimeError(
+                    f"Giving up on waiting for file {id} to finish processing after {max_wait_seconds} seconds."
+                )
+
+        return file
+
+
+class FilesWithRawResponse:
+    def __init__(self, files: Files) -> None:
+        self._files = files
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            files.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            files.retrieve,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            files.list,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            files.delete,
+        )
+        self.content = _legacy_response.to_raw_response_wrapper(
+            files.content,
+        )
+        self.retrieve_content = (  # pyright: ignore[reportDeprecated]
+            _legacy_response.to_raw_response_wrapper(
+                files.retrieve_content,  # pyright: ignore[reportDeprecated]
+            )
+        )
+
+
+class AsyncFilesWithRawResponse:
+    def __init__(self, files: AsyncFiles) -> None:
+        self._files = files
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            files.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            files.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            files.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            files.delete,
+        )
+        self.content = _legacy_response.async_to_raw_response_wrapper(
+            files.content,
+        )
+        self.retrieve_content = (  # pyright: ignore[reportDeprecated]
+            _legacy_response.async_to_raw_response_wrapper(
+                files.retrieve_content,  # pyright: ignore[reportDeprecated]
+            )
+        )
+
+
+class FilesWithStreamingResponse:
+    def __init__(self, files: Files) -> None:
+        self._files = files
+
+        self.create = to_streamed_response_wrapper(
+            files.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            files.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            files.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            files.delete,
+        )
+        self.content = to_custom_streamed_response_wrapper(
+            files.content,
+            StreamedBinaryAPIResponse,
+        )
+        self.retrieve_content = (  # pyright: ignore[reportDeprecated]
+            to_streamed_response_wrapper(
+                files.retrieve_content,  # pyright: ignore[reportDeprecated]
+            )
+        )
+
+
+class AsyncFilesWithStreamingResponse:
+    def __init__(self, files: AsyncFiles) -> None:
+        self._files = files
+
+        self.create = async_to_streamed_response_wrapper(
+            files.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            files.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            files.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            files.delete,
+        )
+        self.content = async_to_custom_streamed_response_wrapper(
+            files.content,
+            AsyncStreamedBinaryAPIResponse,
+        )
+        self.retrieve_content = (  # pyright: ignore[reportDeprecated]
+            async_to_streamed_response_wrapper(
+                files.retrieve_content,  # pyright: ignore[reportDeprecated]
+            )
+        )
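
Taken together, the async `Files` resource above supports a full upload-poll-download round trip. A minimal sketch, assuming an `OPENAI_API_KEY` in the environment and a local `training.jsonl`; both are placeholders, not part of this diff:

```python
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment

    # Upload a JSONL dataset; `purpose` controls how the API may use it.
    uploaded = await client.files.create(
        file=open("training.jsonl", "rb"),
        purpose="fine-tune",
    )

    # Block until the file leaves the non-terminal states
    # (terminal: "processed", "error", "deleted").
    processed = await client.files.wait_for_processing(uploaded.id)

    # Download the raw bytes; `.content()` supersedes the deprecated
    # `.retrieve_content()` wrapped above.
    blob = await client.files.content(processed.id)
    print(processed.status, len(blob.content))


asyncio.run(main())
```
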
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/__init__.py
new file mode 100644
index 00000000..7765231f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .jobs import (
+    Jobs,
+    AsyncJobs,
+    JobsWithRawResponse,
+    AsyncJobsWithRawResponse,
+    JobsWithStreamingResponse,
+    AsyncJobsWithStreamingResponse,
+)
+from .fine_tuning import (
+    FineTuning,
+    AsyncFineTuning,
+    FineTuningWithRawResponse,
+    AsyncFineTuningWithRawResponse,
+    FineTuningWithStreamingResponse,
+    AsyncFineTuningWithStreamingResponse,
+)
+
+__all__ = [
+    "Jobs",
+    "AsyncJobs",
+    "JobsWithRawResponse",
+    "AsyncJobsWithRawResponse",
+    "JobsWithStreamingResponse",
+    "AsyncJobsWithStreamingResponse",
+    "FineTuning",
+    "AsyncFineTuning",
+    "FineTuningWithRawResponse",
+    "AsyncFineTuningWithRawResponse",
+    "FineTuningWithStreamingResponse",
+    "AsyncFineTuningWithStreamingResponse",
+]
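
This `__init__.py` only re-exports names from its sibling modules, so both import paths below resolve to the same class; a quick illustration (plain Python import semantics, nothing beyond this diff):

```python
from openai.resources.fine_tuning import FineTuning
from openai.resources.fine_tuning.fine_tuning import FineTuning as FineTuningDirect

# The package-level name and the module-level name are the same object.
assert FineTuning is FineTuningDirect
```
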
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/fine_tuning.py b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/fine_tuning.py
new file mode 100644
index 00000000..eebde07d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/fine_tuning.py
@@ -0,0 +1,102 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ..._compat import cached_property
+from .jobs.jobs import (
+    Jobs,
+    AsyncJobs,
+    JobsWithRawResponse,
+    AsyncJobsWithRawResponse,
+    JobsWithStreamingResponse,
+    AsyncJobsWithStreamingResponse,
+)
+from ..._resource import SyncAPIResource, AsyncAPIResource
+
+__all__ = ["FineTuning", "AsyncFineTuning"]
+
+
+class FineTuning(SyncAPIResource):
+    @cached_property
+    def jobs(self) -> Jobs:
+        return Jobs(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> FineTuningWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return FineTuningWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> FineTuningWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return FineTuningWithStreamingResponse(self)
+
+
+class AsyncFineTuning(AsyncAPIResource):
+    @cached_property
+    def jobs(self) -> AsyncJobs:
+        return AsyncJobs(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncFineTuningWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncFineTuningWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncFineTuningWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncFineTuningWithStreamingResponse(self)
+
+
+class FineTuningWithRawResponse:
+    def __init__(self, fine_tuning: FineTuning) -> None:
+        self._fine_tuning = fine_tuning
+
+    @cached_property
+    def jobs(self) -> JobsWithRawResponse:
+        return JobsWithRawResponse(self._fine_tuning.jobs)
+
+
+class AsyncFineTuningWithRawResponse:
+    def __init__(self, fine_tuning: AsyncFineTuning) -> None:
+        self._fine_tuning = fine_tuning
+
+    @cached_property
+    def jobs(self) -> AsyncJobsWithRawResponse:
+        return AsyncJobsWithRawResponse(self._fine_tuning.jobs)
+
+
+class FineTuningWithStreamingResponse:
+    def __init__(self, fine_tuning: FineTuning) -> None:
+        self._fine_tuning = fine_tuning
+
+    @cached_property
+    def jobs(self) -> JobsWithStreamingResponse:
+        return JobsWithStreamingResponse(self._fine_tuning.jobs)
+
+
+class AsyncFineTuningWithStreamingResponse:
+    def __init__(self, fine_tuning: AsyncFineTuning) -> None:
+        self._fine_tuning = fine_tuning
+
+    @cached_property
+    def jobs(self) -> AsyncJobsWithStreamingResponse:
+        return AsyncJobsWithStreamingResponse(self._fine_tuning.jobs)
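
The classes above carry no endpoints of their own; they route `client.fine_tuning.jobs` to the `Jobs` resource and mirror that path through the raw- and streaming-response wrapper trees. A hedged sketch of the resulting call shapes (`OpenAI` comes from the package root; the header name is illustrative):

```python
from openai import OpenAI

client = OpenAI()

# Each hop is a cached_property, so the child resource is built once
# per client and reused on subsequent access.
jobs = client.fine_tuning.jobs
assert jobs is client.fine_tuning.jobs

# The wrapper tree follows the same shape: `.with_raw_response` returns
# the raw HTTP response, which can then be parsed into the typed page.
raw = client.fine_tuning.with_raw_response.jobs.list(limit=1)
print(raw.headers.get("x-request-id"))  # illustrative header
page = raw.parse()
```
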
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/__init__.py
new file mode 100644
index 00000000..94cd1fb7
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .jobs import (
+    Jobs,
+    AsyncJobs,
+    JobsWithRawResponse,
+    AsyncJobsWithRawResponse,
+    JobsWithStreamingResponse,
+    AsyncJobsWithStreamingResponse,
+)
+from .checkpoints import (
+    Checkpoints,
+    AsyncCheckpoints,
+    CheckpointsWithRawResponse,
+    AsyncCheckpointsWithRawResponse,
+    CheckpointsWithStreamingResponse,
+    AsyncCheckpointsWithStreamingResponse,
+)
+
+__all__ = [
+    "Checkpoints",
+    "AsyncCheckpoints",
+    "CheckpointsWithRawResponse",
+    "AsyncCheckpointsWithRawResponse",
+    "CheckpointsWithStreamingResponse",
+    "AsyncCheckpointsWithStreamingResponse",
+    "Jobs",
+    "AsyncJobs",
+    "JobsWithRawResponse",
+    "AsyncJobsWithRawResponse",
+    "JobsWithStreamingResponse",
+    "AsyncJobsWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/checkpoints.py b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/checkpoints.py
new file mode 100644
index 00000000..f86462e5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/checkpoints.py
@@ -0,0 +1,199 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import (
+    AsyncPaginator,
+    make_request_options,
+)
+from ....types.fine_tuning.jobs import checkpoint_list_params
+from ....types.fine_tuning.jobs.fine_tuning_job_checkpoint import FineTuningJobCheckpoint
+
+__all__ = ["Checkpoints", "AsyncCheckpoints"]
+
+
+class Checkpoints(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> CheckpointsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return CheckpointsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> CheckpointsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return CheckpointsWithStreamingResponse(self)
+
+    def list(
+        self,
+        fine_tuning_job_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[FineTuningJobCheckpoint]:
+        """
+        List checkpoints for a fine-tuning job.
+
+        Args:
+          after: Identifier for the last checkpoint from the previous pagination request.
+
+          limit: Number of checkpoints to retrieve.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not fine_tuning_job_id:
+            raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+        return self._get_api_list(
+            f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints",
+            page=SyncCursorPage[FineTuningJobCheckpoint],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                    },
+                    checkpoint_list_params.CheckpointListParams,
+                ),
+            ),
+            model=FineTuningJobCheckpoint,
+        )
+
+
+class AsyncCheckpoints(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncCheckpointsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncCheckpointsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncCheckpointsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncCheckpointsWithStreamingResponse(self)
+
+    def list(
+        self,
+        fine_tuning_job_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[FineTuningJobCheckpoint, AsyncCursorPage[FineTuningJobCheckpoint]]:
+        """
+        List checkpoints for a fine-tuning job.
+
+        Args:
+          after: Identifier for the last checkpoint from the previous pagination request.
+
+          limit: Number of checkpoints to retrieve.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not fine_tuning_job_id:
+            raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+        return self._get_api_list(
+            f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints",
+            page=AsyncCursorPage[FineTuningJobCheckpoint],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                    },
+                    checkpoint_list_params.CheckpointListParams,
+                ),
+            ),
+            model=FineTuningJobCheckpoint,
+        )
+
+
+class CheckpointsWithRawResponse:
+    def __init__(self, checkpoints: Checkpoints) -> None:
+        self._checkpoints = checkpoints
+
+        self.list = _legacy_response.to_raw_response_wrapper(
+            checkpoints.list,
+        )
+
+
+class AsyncCheckpointsWithRawResponse:
+    def __init__(self, checkpoints: AsyncCheckpoints) -> None:
+        self._checkpoints = checkpoints
+
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            checkpoints.list,
+        )
+
+
+class CheckpointsWithStreamingResponse:
+    def __init__(self, checkpoints: Checkpoints) -> None:
+        self._checkpoints = checkpoints
+
+        self.list = to_streamed_response_wrapper(
+            checkpoints.list,
+        )
+
+
+class AsyncCheckpointsWithStreamingResponse:
+    def __init__(self, checkpoints: AsyncCheckpoints) -> None:
+        self._checkpoints = checkpoints
+
+        self.list = async_to_streamed_response_wrapper(
+            checkpoints.list,
+        )
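
Since both `list` variants return cursor pages, iterating the result transparently follows the `after` cursor across pages. A small sketch, assuming an existing job (`ftjob-abc123` is a placeholder ID):

```python
from openai import OpenAI

client = OpenAI()

# SyncCursorPage is iterable; the paginator requests follow-up pages
# with the `after` cursor until the listing is exhausted.
for checkpoint in client.fine_tuning.jobs.checkpoints.list(
    "ftjob-abc123",  # placeholder fine-tuning job ID
    limit=10,
):
    print(checkpoint.id, checkpoint.step_number)
```
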
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/jobs.py b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/jobs.py
new file mode 100644
index 00000000..bbeff60b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/jobs.py
@@ -0,0 +1,761 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ...._compat import cached_property
+from .checkpoints import (
+    Checkpoints,
+    AsyncCheckpoints,
+    CheckpointsWithRawResponse,
+    AsyncCheckpointsWithRawResponse,
+    CheckpointsWithStreamingResponse,
+    AsyncCheckpointsWithStreamingResponse,
+)
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import (
+    AsyncPaginator,
+    make_request_options,
+)
+from ....types.fine_tuning import job_list_params, job_create_params, job_list_events_params
+from ....types.shared_params.metadata import Metadata
+from ....types.fine_tuning.fine_tuning_job import FineTuningJob
+from ....types.fine_tuning.fine_tuning_job_event import FineTuningJobEvent
+
+__all__ = ["Jobs", "AsyncJobs"]
+
+
+class Jobs(SyncAPIResource):
+    @cached_property
+    def checkpoints(self) -> Checkpoints:
+        return Checkpoints(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> JobsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return JobsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> JobsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return JobsWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo", "gpt-4o-mini"]],
+        training_file: str,
+        hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN,
+        integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        method: job_create_params.Method | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        validation_file: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FineTuningJob:
+        """
+        Creates a fine-tuning job which begins the process of creating a new model from
+        a given dataset.
+
+        The response includes details of the enqueued job, including the job status and
+        the name of the fine-tuned model once complete.
+
+        [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning)
+
+        Args:
+          model: The name of the model to fine-tune. You can select one of the
+              [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned).
+
+          training_file: The ID of an uploaded file that contains training data.
+
+              See [upload file](https://platform.openai.com/docs/api-reference/files/create)
+              for how to upload a file.
+
+              Your dataset must be formatted as a JSONL file. Additionally, you must upload
+              your file with the purpose `fine-tune`.
+
+              The contents of the file should differ depending on whether the model uses the
+              [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or
+              [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input)
+              format, or whether the fine-tuning method uses the
+              [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input)
+              format.
+
+              See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
+              for more details.
+
+          hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated
+              in favor of `method`, and should be passed in under the `method` parameter.
+
+          integrations: A list of integrations to enable for your fine-tuning job.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          method: The method used for fine-tuning.
+
+          seed: The seed controls the reproducibility of the job. Passing in the same seed and
+              job parameters should produce the same results, but may differ in rare cases. If
+              a seed is not specified, one will be generated for you.
+
+          suffix: A string of up to 64 characters that will be added to your fine-tuned model
+              name.
+
+              For example, a `suffix` of "custom-model-name" would produce a model name like
+              `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`.
+
+          validation_file: The ID of an uploaded file that contains validation data.
+
+              If you provide this file, the data is used to generate validation metrics
+              periodically during fine-tuning. These metrics can be viewed in the fine-tuning
+              results file. The same data should not be present in both train and validation
+              files.
+
+              Your dataset must be formatted as a JSONL file. You must upload your file with
+              the purpose `fine-tune`.
+
+              See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
+              for more details.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/fine_tuning/jobs",
+            body=maybe_transform(
+                {
+                    "model": model,
+                    "training_file": training_file,
+                    "hyperparameters": hyperparameters,
+                    "integrations": integrations,
+                    "metadata": metadata,
+                    "method": method,
+                    "seed": seed,
+                    "suffix": suffix,
+                    "validation_file": validation_file,
+                },
+                job_create_params.JobCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FineTuningJob,
+        )
+
+    def retrieve(
+        self,
+        fine_tuning_job_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FineTuningJob:
+        """
+        Get info about a fine-tuning job.
+
+        [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning)
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not fine_tuning_job_id:
+            raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+        return self._get(
+            f"/fine_tuning/jobs/{fine_tuning_job_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FineTuningJob,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[FineTuningJob]:
+        """
+        List your organization's fine-tuning jobs.
+
+        Args:
+          after: Identifier for the last job from the previous pagination request.
+
+          limit: Number of fine-tuning jobs to retrieve.
+
+          metadata: Optional metadata filter. To filter, use the syntax `metadata[k]=v`.
+              Alternatively, set `metadata=null` to indicate no metadata.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/fine_tuning/jobs",
+            page=SyncCursorPage[FineTuningJob],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                        "metadata": metadata,
+                    },
+                    job_list_params.JobListParams,
+                ),
+            ),
+            model=FineTuningJob,
+        )
+
+    def cancel(
+        self,
+        fine_tuning_job_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FineTuningJob:
+        """
+        Immediately cancel a fine-tune job.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not fine_tuning_job_id:
+            raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+        return self._post(
+            f"/fine_tuning/jobs/{fine_tuning_job_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FineTuningJob,
+        )
+
+    def list_events(
+        self,
+        fine_tuning_job_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[FineTuningJobEvent]:
+        """
+        Get status updates for a fine-tuning job.
+
+        Args:
+          after: Identifier for the last event from the previous pagination request.
+
+          limit: Number of events to retrieve.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not fine_tuning_job_id:
+            raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+        return self._get_api_list(
+            f"/fine_tuning/jobs/{fine_tuning_job_id}/events",
+            page=SyncCursorPage[FineTuningJobEvent],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                    },
+                    job_list_events_params.JobListEventsParams,
+                ),
+            ),
+            model=FineTuningJobEvent,
+        )
+
+
+class AsyncJobs(AsyncAPIResource):
+    @cached_property
+    def checkpoints(self) -> AsyncCheckpoints:
+        return AsyncCheckpoints(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncJobsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncJobsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncJobsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncJobsWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo", "gpt-4o-mini"]],
+        training_file: str,
+        hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN,
+        integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        method: job_create_params.Method | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        validation_file: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FineTuningJob:
+        """
+        Creates a fine-tuning job which begins the process of creating a new model from
+        a given dataset.
+
+        The response includes details of the enqueued job, including the job status and
+        the name of the fine-tuned model once complete.
+
+        [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning)
+
+        Args:
+          model: The name of the model to fine-tune. You can select one of the
+              [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned).
+
+          training_file: The ID of an uploaded file that contains training data.
+
+              See [upload file](https://platform.openai.com/docs/api-reference/files/create)
+              for how to upload a file.
+
+              Your dataset must be formatted as a JSONL file. Additionally, you must upload
+              your file with the purpose `fine-tune`.
+
+              The contents of the file should differ depending on whether the model uses the
+              [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or
+              [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input)
+              format, or whether the fine-tuning method uses the
+              [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input)
+              format.
+
+              See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
+              for more details.
+
+          hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated
+              in favor of `method`, and should be passed in under the `method` parameter.
+
+          integrations: A list of integrations to enable for your fine-tuning job.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          method: The method used for fine-tuning.
+
+          seed: The seed controls the reproducibility of the job. Passing in the same seed and
+              job parameters should produce the same results, but may differ in rare cases. If
+              a seed is not specified, one will be generated for you.
+
+          suffix: A string of up to 64 characters that will be added to your fine-tuned model
+              name.
+
+              For example, a `suffix` of "custom-model-name" would produce a model name like
+              `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`.
+
+          validation_file: The ID of an uploaded file that contains validation data.
+
+              If you provide this file, the data is used to generate validation metrics
+              periodically during fine-tuning. These metrics can be viewed in the fine-tuning
+              results file. The same data should not be present in both train and validation
+              files.
+
+              Your dataset must be formatted as a JSONL file. You must upload your file with
+              the purpose `fine-tune`.
+
+              See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
+              for more details.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/fine_tuning/jobs",
+            body=await async_maybe_transform(
+                {
+                    "model": model,
+                    "training_file": training_file,
+                    "hyperparameters": hyperparameters,
+                    "integrations": integrations,
+                    "metadata": metadata,
+                    "method": method,
+                    "seed": seed,
+                    "suffix": suffix,
+                    "validation_file": validation_file,
+                },
+                job_create_params.JobCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FineTuningJob,
+        )
+
+    async def retrieve(
+        self,
+        fine_tuning_job_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FineTuningJob:
+        """
+        Get info about a fine-tuning job.
+
+        [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning)
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not fine_tuning_job_id:
+            raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+        return await self._get(
+            f"/fine_tuning/jobs/{fine_tuning_job_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FineTuningJob,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[FineTuningJob, AsyncCursorPage[FineTuningJob]]:
+        """
+        List your organization's fine-tuning jobs.
+
+        Args:
+          after: Identifier for the last job from the previous pagination request.
+
+          limit: Number of fine-tuning jobs to retrieve.
+
+          metadata: Optional metadata filter. To filter, use the syntax `metadata[k]=v`.
+              Alternatively, set `metadata=null` to indicate no metadata.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/fine_tuning/jobs",
+            page=AsyncCursorPage[FineTuningJob],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                        "metadata": metadata,
+                    },
+                    job_list_params.JobListParams,
+                ),
+            ),
+            model=FineTuningJob,
+        )
+
+    async def cancel(
+        self,
+        fine_tuning_job_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FineTuningJob:
+        """
+        Immediately cancel a fine-tune job.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not fine_tuning_job_id:
+            raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+        return await self._post(
+            f"/fine_tuning/jobs/{fine_tuning_job_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FineTuningJob,
+        )
+
+    def list_events(
+        self,
+        fine_tuning_job_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[FineTuningJobEvent, AsyncCursorPage[FineTuningJobEvent]]:
+        """
+        Get status updates for a fine-tuning job.
+
+        Args:
+          after: Identifier for the last event from the previous pagination request.
+
+          limit: Number of events to retrieve.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not fine_tuning_job_id:
+            raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+        return self._get_api_list(
+            f"/fine_tuning/jobs/{fine_tuning_job_id}/events",
+            page=AsyncCursorPage[FineTuningJobEvent],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                    },
+                    job_list_events_params.JobListEventsParams,
+                ),
+            ),
+            model=FineTuningJobEvent,
+        )
+
+
+class JobsWithRawResponse:
+    def __init__(self, jobs: Jobs) -> None:
+        self._jobs = jobs
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            jobs.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            jobs.retrieve,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            jobs.list,
+        )
+        self.cancel = _legacy_response.to_raw_response_wrapper(
+            jobs.cancel,
+        )
+        self.list_events = _legacy_response.to_raw_response_wrapper(
+            jobs.list_events,
+        )
+
+    @cached_property
+    def checkpoints(self) -> CheckpointsWithRawResponse:
+        return CheckpointsWithRawResponse(self._jobs.checkpoints)
+
+
+class AsyncJobsWithRawResponse:
+    def __init__(self, jobs: AsyncJobs) -> None:
+        self._jobs = jobs
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            jobs.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            jobs.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            jobs.list,
+        )
+        self.cancel = _legacy_response.async_to_raw_response_wrapper(
+            jobs.cancel,
+        )
+        self.list_events = _legacy_response.async_to_raw_response_wrapper(
+            jobs.list_events,
+        )
+
+    @cached_property
+    def checkpoints(self) -> AsyncCheckpointsWithRawResponse:
+        return AsyncCheckpointsWithRawResponse(self._jobs.checkpoints)
+
+
+class JobsWithStreamingResponse:
+    def __init__(self, jobs: Jobs) -> None:
+        self._jobs = jobs
+
+        self.create = to_streamed_response_wrapper(
+            jobs.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            jobs.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            jobs.list,
+        )
+        self.cancel = to_streamed_response_wrapper(
+            jobs.cancel,
+        )
+        self.list_events = to_streamed_response_wrapper(
+            jobs.list_events,
+        )
+
+    @cached_property
+    def checkpoints(self) -> CheckpointsWithStreamingResponse:
+        return CheckpointsWithStreamingResponse(self._jobs.checkpoints)
+
+
+class AsyncJobsWithStreamingResponse:
+    def __init__(self, jobs: AsyncJobs) -> None:
+        self._jobs = jobs
+
+        self.create = async_to_streamed_response_wrapper(
+            jobs.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            jobs.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            jobs.list,
+        )
+        self.cancel = async_to_streamed_response_wrapper(
+            jobs.cancel,
+        )
+        self.list_events = async_to_streamed_response_wrapper(
+            jobs.list_events,
+        )
+
+    @cached_property
+    def checkpoints(self) -> AsyncCheckpointsWithStreamingResponse:
+        return AsyncCheckpointsWithStreamingResponse(self._jobs.checkpoints)
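
End to end, the `Jobs` resource composes with `Files` from earlier in this diff: upload training data, create the job, poll `retrieve` until a terminal status, then page through `list_events`. A hedged sketch (the file ID, the statuses polled, and the poll interval are illustrative):

```python
import time

from openai import OpenAI

client = OpenAI()

job = client.fine_tuning.jobs.create(
    model="gpt-4o-mini",
    training_file="file-abc123",  # placeholder: ID from a prior files.create upload
    suffix="custom-model-name",
)

# Poll until the job settles in a terminal status.
while job.status not in {"succeeded", "failed", "cancelled"}:
    time.sleep(10)
    job = client.fine_tuning.jobs.retrieve(job.id)

# Event listing is cursor-paginated, like checkpoints above.
for event in client.fine_tuning.jobs.list_events(job.id, limit=20):
    print(event.created_at, event.message)

print(job.fine_tuned_model)
```
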
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/images.py b/.venv/lib/python3.12/site-packages/openai/resources/images.py
new file mode 100644
index 00000000..30473c14
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/images.py
@@ -0,0 +1,600 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Mapping, Optional, cast
+from typing_extensions import Literal
+
+import httpx
+
+from .. import _legacy_response
+from ..types import image_edit_params, image_generate_params, image_create_variation_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
+from .._utils import (
+    extract_files,
+    maybe_transform,
+    deepcopy_minimal,
+    async_maybe_transform,
+)
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .._base_client import make_request_options
+from ..types.image_model import ImageModel
+from ..types.images_response import ImagesResponse
+
+__all__ = ["Images", "AsyncImages"]
+
+
+class Images(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> ImagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return ImagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> ImagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return ImagesWithStreamingResponse(self)
+
+    def create_variation(
+        self,
+        *,
+        image: FileTypes,
+        model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
+        size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ImagesResponse:
+        """
+        Creates a variation of a given image.
+
+        Args:
+          image: The image to use as the basis for the variation(s). Must be a valid PNG file,
+              less than 4MB, and square.
+
+          model: The model to use for image generation. Only `dall-e-2` is supported at this
+              time.
+
+          n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
+              `n=1` is supported.
+
+          response_format: The format in which the generated images are returned. Must be one of `url` or
+              `b64_json`. URLs are only valid for 60 minutes after the image has been
+              generated.
+
+          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
+              `1024x1024`.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "image": image,
+                "model": model,
+                "n": n,
+                "response_format": response_format,
+                "size": size,
+                "user": user,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["image"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return self._post(
+            "/images/variations",
+            body=maybe_transform(body, image_create_variation_params.ImageCreateVariationParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ImagesResponse,
+        )
+
+    def edit(
+        self,
+        *,
+        image: FileTypes,
+        prompt: str,
+        mask: FileTypes | NotGiven = NOT_GIVEN,
+        model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
+        size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ImagesResponse:
+        """
+        Creates an edited or extended image given an original image and a prompt.
+
+        Args:
+          image: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask
+              is not provided, image must have transparency, which will be used as the mask.
+
+          prompt: A text description of the desired image(s). The maximum length is 1000
+              characters.
+
+          mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
+              indicate where `image` should be edited. Must be a valid PNG file, less than
+              4MB, and have the same dimensions as `image`.
+
+          model: The model to use for image generation. Only `dall-e-2` is supported at this
+              time.
+
+          n: The number of images to generate. Must be between 1 and 10.
+
+          response_format: The format in which the generated images are returned. Must be one of `url` or
+              `b64_json`. URLs are only valid for 60 minutes after the image has been
+              generated.
+
+          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
+              `1024x1024`.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "image": image,
+                "prompt": prompt,
+                "mask": mask,
+                "model": model,
+                "n": n,
+                "response_format": response_format,
+                "size": size,
+                "user": user,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["mask"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return self._post(
+            "/images/edits",
+            body=maybe_transform(body, image_edit_params.ImageEditParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ImagesResponse,
+        )
+
+    def generate(
+        self,
+        *,
+        prompt: str,
+        model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN,
+        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
+        size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] | NotGiven = NOT_GIVEN,
+        style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ImagesResponse:
+        """
+        Creates an image given a prompt.
+
+        Args:
+          prompt: A text description of the desired image(s). The maximum length is 1000
+              characters for `dall-e-2` and 4000 characters for `dall-e-3`.
+
+          model: The model to use for image generation.
+
+          n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
+              `n=1` is supported.
+
+          quality: The quality of the image that will be generated. `hd` creates images with finer
+              details and greater consistency across the image. This param is only supported
+              for `dall-e-3`.
+
+          response_format: The format in which the generated images are returned. Must be one of `url` or
+              `b64_json`. URLs are only valid for 60 minutes after the image has been
+              generated.
+
+          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
+              `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or
+              `1024x1792` for `dall-e-3` models.
+
+          style: The style of the generated images. Must be one of `vivid` or `natural`. Vivid
+              causes the model to lean towards generating hyper-real and dramatic images.
+              Natural causes the model to produce more natural, less hyper-real looking
+              images. This param is only supported for `dall-e-3`.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/images/generations",
+            body=maybe_transform(
+                {
+                    "prompt": prompt,
+                    "model": model,
+                    "n": n,
+                    "quality": quality,
+                    "response_format": response_format,
+                    "size": size,
+                    "style": style,
+                    "user": user,
+                },
+                image_generate_params.ImageGenerateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ImagesResponse,
+        )
+
+
+class AsyncImages(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncImagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncImagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncImagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncImagesWithStreamingResponse(self)
+
+    async def create_variation(
+        self,
+        *,
+        image: FileTypes,
+        model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
+        size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ImagesResponse:
+        """
+        Creates a variation of a given image.
+
+        Args:
+          image: The image to use as the basis for the variation(s). Must be a valid PNG file,
+              less than 4MB, and square.
+
+          model: The model to use for image generation. Only `dall-e-2` is supported at this
+              time.
+
+          n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
+              `n=1` is supported.
+
+          response_format: The format in which the generated images are returned. Must be one of `url` or
+              `b64_json`. URLs are only valid for 60 minutes after the image has been
+              generated.
+
+          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
+              `1024x1024`.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "image": image,
+                "model": model,
+                "n": n,
+                "response_format": response_format,
+                "size": size,
+                "user": user,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["image"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return await self._post(
+            "/images/variations",
+            body=await async_maybe_transform(body, image_create_variation_params.ImageCreateVariationParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ImagesResponse,
+        )
+
+    async def edit(
+        self,
+        *,
+        image: FileTypes,
+        prompt: str,
+        mask: FileTypes | NotGiven = NOT_GIVEN,
+        model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
+        size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ImagesResponse:
+        """
+        Creates an edited or extended image given an original image and a prompt.
+
+        Args:
+          image: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask
+              is not provided, image must have transparency, which will be used as the mask.
+
+          prompt: A text description of the desired image(s). The maximum length is 1000
+              characters.
+
+          mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
+              indicate where `image` should be edited. Must be a valid PNG file, less than
+              4MB, and have the same dimensions as `image`.
+
+          model: The model to use for image generation. Only `dall-e-2` is supported at this
+              time.
+
+          n: The number of images to generate. Must be between 1 and 10.
+
+          response_format: The format in which the generated images are returned. Must be one of `url` or
+              `b64_json`. URLs are only valid for 60 minutes after the image has been
+              generated.
+
+          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
+              `1024x1024`.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "image": image,
+                "prompt": prompt,
+                "mask": mask,
+                "model": model,
+                "n": n,
+                "response_format": response_format,
+                "size": size,
+                "user": user,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["mask"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return await self._post(
+            "/images/edits",
+            body=await async_maybe_transform(body, image_edit_params.ImageEditParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ImagesResponse,
+        )
+
+    async def generate(
+        self,
+        *,
+        prompt: str,
+        model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN,
+        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
+        size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] | NotGiven = NOT_GIVEN,
+        style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ImagesResponse:
+        """
+        Creates an image given a prompt.
+
+        Args:
+          prompt: A text description of the desired image(s). The maximum length is 1000
+              characters for `dall-e-2` and 4000 characters for `dall-e-3`.
+
+          model: The model to use for image generation.
+
+          n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
+              `n=1` is supported.
+
+          quality: The quality of the image that will be generated. `hd` creates images with finer
+              details and greater consistency across the image. This param is only supported
+              for `dall-e-3`.
+
+          response_format: The format in which the generated images are returned. Must be one of `url` or
+              `b64_json`. URLs are only valid for 60 minutes after the image has been
+              generated.
+
+          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
+              `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or
+              `1024x1792` for `dall-e-3` models.
+
+          style: The style of the generated images. Must be one of `vivid` or `natural`. Vivid
+              causes the model to lean towards generating hyper-real and dramatic images.
+              Natural causes the model to produce more natural, less hyper-real looking
+              images. This param is only supported for `dall-e-3`.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/images/generations",
+            body=await async_maybe_transform(
+                {
+                    "prompt": prompt,
+                    "model": model,
+                    "n": n,
+                    "quality": quality,
+                    "response_format": response_format,
+                    "size": size,
+                    "style": style,
+                    "user": user,
+                },
+                image_generate_params.ImageGenerateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ImagesResponse,
+        )
+
+
+class ImagesWithRawResponse:
+    def __init__(self, images: Images) -> None:
+        self._images = images
+
+        self.create_variation = _legacy_response.to_raw_response_wrapper(
+            images.create_variation,
+        )
+        self.edit = _legacy_response.to_raw_response_wrapper(
+            images.edit,
+        )
+        self.generate = _legacy_response.to_raw_response_wrapper(
+            images.generate,
+        )
+
+
+class AsyncImagesWithRawResponse:
+    def __init__(self, images: AsyncImages) -> None:
+        self._images = images
+
+        self.create_variation = _legacy_response.async_to_raw_response_wrapper(
+            images.create_variation,
+        )
+        self.edit = _legacy_response.async_to_raw_response_wrapper(
+            images.edit,
+        )
+        self.generate = _legacy_response.async_to_raw_response_wrapper(
+            images.generate,
+        )
+
+
+class ImagesWithStreamingResponse:
+    def __init__(self, images: Images) -> None:
+        self._images = images
+
+        self.create_variation = to_streamed_response_wrapper(
+            images.create_variation,
+        )
+        self.edit = to_streamed_response_wrapper(
+            images.edit,
+        )
+        self.generate = to_streamed_response_wrapper(
+            images.generate,
+        )
+
+
+class AsyncImagesWithStreamingResponse:
+    def __init__(self, images: AsyncImages) -> None:
+        self._images = images
+
+        self.create_variation = async_to_streamed_response_wrapper(
+            images.create_variation,
+        )
+        self.edit = async_to_streamed_response_wrapper(
+            images.edit,
+        )
+        self.generate = async_to_streamed_response_wrapper(
+            images.generate,
+        )
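
Together these methods cover the three image endpoints: variations, edits, and generations. A short sketch of a `generate` call (assuming `OPENAI_API_KEY` is set; the model, prompt, and output filename are illustrative):

    import base64

    import openai

    client = openai.OpenAI()

    # Request base64 data so the result does not depend on a short-lived URL;
    # per the docstrings above, URLs expire 60 minutes after generation.
    rsp = client.images.generate(
        model="dall-e-3",
        prompt="a watercolor lighthouse at dusk",
        size="1024x1024",
        response_format="b64_json",
        n=1,  # dall-e-3 only supports n=1
    )
    # b64_json is populated here because response_format was "b64_json".
    with open("lighthouse.png", "wb") as f:
        f.write(base64.b64decode(rsp.data[0].b64_json))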
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/models.py b/.venv/lib/python3.12/site-packages/openai/resources/models.py
new file mode 100644
index 00000000..a9693a6b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/models.py
@@ -0,0 +1,306 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from .. import _legacy_response
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ..pagination import SyncPage, AsyncPage
+from ..types.model import Model
+from .._base_client import (
+    AsyncPaginator,
+    make_request_options,
+)
+from ..types.model_deleted import ModelDeleted
+
+__all__ = ["Models", "AsyncModels"]
+
+
+class Models(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> ModelsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return ModelsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> ModelsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return ModelsWithStreamingResponse(self)
+
+    def retrieve(
+        self,
+        model: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Model:
+        """
+        Retrieves a model instance, providing basic information about the model such as
+        the owner and permissioning.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not model:
+            raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
+        return self._get(
+            f"/models/{model}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Model,
+        )
+
+    def list(
+        self,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncPage[Model]:
+        """
+        Lists the currently available models, and provides basic information about each
+        one such as the owner and availability.
+        """
+        return self._get_api_list(
+            "/models",
+            page=SyncPage[Model],
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            model=Model,
+        )
+
+    def delete(
+        self,
+        model: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ModelDeleted:
+        """Delete a fine-tuned model.
+
+        You must have the Owner role in your organization to
+        delete a model.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not model:
+            raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
+        return self._delete(
+            f"/models/{model}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ModelDeleted,
+        )
+
+
+class AsyncModels(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncModelsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncModelsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncModelsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncModelsWithStreamingResponse(self)
+
+    async def retrieve(
+        self,
+        model: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Model:
+        """
+        Retrieves a model instance, providing basic information about the model such as
+        the owner and permissioning.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not model:
+            raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
+        return await self._get(
+            f"/models/{model}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Model,
+        )
+
+    def list(
+        self,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[Model, AsyncPage[Model]]:
+        """
+        Lists the currently available models, and provides basic information about each
+        one such as the owner and availability.
+        """
+        return self._get_api_list(
+            "/models",
+            page=AsyncPage[Model],
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            model=Model,
+        )
+
+    async def delete(
+        self,
+        model: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ModelDeleted:
+        """Delete a fine-tuned model.
+
+        You must have the Owner role in your organization to
+        delete a model.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not model:
+            raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
+        return await self._delete(
+            f"/models/{model}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ModelDeleted,
+        )
+
+
+class ModelsWithRawResponse:
+    def __init__(self, models: Models) -> None:
+        self._models = models
+
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            models.retrieve,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            models.list,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            models.delete,
+        )
+
+
+class AsyncModelsWithRawResponse:
+    def __init__(self, models: AsyncModels) -> None:
+        self._models = models
+
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            models.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            models.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            models.delete,
+        )
+
+
+class ModelsWithStreamingResponse:
+    def __init__(self, models: Models) -> None:
+        self._models = models
+
+        self.retrieve = to_streamed_response_wrapper(
+            models.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            models.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            models.delete,
+        )
+
+
+class AsyncModelsWithStreamingResponse:
+    def __init__(self, models: AsyncModels) -> None:
+        self._models = models
+
+        self.retrieve = async_to_streamed_response_wrapper(
+            models.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            models.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            models.delete,
+        )
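
The models resource is read-only apart from `delete`, and `list` is its only paginated call. A brief sketch (assuming `OPENAI_API_KEY` is set; the model name is illustrative):

    import openai

    client = openai.OpenAI()

    # SyncPage is iterable, so any pagination is handled transparently.
    for model in client.models.list():
        print(model.id, model.owned_by)

    # retrieve() raises ValueError locally for an empty id, before any request.
    m = client.models.retrieve("gpt-4o-mini")
    print(m.created)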
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/moderations.py b/.venv/lib/python3.12/site-packages/openai/resources/moderations.py
new file mode 100644
index 00000000..a8f03142
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/moderations.py
@@ -0,0 +1,200 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable
+
+import httpx
+
+from .. import _legacy_response
+from ..types import moderation_create_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .._base_client import make_request_options
+from ..types.moderation_model import ModerationModel
+from ..types.moderation_create_response import ModerationCreateResponse
+from ..types.moderation_multi_modal_input_param import ModerationMultiModalInputParam
+
+__all__ = ["Moderations", "AsyncModerations"]
+
+
+class Moderations(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> ModerationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return ModerationsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> ModerationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return ModerationsWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        input: Union[str, List[str], Iterable[ModerationMultiModalInputParam]],
+        model: Union[str, ModerationModel] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ModerationCreateResponse:
+        """Classifies if text and/or image inputs are potentially harmful.
+
+        Learn more in
+        the [moderation guide](https://platform.openai.com/docs/guides/moderation).
+
+        Args:
+          input: Input (or inputs) to classify. Can be a single string, an array of strings, or
+              an array of multi-modal input objects similar to other models.
+
+          model: The content moderation model you would like to use. Learn more in
+              [the moderation guide](https://platform.openai.com/docs/guides/moderation), and
+              learn about available models
+              [here](https://platform.openai.com/docs/models#moderation).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/moderations",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                },
+                moderation_create_params.ModerationCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ModerationCreateResponse,
+        )
+
+
+class AsyncModerations(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncModerationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncModerationsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncModerationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncModerationsWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        input: Union[str, List[str], Iterable[ModerationMultiModalInputParam]],
+        model: Union[str, ModerationModel] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ModerationCreateResponse:
+        """Classifies if text and/or image inputs are potentially harmful.
+
+        Learn more in
+        the [moderation guide](https://platform.openai.com/docs/guides/moderation).
+
+        Args:
+          input: Input (or inputs) to classify. Can be a single string, an array of strings, or
+              an array of multi-modal input objects similar to other models.
+
+          model: The content moderation model you would like to use. Learn more in
+              [the moderation guide](https://platform.openai.com/docs/guides/moderation), and
+              learn about available models
+              [here](https://platform.openai.com/docs/models#moderation).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/moderations",
+            body=await async_maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                },
+                moderation_create_params.ModerationCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ModerationCreateResponse,
+        )
+
+
+class ModerationsWithRawResponse:
+    def __init__(self, moderations: Moderations) -> None:
+        self._moderations = moderations
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            moderations.create,
+        )
+
+
+class AsyncModerationsWithRawResponse:
+    def __init__(self, moderations: AsyncModerations) -> None:
+        self._moderations = moderations
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            moderations.create,
+        )
+
+
+class ModerationsWithStreamingResponse:
+    def __init__(self, moderations: Moderations) -> None:
+        self._moderations = moderations
+
+        self.create = to_streamed_response_wrapper(
+            moderations.create,
+        )
+
+
+class AsyncModerationsWithStreamingResponse:
+    def __init__(self, moderations: AsyncModerations) -> None:
+        self._moderations = moderations
+
+        self.create = async_to_streamed_response_wrapper(
+            moderations.create,
+        )
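
Moderations is the simplest resource in this set: a single `create` method and no pagination. A sketch (assuming `OPENAI_API_KEY` is set; the model name is illustrative):

    import openai

    client = openai.OpenAI()

    # `input` may be a single string, a list of strings, or multi-modal parts.
    result = client.moderations.create(
        model="omni-moderation-latest",
        input="I want to hug my cat.",
    )
    for item in result.results:
        print(item.flagged, item.categories.violence)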
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/responses/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/responses/__init__.py
new file mode 100644
index 00000000..ad19218b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/responses/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .responses import (
+    Responses,
+    AsyncResponses,
+    ResponsesWithRawResponse,
+    AsyncResponsesWithRawResponse,
+    ResponsesWithStreamingResponse,
+    AsyncResponsesWithStreamingResponse,
+)
+from .input_items import (
+    InputItems,
+    AsyncInputItems,
+    InputItemsWithRawResponse,
+    AsyncInputItemsWithRawResponse,
+    InputItemsWithStreamingResponse,
+    AsyncInputItemsWithStreamingResponse,
+)
+
+__all__ = [
+    "InputItems",
+    "AsyncInputItems",
+    "InputItemsWithRawResponse",
+    "AsyncInputItemsWithRawResponse",
+    "InputItemsWithStreamingResponse",
+    "AsyncInputItemsWithStreamingResponse",
+    "Responses",
+    "AsyncResponses",
+    "ResponsesWithRawResponse",
+    "AsyncResponsesWithRawResponse",
+    "ResponsesWithStreamingResponse",
+    "AsyncResponsesWithStreamingResponse",
+]
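
Because `__all__` re-exports both modules, either resource family can be imported from the subpackage directly, e.g.:

    from openai.resources.responses import Responses, AsyncInputItems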
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/responses/input_items.py b/.venv/lib/python3.12/site-packages/openai/resources/responses/input_items.py
new file mode 100644
index 00000000..e341393c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/responses/input_items.py
@@ -0,0 +1,223 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Any, cast
+from typing_extensions import Literal
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...pagination import SyncCursorPage, AsyncCursorPage
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.responses import input_item_list_params
+from ...types.responses.response_item import ResponseItem
+
+__all__ = ["InputItems", "AsyncInputItems"]
+
+
+class InputItems(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> InputItemsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return InputItemsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> InputItemsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return InputItemsWithStreamingResponse(self)
+
+    def list(
+        self,
+        response_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[ResponseItem]:
+        """
+        Returns a list of input items for a given response.
+
+        Args:
+          after: An item ID to list items after, used in pagination.
+
+          before: An item ID to list items before, used in pagination.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: The order to return the input items in. Default is `asc`.
+
+              - `asc`: Return the input items in ascending order.
+              - `desc`: Return the input items in descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not response_id:
+            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
+        return self._get_api_list(
+            f"/responses/{response_id}/input_items",
+            page=SyncCursorPage[ResponseItem],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    input_item_list_params.InputItemListParams,
+                ),
+            ),
+            model=cast(Any, ResponseItem),  # Union types cannot be passed in as arguments in the type system
+        )
+
+
+class AsyncInputItems(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncInputItemsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncInputItemsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncInputItemsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncInputItemsWithStreamingResponse(self)
+
+    def list(
+        self,
+        response_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[ResponseItem, AsyncCursorPage[ResponseItem]]:
+        """
+        Returns a list of input items for a given response.
+
+        Args:
+          after: An item ID to list items after, used in pagination.
+
+          before: An item ID to list items before, used in pagination.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: The order to return the input items in. Default is `asc`.
+
+              - `asc`: Return the input items in ascending order.
+              - `desc`: Return the input items in descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not response_id:
+            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
+        return self._get_api_list(
+            f"/responses/{response_id}/input_items",
+            page=AsyncCursorPage[ResponseItem],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    input_item_list_params.InputItemListParams,
+                ),
+            ),
+            model=cast(Any, ResponseItem),  # Union types cannot be passed in as arguments in the type system
+        )
+
+
+class InputItemsWithRawResponse:
+    def __init__(self, input_items: InputItems) -> None:
+        self._input_items = input_items
+
+        self.list = _legacy_response.to_raw_response_wrapper(
+            input_items.list,
+        )
+
+
+class AsyncInputItemsWithRawResponse:
+    def __init__(self, input_items: AsyncInputItems) -> None:
+        self._input_items = input_items
+
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            input_items.list,
+        )
+
+
+class InputItemsWithStreamingResponse:
+    def __init__(self, input_items: InputItems) -> None:
+        self._input_items = input_items
+
+        self.list = to_streamed_response_wrapper(
+            input_items.list,
+        )
+
+
+class AsyncInputItemsWithStreamingResponse:
+    def __init__(self, input_items: AsyncInputItems) -> None:
+        self._input_items = input_items
+
+        self.list = async_to_streamed_response_wrapper(
+            input_items.list,
+        )
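
Input items are reached through the parent `responses` resource rather than instantiated directly. A sketch of listing them (assuming `OPENAI_API_KEY` is set; `resp_123` is a placeholder for an id returned by `client.responses.create(...)`):

    import openai

    client = openai.OpenAI()

    # SyncCursorPage follows the `after` cursor across pages automatically.
    for item in client.responses.input_items.list("resp_123", limit=20, order="asc"):
        print(item.type)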
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/responses/responses.py b/.venv/lib/python3.12/site-packages/openai/resources/responses/responses.py
new file mode 100644
index 00000000..668f4db8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/responses/responses.py
@@ -0,0 +1,1791 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Any, List, Type, Union, Iterable, Optional, cast
+from functools import partial
+from typing_extensions import Literal, overload
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
+from ..._utils import (
+    is_given,
+    required_args,
+    maybe_transform,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .input_items import (
+    InputItems,
+    AsyncInputItems,
+    InputItemsWithRawResponse,
+    AsyncInputItemsWithRawResponse,
+    InputItemsWithStreamingResponse,
+    AsyncInputItemsWithStreamingResponse,
+)
+from ..._streaming import Stream, AsyncStream
+from ...lib._tools import PydanticFunctionTool, ResponsesPydanticFunctionTool
+from ..._base_client import make_request_options
+from ...types.responses import response_create_params, response_retrieve_params
+from ...lib._parsing._responses import (
+    TextFormatT,
+    parse_response,
+    type_to_text_format_param as _type_to_text_format_param,
+)
+from ...types.shared.chat_model import ChatModel
+from ...types.responses.response import Response
+from ...types.responses.tool_param import ToolParam, ParseableToolParam
+from ...types.shared_params.metadata import Metadata
+from ...types.shared_params.reasoning import Reasoning
+from ...types.responses.parsed_response import ParsedResponse
+from ...lib.streaming.responses._responses import ResponseStreamManager, AsyncResponseStreamManager
+from ...types.responses.response_includable import ResponseIncludable
+from ...types.shared_params.responses_model import ResponsesModel
+from ...types.responses.response_input_param import ResponseInputParam
+from ...types.responses.response_stream_event import ResponseStreamEvent
+from ...types.responses.response_text_config_param import ResponseTextConfigParam
+
+__all__ = ["Responses", "AsyncResponses"]
+
+
+class Responses(SyncAPIResource):
+    @cached_property
+    def input_items(self) -> InputItems:
+        return InputItems(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> ResponsesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return ResponsesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> ResponsesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return ResponsesWithStreamingResponse(self)
+
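+    # Usage sketch (illustrative): the wrapper properties above prefix any method
+    # call; for example, reading the request ID header from a raw response:
+    #
+    #     raw = client.responses.with_raw_response.create(model="gpt-4o", input="hi")
+    #     print(raw.headers.get("x-request-id"))
+    #     response = raw.parse()  # the typed `Response` is still available
+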
+    @overload
+    def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
+        your own data as input for the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image URLs from the input message.
+              - `computer_call_output.output.image_url`: Include image URLs from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter for how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        stream: Literal[True],
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[ResponseStreamEvent]:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
+        your own data as input for the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image URLs from the input message.
+              - `computer_call_output.output.image_url`: Include image URLs from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter for how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        stream: bool,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response | Stream[ResponseStreamEvent]:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
+        your own data as input for the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image URLs from the input message.
+              - `computer_call_output.output.image_url`: Include image URLs from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter for how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["input", "model"], ["input", "model", "stream"])
+    def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response | Stream[ResponseStreamEvent]:
+        return self._post(
+            "/responses",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "include": include,
+                    "instructions": instructions,
+                    "max_output_tokens": max_output_tokens,
+                    "metadata": metadata,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "previous_response_id": previous_response_id,
+                    "reasoning": reasoning,
+                    "store": store,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "text": text,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation": truncation,
+                    "user": user,
+                },
+                response_create_params.ResponseCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Response,
+            stream=stream or False,
+            stream_cls=Stream[ResponseStreamEvent],
+        )
+
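+    # Usage sketch (illustrative): a minimal non-streaming call, assuming the
+    # `OPENAI_API_KEY` environment variable is set.
+    #
+    #     from openai import OpenAI
+    #
+    #     client = OpenAI()
+    #     response = client.responses.create(model="gpt-4o", input="Say hello")
+    #     print(response.output_text)
+    #
+    # Passing `stream=True` instead returns a `Stream[ResponseStreamEvent]` that
+    # can be iterated with a plain `for` loop.
+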
+    def stream(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: Union[str, ChatModel],
+        text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
+        tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ResponseStreamManager[TextFormatT]:
+        if is_given(text_format):
+            if not text:
+                text = {}
+
+            if "format" in text:
+                raise TypeError("Cannot mix and match text.format with text_format")
+
+            text["format"] = _type_to_text_format_param(text_format)
+
+        tools = _make_tools(tools)
+
+        api_request: partial[Stream[ResponseStreamEvent]] = partial(
+            self.create,
+            input=input,
+            model=model,
+            tools=tools,
+            include=include,
+            instructions=instructions,
+            max_output_tokens=max_output_tokens,
+            metadata=metadata,
+            parallel_tool_calls=parallel_tool_calls,
+            previous_response_id=previous_response_id,
+            store=store,
+            stream=True,
+            temperature=temperature,
+            text=text,
+            tool_choice=tool_choice,
+            reasoning=reasoning,
+            top_p=top_p,
+            truncation=truncation,
+            user=user,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+
+        return ResponseStreamManager(
+            api_request,
+            text_format=text_format,
+            input_tools=tools,
+        )
+
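+    # Usage sketch (illustrative): `stream()` wraps `create(stream=True)` in a
+    # context manager that yields typed events and tracks accumulated state.
+    #
+    #     with client.responses.stream(model="gpt-4o", input="Write a haiku") as s:
+    #         for event in s:
+    #             if event.type == "response.output_text.delta":
+    #                 print(event.delta, end="")
+    #     print()
+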
+    def parse(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: Union[str, ChatModel],
+        text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
+        tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ParsedResponse[TextFormatT]:
+        if is_given(text_format):
+            if not text:
+                text = {}
+
+            if "format" in text:
+                raise TypeError("Cannot mix and match text.format with text_format")
+
+            text["format"] = _type_to_text_format_param(text_format)
+
+        tools = _make_tools(tools)
+
+        def parser(raw_response: Response) -> ParsedResponse[TextFormatT]:
+            return parse_response(
+                input_tools=tools,
+                text_format=text_format,
+                response=raw_response,
+            )
+
+        return self._post(
+            "/responses",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "include": include,
+                    "instructions": instructions,
+                    "max_output_tokens": max_output_tokens,
+                    "metadata": metadata,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "previous_response_id": previous_response_id,
+                    "reasoning": reasoning,
+                    "store": store,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "text": text,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation": truncation,
+                    "user": user,
+                },
+                response_create_params.ResponseCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                post_parser=parser,
+            ),
+            # we turn the `Response` instance into a `ParsedResponse`
+            # in the `parser` function above
+            cast_to=cast(Type[ParsedResponse[TextFormatT]], Response),
+        )
+
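+    # Usage sketch (illustrative): `parse()` converts a Pydantic model into the
+    # `text.format` param (via `_type_to_text_format_param`) and post-processes
+    # the raw `Response` into a `ParsedResponse`.
+    #
+    #     from pydantic import BaseModel
+    #
+    #     class Answer(BaseModel):
+    #         explanation: str
+    #         result: str
+    #
+    #     parsed = client.responses.parse(
+    #         model="gpt-4o",
+    #         input="Solve 2x + 3 = 7",
+    #         text_format=Answer,
+    #     )
+    #     print(parsed.output_parsed)
+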
+    def retrieve(
+        self,
+        response_id: str,
+        *,
+        include: List[ResponseIncludable] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response:
+        """
+        Retrieves a model response with the given ID.
+
+        Args:
+          include: Additional fields to include in the response. See the `include` parameter for
+              Response creation above for more information.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not response_id:
+            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
+        return self._get(
+            f"/responses/{response_id}",
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform({"include": include}, response_retrieve_params.ResponseRetrieveParams),
+            ),
+            cast_to=Response,
+        )
+
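+    # Usage sketch (illustrative): fetching a stored response by ID; `resp_123`
+    # is a hypothetical placeholder.
+    #
+    #     response = client.responses.retrieve("resp_123")
+    #     print(response.status)
+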
+    def delete(
+        self,
+        response_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> None:
+        """
+        Deletes a model response with the given ID.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not response_id:
+            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
+        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+        return self._delete(
+            f"/responses/{response_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=NoneType,
+        )
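+
+    # Usage sketch (illustrative): deletion applies to stored responses (created
+    # with `store=True`) and returns `None` on success; `resp_123` is a
+    # hypothetical placeholder.
+    #
+    #     client.responses.delete("resp_123")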
+
+
+class AsyncResponses(AsyncAPIResource):
+    @cached_property
+    def input_items(self) -> AsyncInputItems:
+        return AsyncInputItems(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncResponsesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncResponsesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncResponsesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncResponsesWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
+        your own data as input for the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image URLs from the input message.
+              - `computer_call_output.output.image_url`: Include image URLs from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter for how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        stream: Literal[True],
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[ResponseStreamEvent]:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
+        your own data as input for the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image URLs from the input message.
+              - `computer_call_output.output.image_url`: Include image URLs from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of up to 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter for how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
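+    # Illustrative sketch: consuming the `stream=True` overload above, which resolves
+    # to an `AsyncStream[ResponseStreamEvent]`. Assumes a configured `AsyncOpenAI`
+    # client named `client`; the event checked is the text-delta stream event.
+    #
+    #     stream = await client.responses.create(
+    #         model="gpt-4o",
+    #         input="Say hello.",
+    #         stream=True,
+    #     )
+    #     async for event in stream:
+    #         if event.type == "response.output_text.delta":
+    #             print(event.delta, end="")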
+
+    @overload
+    async def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        stream: bool,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response | AsyncStream[ResponseStreamEvent]:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
+        your own data as input for the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image URLs from the input message.
+              - `computer_call_output.output.image_url`: Include image URLs from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of up to 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter for how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
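+    # Illustrative sketch: the `stream: bool` overload above lets callers thread a
+    # runtime flag through, at the cost of a union return type. A hypothetical helper:
+    #
+    #     async def ask(client: AsyncOpenAI, prompt: str, *, stream: bool):
+    #         return await client.responses.create(model="gpt-4o", input=prompt, stream=stream)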
+
+    @required_args(["input", "model"], ["input", "model", "stream"])
+    async def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response | AsyncStream[ResponseStreamEvent]:
+        return await self._post(
+            "/responses",
+            body=await async_maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "include": include,
+                    "instructions": instructions,
+                    "max_output_tokens": max_output_tokens,
+                    "metadata": metadata,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "previous_response_id": previous_response_id,
+                    "reasoning": reasoning,
+                    "store": store,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "text": text,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation": truncation,
+                    "user": user,
+                },
+                response_create_params.ResponseCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Response,
+            stream=stream or False,
+            stream_cls=AsyncStream[ResponseStreamEvent],
+        )
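+    # Note on the request above: `stream=stream or False` normalises `NOT_GIVEN`
+    # (which is falsy) to `False`, and `stream_cls` tells the transport layer to wrap
+    # server-sent-event bodies in `AsyncStream[ResponseStreamEvent]` when streaming.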
+
+    def stream(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: Union[str, ChatModel],
+        text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
+        tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncResponseStreamManager[TextFormatT]:
+        if is_given(text_format):
+            if not text:
+                text = {}
+
+            if "format" in text:
+                raise TypeError("Cannot mix and match text.format with text_format")
+
+            text["format"] = _type_to_text_format_param(text_format)
+
+        tools = _make_tools(tools)
+
+        api_request = self.create(
+            input=input,
+            model=model,
+            tools=tools,
+            include=include,
+            instructions=instructions,
+            max_output_tokens=max_output_tokens,
+            metadata=metadata,
+            parallel_tool_calls=parallel_tool_calls,
+            previous_response_id=previous_response_id,
+            store=store,
+            stream=True,
+            temperature=temperature,
+            text=text,
+            tool_choice=tool_choice,
+            reasoning=reasoning,
+            top_p=top_p,
+            truncation=truncation,
+            user=user,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+
+        return AsyncResponseStreamManager(
+            api_request,
+            text_format=text_format,
+            input_tools=tools,
+        )
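+    # Illustrative sketch: the manager returned above is entered with `async with`,
+    # which awaits the underlying `create(..., stream=True)` request. `Step` is a
+    # hypothetical Pydantic model used as `text_format`:
+    #
+    #     async with client.responses.stream(
+    #         model="gpt-4o",
+    #         input="Plan a three-step hike.",
+    #         text_format=Step,
+    #     ) as stream:
+    #         async for event in stream:
+    #             ...
+    #         final = await stream.get_final_response()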
+
+    async def parse(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: Union[str, ChatModel],
+        text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
+        tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ParsedResponse[TextFormatT]:
+        if is_given(text_format):
+            if not text:
+                text = {}
+
+            if "format" in text:
+                raise TypeError("Cannot mix and match text.format with text_format")
+
+            text["format"] = _type_to_text_format_param(text_format)
+
+        tools = _make_tools(tools)
+
+        def parser(raw_response: Response) -> ParsedResponse[TextFormatT]:
+            return parse_response(
+                input_tools=tools,
+                text_format=text_format,
+                response=raw_response,
+            )
+
+        return await self._post(
+            "/responses",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "include": include,
+                    "instructions": instructions,
+                    "max_output_tokens": max_output_tokens,
+                    "metadata": metadata,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "previous_response_id": previous_response_id,
+                    "reasoning": reasoning,
+                    "store": store,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "text": text,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation": truncation,
+                    "user": user,
+                },
+                response_create_params.ResponseCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                post_parser=parser,
+            ),
+            # we turn the `Response` instance into a `ParsedResponse`
+            # in the `parser` function above
+            cast_to=cast(Type[ParsedResponse[TextFormatT]], Response),
+        )
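+    # Illustrative sketch: `parse()` posts the same payload as `create()` but attaches
+    # the `parser` post-processor above, so the SDK returns a `ParsedResponse`.
+    # `CalendarEvent` is a hypothetical Pydantic model:
+    #
+    #     class CalendarEvent(pydantic.BaseModel):
+    #         name: str
+    #         date: str
+    #
+    #     parsed = await client.responses.parse(
+    #         model="gpt-4o",
+    #         input="Alice and Bob meet on Friday.",
+    #         text_format=CalendarEvent,
+    #     )
+    #     event = parsed.output_parsed  # CalendarEvent | None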
+
+    async def retrieve(
+        self,
+        response_id: str,
+        *,
+        include: List[ResponseIncludable] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response:
+        """
+        Retrieves a model response with the given ID.
+
+        Args:
+          include: Additional fields to include in the response. See the `include` parameter for
+              Response creation above for more information.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not response_id:
+            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
+        return await self._get(
+            f"/responses/{response_id}",
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=await async_maybe_transform(
+                    {"include": include}, response_retrieve_params.ResponseRetrieveParams
+                ),
+            ),
+            cast_to=Response,
+        )
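+    # Illustrative sketch: fetch a previously stored response by ID (responses are
+    # stored by default; see `store` on `create()`), e.g.
+    #
+    #     response = await client.responses.retrieve("resp_123")  # hypothetical ID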
+
+    async def delete(
+        self,
+        response_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> None:
+        """
+        Deletes a model response with the given ID.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not response_id:
+            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
+        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+        return await self._delete(
+            f"/responses/{response_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=NoneType,
+        )
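+    # Illustrative sketch: deletion returns no body, hence `cast_to=NoneType` above.
+    #
+    #     await client.responses.delete("resp_123")  # hypothetical ID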
+
+
+class ResponsesWithRawResponse:
+    def __init__(self, responses: Responses) -> None:
+        self._responses = responses
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            responses.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            responses.retrieve,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            responses.delete,
+        )
+
+    @cached_property
+    def input_items(self) -> InputItemsWithRawResponse:
+        return InputItemsWithRawResponse(self._responses.input_items)
+
+
+class AsyncResponsesWithRawResponse:
+    def __init__(self, responses: AsyncResponses) -> None:
+        self._responses = responses
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            responses.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            responses.retrieve,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            responses.delete,
+        )
+
+    @cached_property
+    def input_items(self) -> AsyncInputItemsWithRawResponse:
+        return AsyncInputItemsWithRawResponse(self._responses.input_items)
+
+
+class ResponsesWithStreamingResponse:
+    def __init__(self, responses: Responses) -> None:
+        self._responses = responses
+
+        self.create = to_streamed_response_wrapper(
+            responses.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            responses.retrieve,
+        )
+        self.delete = to_streamed_response_wrapper(
+            responses.delete,
+        )
+
+    @cached_property
+    def input_items(self) -> InputItemsWithStreamingResponse:
+        return InputItemsWithStreamingResponse(self._responses.input_items)
+
+
+class AsyncResponsesWithStreamingResponse:
+    def __init__(self, responses: AsyncResponses) -> None:
+        self._responses = responses
+
+        self.create = async_to_streamed_response_wrapper(
+            responses.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            responses.retrieve,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            responses.delete,
+        )
+
+    @cached_property
+    def input_items(self) -> AsyncInputItemsWithStreamingResponse:
+        return AsyncInputItemsWithStreamingResponse(self._responses.input_items)
+
+
+def _make_tools(tools: Iterable[ParseableToolParam] | NotGiven) -> List[ToolParam] | NotGiven:
+    if not is_given(tools):
+        return NOT_GIVEN
+
+    converted_tools: List[ToolParam] = []
+    for tool in tools:
+        if tool["type"] != "function":
+            converted_tools.append(tool)
+            continue
+
+        if "function" not in tool:
+            # standard Responses API case
+            converted_tools.append(tool)
+            continue
+
+        function = cast(Any, tool)["function"]  # pyright: ignore[reportUnnecessaryCast]
+        if not isinstance(function, PydanticFunctionTool):
+            raise Exception(
+                "Expected Chat Completions function tool shape to be created using `openai.pydantic_function_tool()`"
+            )
+
+        assert "parameters" in function
+        new_tool = ResponsesPydanticFunctionTool(
+            {
+                "type": "function",
+                "name": function["name"],
+                "description": function.get("description"),
+                "parameters": function["parameters"],
+                "strict": function.get("strict") or False,
+            },
+            function.model,
+        )
+
+        converted_tools.append(new_tool.cast())
+
+    return converted_tools
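+
+
+# Illustrative sketch of what `_make_tools` does: a Chat Completions-style function
+# tool built with `openai.pydantic_function_tool()` (hypothetical `GetWeather` model),
+#
+#     {"type": "function", "function": {"name": "GetWeather", "parameters": {...}, ...}}
+#
+# is flattened into the Responses tool shape, keeping the Pydantic model for parsing:
+#
+#     {"type": "function", "name": "GetWeather", "parameters": {...}, "strict": ...}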
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/responses/responses.py.orig b/.venv/lib/python3.12/site-packages/openai/resources/responses/responses.py.orig
new file mode 100644
index 00000000..dec4c193
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/responses/responses.py.orig
@@ -0,0 +1,1796 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Any, List, Type, Union, Iterable, Optional, cast
+from functools import partial
+from typing_extensions import Literal, overload
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
+from ..._utils import (
+    is_given,
+    required_args,
+    maybe_transform,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .input_items import (
+    InputItems,
+    AsyncInputItems,
+    InputItemsWithRawResponse,
+    AsyncInputItemsWithRawResponse,
+    InputItemsWithStreamingResponse,
+    AsyncInputItemsWithStreamingResponse,
+)
+from ..._streaming import Stream, AsyncStream
+from ...lib._tools import PydanticFunctionTool, ResponsesPydanticFunctionTool
+from ..._base_client import make_request_options
+from ...types.responses import response_create_params, response_retrieve_params
+<<<<<<< HEAD
+from ...lib._parsing._responses import (
+    TextFormatT,
+    parse_response,
+    type_to_text_format_param as _type_to_text_format_param,
+)
+from ...types.shared.chat_model import ChatModel
+||||||| parent of 001707b8 (feat(api): o1-pro now available through the API (#2228))
+from ...types.shared.chat_model import ChatModel
+=======
+>>>>>>> 001707b8 (feat(api): o1-pro now available through the API (#2228))
+from ...types.responses.response import Response
+from ...types.responses.tool_param import ToolParam, ParseableToolParam
+from ...types.shared_params.metadata import Metadata
+from ...types.shared_params.reasoning import Reasoning
+from ...types.responses.parsed_response import ParsedResponse
+from ...lib.streaming.responses._responses import ResponseStreamManager, AsyncResponseStreamManager
+from ...types.responses.response_includable import ResponseIncludable
+from ...types.shared_params.responses_model import ResponsesModel
+from ...types.responses.response_input_param import ResponseInputParam
+from ...types.responses.response_stream_event import ResponseStreamEvent
+from ...types.responses.response_text_config_param import ResponseTextConfigParam
+
+__all__ = ["Responses", "AsyncResponses"]
+
+
+class Responses(SyncAPIResource):
+    @cached_property
+    def input_items(self) -> InputItems:
+        return InputItems(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> ResponsesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return ResponsesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> ResponsesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return ResponsesWithStreamingResponse(self)
+
+    @overload
+    def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
+        your own data as input for the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image URLs from the input message.
+              - `computer_call_output.output.image_url`: Include image URLs from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of up to 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter for how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
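+    # Illustrative sketch: a minimal non-streaming call on the synchronous client,
+    # assuming a configured `OpenAI` client named `client`:
+    #
+    #     response = client.responses.create(
+    #         model="gpt-4o",
+    #         input="Summarise the plot of Hamlet in one sentence.",
+    #     )
+    #     print(response.output_text)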
+
+    @overload
+    def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        stream: Literal[True],
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[ResponseStreamEvent]:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
+        your own data as input for the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image URLs from the input message.
+              - `computer_call_output.output.image_url`: Include image URLs from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of up to 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter for how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
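+    # Illustrative sketch: the synchronous `stream=True` overload returns a plain
+    # `Stream[ResponseStreamEvent]` that can be iterated with a `for` loop:
+    #
+    #     for event in client.responses.create(model="gpt-4o", input="Hi.", stream=True):
+    #         if event.type == "response.output_text.delta":
+    #             print(event.delta, end="")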
+
+    @overload
+    def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        stream: bool,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response | Stream[ResponseStreamEvent]:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
+        your own data as input for the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image URLs from the input message.
+              - `computer_call_output.output.image_url`: Include image URLs from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of up to 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter to see how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["input", "model"], ["input", "model", "stream"])
+    def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response | Stream[ResponseStreamEvent]:
+        return self._post(
+            "/responses",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "include": include,
+                    "instructions": instructions,
+                    "max_output_tokens": max_output_tokens,
+                    "metadata": metadata,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "previous_response_id": previous_response_id,
+                    "reasoning": reasoning,
+                    "store": store,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "text": text,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation": truncation,
+                    "user": user,
+                },
+                response_create_params.ResponseCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Response,
+            stream=stream or False,
+            stream_cls=Stream[ResponseStreamEvent],
+        )
+
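+    # Illustrative usage sketch for `create` (a minimal example, assuming a
+    # configured `OpenAI` client; the model name is only an example):
+    #
+    #     from openai import OpenAI
+    #
+    #     client = OpenAI()
+    #
+    #     # Non-streaming: returns a `Response`; `output_text` joins the text output.
+    #     response = client.responses.create(model="gpt-4o", input="Say hello.")
+    #     print(response.output_text)
+    #
+    #     # Streaming: `stream=True` returns a `Stream[ResponseStreamEvent]` instead.
+    #     for event in client.responses.create(model="gpt-4o", input="Say hello.", stream=True):
+    #         print(event.type)
+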
+    def stream(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: Union[str, ChatModel],
+        text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
+        tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ResponseStreamManager[TextFormatT]:
+        if is_given(text_format):
+            if not text:
+                text = {}
+
+            if "format" in text:
+                raise TypeError("Cannot mix and match text.format with text_format")
+
+            text["format"] = _type_to_text_format_param(text_format)
+
+        tools = _make_tools(tools)
+
+        api_request: partial[Stream[ResponseStreamEvent]] = partial(
+            self.create,
+            input=input,
+            model=model,
+            tools=tools,
+            include=include,
+            instructions=instructions,
+            max_output_tokens=max_output_tokens,
+            metadata=metadata,
+            parallel_tool_calls=parallel_tool_calls,
+            previous_response_id=previous_response_id,
+            store=store,
+            stream=True,
+            temperature=temperature,
+            text=text,
+            tool_choice=tool_choice,
+            reasoning=reasoning,
+            top_p=top_p,
+            truncation=truncation,
+            user=user,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+
+        return ResponseStreamManager(
+            api_request,
+            text_format=text_format,
+            input_tools=tools,
+        )
+
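+    # Illustrative sketch for the `stream` helper above (assumes the `client`
+    # from the `create` sketch). The returned manager is entered as a context
+    # manager and yields typed streaming events:
+    #
+    #     with client.responses.stream(model="gpt-4o", input="Tell me a joke.") as stream:
+    #         for event in stream:
+    #             if event.type == "response.output_text.delta":
+    #                 print(event.delta, end="")
+    #         final_response = stream.get_final_response()
+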
+    def parse(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: Union[str, ChatModel],
+        text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
+        tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ParsedResponse[TextFormatT]:
+        if is_given(text_format):
+            if not text:
+                text = {}
+
+            if "format" in text:
+                raise TypeError("Cannot mix and match text.format with text_format")
+
+            text["format"] = _type_to_text_format_param(text_format)
+
+        tools = _make_tools(tools)
+
+        def parser(raw_response: Response) -> ParsedResponse[TextFormatT]:
+            return parse_response(
+                input_tools=tools,
+                text_format=text_format,
+                response=raw_response,
+            )
+
+        return self._post(
+            "/responses",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "include": include,
+                    "instructions": instructions,
+                    "max_output_tokens": max_output_tokens,
+                    "metadata": metadata,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "previous_response_id": previous_response_id,
+                    "reasoning": reasoning,
+                    "store": store,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "text": text,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation": truncation,
+                    "user": user,
+                },
+                response_create_params.ResponseCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                post_parser=parser,
+            ),
+            # we turn the `Response` instance into a `ParsedResponse`
+            # in the `parser` function above
+            cast_to=cast(Type[ParsedResponse[TextFormatT]], Response),
+        )
+
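+    # Illustrative sketch for `parse` (assumes the `client` from above;
+    # `CalendarEvent` is a hypothetical Pydantic model used as `text_format`):
+    #
+    #     from pydantic import BaseModel
+    #
+    #     class CalendarEvent(BaseModel):
+    #         name: str
+    #         date: str
+    #
+    #     parsed = client.responses.parse(
+    #         model="gpt-4o",
+    #         input="Alice and Bob meet on Friday.",
+    #         text_format=CalendarEvent,
+    #     )
+    #     print(parsed.output_parsed)  # a parsed `CalendarEvent` instance, or None
+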
+    def retrieve(
+        self,
+        response_id: str,
+        *,
+        include: List[ResponseIncludable] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response:
+        """
+        Retrieves a model response with the given ID.
+
+        Args:
+          include: Additional fields to include in the response. See the `include` parameter for
+              Response creation above for more information.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not response_id:
+            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
+        return self._get(
+            f"/responses/{response_id}",
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform({"include": include}, response_retrieve_params.ResponseRetrieveParams),
+            ),
+            cast_to=Response,
+        )
+
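+    # Illustrative sketch for `retrieve` ("resp_abc123" is a placeholder ID);
+    # `include` accepts the same values as on creation:
+    #
+    #     response = client.responses.retrieve(
+    #         "resp_abc123",
+    #         include=["file_search_call.results"],
+    #     )
+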
+    def delete(
+        self,
+        response_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> None:
+        """
+        Deletes a model response with the given ID.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not response_id:
+            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
+        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+        return self._delete(
+            f"/responses/{response_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=NoneType,
+        )
+
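+    # Illustrative sketch for `delete` (placeholder ID); the endpoint returns no
+    # body, so the method returns `None`:
+    #
+    #     client.responses.delete("resp_abc123")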
+
+class AsyncResponses(AsyncAPIResource):
+    @cached_property
+    def input_items(self) -> AsyncInputItems:
+        return AsyncInputItems(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncResponsesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncResponsesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncResponsesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncResponsesWithStreamingResponse(self)
+
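+    # Illustrative sketch for the raw-response accessor above (assumes an
+    # `AsyncOpenAI` client named `client`); `.parse()` deserializes the buffered
+    # HTTP response into the usual `Response`:
+    #
+    #     raw = await client.responses.with_raw_response.create(model="gpt-4o", input="Hi")
+    #     print(raw.headers.get("x-request-id"))
+    #     response = raw.parse()
+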
+    @overload
+    async def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to
+        incorporate your own data into the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image URLs from the input message.
+              - `computer_call_output.output.image_url`: Include image URLs from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of up to 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter to see how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        stream: Literal[True],
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[ResponseStreamEvent]:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to
+        incorporate your own data into the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image URLs from the input message.
+              - `computer_call_output.output.image_url`: Include image URLs from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of up to 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter to see how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        stream: bool,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response | AsyncStream[ResponseStreamEvent]:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to
+        incorporate your own data into the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image URLs from the input message.
+              - `computer_call_output.output.image_url`: Include image URLs from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of up to 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter to see how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["input", "model"], ["input", "model", "stream"])
+    async def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response | AsyncStream[ResponseStreamEvent]:
+        return await self._post(
+            "/responses",
+            body=await async_maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "include": include,
+                    "instructions": instructions,
+                    "max_output_tokens": max_output_tokens,
+                    "metadata": metadata,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "previous_response_id": previous_response_id,
+                    "reasoning": reasoning,
+                    "store": store,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "text": text,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation": truncation,
+                    "user": user,
+                },
+                response_create_params.ResponseCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Response,
+            stream=stream or False,
+            stream_cls=AsyncStream[ResponseStreamEvent],
+        )
+
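+    # Illustrative async usage sketch (assumes a configured `AsyncOpenAI` client;
+    # the model name is only an example):
+    #
+    #     from openai import AsyncOpenAI
+    #
+    #     client = AsyncOpenAI()
+    #
+    #     response = await client.responses.create(model="gpt-4o", input="Say hello.")
+    #     print(response.output_text)
+    #
+    #     # With `stream=True`, awaiting `create` yields an `AsyncStream` to iterate.
+    #     stream = await client.responses.create(model="gpt-4o", input="Say hello.", stream=True)
+    #     async for event in stream:
+    #         print(event.type)
+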
+    def stream(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: Union[str, ChatModel],
+        text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
+        tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncResponseStreamManager[TextFormatT]:
+        if is_given(text_format):
+            if not text:
+                text = {}
+
+            if "format" in text:
+                raise TypeError("Cannot mix and match text.format with text_format")
+
+            text["format"] = _type_to_text_format_param(text_format)
+
+        tools = _make_tools(tools)
+
+        api_request = self.create(
+            input=input,
+            model=model,
+            tools=tools,
+            include=include,
+            instructions=instructions,
+            max_output_tokens=max_output_tokens,
+            metadata=metadata,
+            parallel_tool_calls=parallel_tool_calls,
+            previous_response_id=previous_response_id,
+            store=store,
+            stream=True,
+            temperature=temperature,
+            text=text,
+            tool_choice=tool_choice,
+            reasoning=reasoning,
+            top_p=top_p,
+            truncation=truncation,
+            user=user,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+
+        return AsyncResponseStreamManager(
+            api_request,
+            text_format=text_format,
+            input_tools=tools,
+        )
+
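+    # Illustrative sketch for the async `stream` helper (note the method itself
+    # is not awaited; the manager is entered with `async with`):
+    #
+    #     async with client.responses.stream(model="gpt-4o", input="Tell me a joke.") as stream:
+    #         async for event in stream:
+    #             if event.type == "response.output_text.delta":
+    #                 print(event.delta, end="")
+    #         final_response = await stream.get_final_response()
+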
+    async def parse(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: Union[str, ChatModel],
+        text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
+        tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ParsedResponse[TextFormatT]:
+        if is_given(text_format):
+            if not text:
+                text = {}
+
+            if "format" in text:
+                raise TypeError("Cannot mix and match text.format with text_format")
+
+            text["format"] = _type_to_text_format_param(text_format)
+
+        tools = _make_tools(tools)
+
+        def parser(raw_response: Response) -> ParsedResponse[TextFormatT]:
+            return parse_response(
+                input_tools=tools,
+                text_format=text_format,
+                response=raw_response,
+            )
+
+        return await self._post(
+            "/responses",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "include": include,
+                    "instructions": instructions,
+                    "max_output_tokens": max_output_tokens,
+                    "metadata": metadata,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "previous_response_id": previous_response_id,
+                    "reasoning": reasoning,
+                    "store": store,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "text": text,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation": truncation,
+                    "user": user,
+                },
+                response_create_params.ResponseCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                post_parser=parser,
+            ),
+            # we turn the `Response` instance into a `ParsedResponse`
+            # in the `parser` function above
+            cast_to=cast(Type[ParsedResponse[TextFormatT]], Response),
+        )
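+
+    # A minimal usage sketch for `parse` (added for illustration; `CalendarEvent`
+    # is a hypothetical Pydantic model and the model name is only an example):
+    #
+    #     from pydantic import BaseModel
+    #
+    #     class CalendarEvent(BaseModel):
+    #         name: str
+    #         date: str
+    #
+    #     response = await client.responses.parse(
+    #         model="gpt-4o-2024-08-06",
+    #         input="Alice and Bob meet on Friday",
+    #         text_format=CalendarEvent,
+    #     )
+    #     event = response.output_parsed  # the parsed CalendarEvent, if available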
+
+    async def retrieve(
+        self,
+        response_id: str,
+        *,
+        include: List[ResponseIncludable] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response:
+        """
+        Retrieves a model response with the given ID.
+
+        Args:
+          include: Additional fields to include in the response. See the `include` parameter for
+              Response creation above for more information.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not response_id:
+            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
+        return await self._get(
+            f"/responses/{response_id}",
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=await async_maybe_transform(
+                    {"include": include}, response_retrieve_params.ResponseRetrieveParams
+                ),
+            ),
+            cast_to=Response,
+        )
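+
+    # A minimal usage sketch (added for illustration; the response ID and the
+    # `include` value are example inputs):
+    #
+    #     response = await client.responses.retrieve(
+    #         "resp_123",
+    #         include=["file_search_call.results"],
+    #     )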
+
+    async def delete(
+        self,
+        response_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> None:
+        """
+        Deletes a model response with the given ID.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not response_id:
+            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
+        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+        return await self._delete(
+            f"/responses/{response_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=NoneType,
+        )
+
+
+class ResponsesWithRawResponse:
+    def __init__(self, responses: Responses) -> None:
+        self._responses = responses
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            responses.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            responses.retrieve,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            responses.delete,
+        )
+
+    @cached_property
+    def input_items(self) -> InputItemsWithRawResponse:
+        return InputItemsWithRawResponse(self._responses.input_items)
+
+
+class AsyncResponsesWithRawResponse:
+    def __init__(self, responses: AsyncResponses) -> None:
+        self._responses = responses
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            responses.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            responses.retrieve,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            responses.delete,
+        )
+
+    @cached_property
+    def input_items(self) -> AsyncInputItemsWithRawResponse:
+        return AsyncInputItemsWithRawResponse(self._responses.input_items)
+
+
+class ResponsesWithStreamingResponse:
+    def __init__(self, responses: Responses) -> None:
+        self._responses = responses
+
+        self.create = to_streamed_response_wrapper(
+            responses.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            responses.retrieve,
+        )
+        self.delete = to_streamed_response_wrapper(
+            responses.delete,
+        )
+
+    @cached_property
+    def input_items(self) -> InputItemsWithStreamingResponse:
+        return InputItemsWithStreamingResponse(self._responses.input_items)
+
+
+class AsyncResponsesWithStreamingResponse:
+    def __init__(self, responses: AsyncResponses) -> None:
+        self._responses = responses
+
+        self.create = async_to_streamed_response_wrapper(
+            responses.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            responses.retrieve,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            responses.delete,
+        )
+
+    @cached_property
+    def input_items(self) -> AsyncInputItemsWithStreamingResponse:
+        return AsyncInputItemsWithStreamingResponse(self._responses.input_items)
+
+
+def _make_tools(tools: Iterable[ParseableToolParam] | NotGiven) -> List[ToolParam] | NotGiven:
+    if not is_given(tools):
+        return NOT_GIVEN
+
+    converted_tools: List[ToolParam] = []
+    for tool in tools:
+        if tool["type"] != "function":
+            converted_tools.append(tool)
+            continue
+
+        if "function" not in tool:
+            # standard Responses API case
+            converted_tools.append(tool)
+            continue
+
+        function = cast(Any, tool)["function"]  # pyright: ignore[reportUnnecessaryCast]
+        if not isinstance(function, PydanticFunctionTool):
+            raise Exception(
+                "Expected Chat Completions function tool shape to be created using `openai.pydantic_function_tool()`"
+            )
+
+        assert "parameters" in function
+        new_tool = ResponsesPydanticFunctionTool(
+            {
+                "type": "function",
+                "name": function["name"],
+                "description": function.get("description"),
+                "parameters": function["parameters"],
+                "strict": function.get("strict") or False,
+            },
+            function.model,
+        )
+
+        converted_tools.append(new_tool.cast())
+
+    return converted_tools
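+
+
+# Roughly, `_make_tools` flattens the Chat Completions function-tool shape
+# produced by `openai.pydantic_function_tool()` into the flat Responses shape.
+# A sketch (`MyArgs` is a hypothetical Pydantic model):
+#
+#     {"type": "function", "function": {"name": "MyArgs", "parameters": {...}, "strict": True}}
+#
+# becomes
+#
+#     {"type": "function", "name": "MyArgs", "parameters": {...}, "strict": True, "description": None}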
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/uploads/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/uploads/__init__.py
new file mode 100644
index 00000000..12d1056f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/uploads/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .parts import (
+    Parts,
+    AsyncParts,
+    PartsWithRawResponse,
+    AsyncPartsWithRawResponse,
+    PartsWithStreamingResponse,
+    AsyncPartsWithStreamingResponse,
+)
+from .uploads import (
+    Uploads,
+    AsyncUploads,
+    UploadsWithRawResponse,
+    AsyncUploadsWithRawResponse,
+    UploadsWithStreamingResponse,
+    AsyncUploadsWithStreamingResponse,
+)
+
+__all__ = [
+    "Parts",
+    "AsyncParts",
+    "PartsWithRawResponse",
+    "AsyncPartsWithRawResponse",
+    "PartsWithStreamingResponse",
+    "AsyncPartsWithStreamingResponse",
+    "Uploads",
+    "AsyncUploads",
+    "UploadsWithRawResponse",
+    "AsyncUploadsWithRawResponse",
+    "UploadsWithStreamingResponse",
+    "AsyncUploadsWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/uploads/parts.py b/.venv/lib/python3.12/site-packages/openai/resources/uploads/parts.py
new file mode 100644
index 00000000..777469ac
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/uploads/parts.py
@@ -0,0 +1,210 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Mapping, cast
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
+from ..._utils import (
+    extract_files,
+    maybe_transform,
+    deepcopy_minimal,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ..._base_client import make_request_options
+from ...types.uploads import part_create_params
+from ...types.uploads.upload_part import UploadPart
+
+__all__ = ["Parts", "AsyncParts"]
+
+
+class Parts(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> PartsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return PartsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> PartsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return PartsWithStreamingResponse(self)
+
+    def create(
+        self,
+        upload_id: str,
+        *,
+        data: FileTypes,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> UploadPart:
+        """
+        Adds a
+        [Part](https://platform.openai.com/docs/api-reference/uploads/part-object) to an
+        [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object.
+        A Part represents a chunk of bytes from the file you are trying to upload.
+
+        Each Part can be at most 64 MB, and you can add Parts until you hit the Upload
+        maximum of 8 GB.
+
+        It is possible to add multiple Parts in parallel. You can decide the intended
+        order of the Parts when you
+        [complete the Upload](https://platform.openai.com/docs/api-reference/uploads/complete).
+
+        Args:
+          data: The chunk of bytes for this Part.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not upload_id:
+            raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")
+        body = deepcopy_minimal({"data": data})
+        files = extract_files(cast(Mapping[str, object], body), paths=[["data"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return self._post(
+            f"/uploads/{upload_id}/parts",
+            body=maybe_transform(body, part_create_params.PartCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=UploadPart,
+        )
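+
+    # A minimal usage sketch (added for illustration; assumes `upload` was
+    # returned by `client.uploads.create(...)` and `chunk` is a bytes object
+    # of at most 64 MB):
+    #
+    #     part = client.uploads.parts.create(
+    #         upload_id=upload.id,
+    #         data=chunk,
+    #     )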
+
+
+class AsyncParts(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncPartsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncPartsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncPartsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncPartsWithStreamingResponse(self)
+
+    async def create(
+        self,
+        upload_id: str,
+        *,
+        data: FileTypes,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> UploadPart:
+        """
+        Adds a
+        [Part](https://platform.openai.com/docs/api-reference/uploads/part-object) to an
+        [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object.
+        A Part represents a chunk of bytes from the file you are trying to upload.
+
+        Each Part can be at most 64 MB, and you can add Parts until you hit the Upload
+        maximum of 8 GB.
+
+        It is possible to add multiple Parts in parallel. You can decide the intended
+        order of the Parts when you
+        [complete the Upload](https://platform.openai.com/docs/api-reference/uploads/complete).
+
+        Args:
+          data: The chunk of bytes for this Part.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not upload_id:
+            raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")
+        body = deepcopy_minimal({"data": data})
+        files = extract_files(cast(Mapping[str, object], body), paths=[["data"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return await self._post(
+            f"/uploads/{upload_id}/parts",
+            body=await async_maybe_transform(body, part_create_params.PartCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=UploadPart,
+        )
+
+
+class PartsWithRawResponse:
+    def __init__(self, parts: Parts) -> None:
+        self._parts = parts
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            parts.create,
+        )
+
+
+class AsyncPartsWithRawResponse:
+    def __init__(self, parts: AsyncParts) -> None:
+        self._parts = parts
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            parts.create,
+        )
+
+
+class PartsWithStreamingResponse:
+    def __init__(self, parts: Parts) -> None:
+        self._parts = parts
+
+        self.create = to_streamed_response_wrapper(
+            parts.create,
+        )
+
+
+class AsyncPartsWithStreamingResponse:
+    def __init__(self, parts: AsyncParts) -> None:
+        self._parts = parts
+
+        self.create = async_to_streamed_response_wrapper(
+            parts.create,
+        )
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/uploads/uploads.py b/.venv/lib/python3.12/site-packages/openai/resources/uploads/uploads.py
new file mode 100644
index 00000000..9297dbc2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/uploads/uploads.py
@@ -0,0 +1,714 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import io
+import os
+import logging
+import builtins
+from typing import List, overload
+from pathlib import Path
+
+import anyio
+import httpx
+
+from ... import _legacy_response
+from .parts import (
+    Parts,
+    AsyncParts,
+    PartsWithRawResponse,
+    AsyncPartsWithRawResponse,
+    PartsWithStreamingResponse,
+    AsyncPartsWithStreamingResponse,
+)
+from ...types import upload_create_params, upload_complete_params
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ..._base_client import make_request_options
+from ...types.upload import Upload
+from ...types.file_purpose import FilePurpose
+
+__all__ = ["Uploads", "AsyncUploads"]
+
+
+# 64MB
+DEFAULT_PART_SIZE = 64 * 1024 * 1024
+
+log: logging.Logger = logging.getLogger(__name__)
+
+
+class Uploads(SyncAPIResource):
+    @cached_property
+    def parts(self) -> Parts:
+        return Parts(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> UploadsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return UploadsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> UploadsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return UploadsWithStreamingResponse(self)
+
+    @overload
+    def upload_file_chunked(
+        self,
+        *,
+        file: os.PathLike[str],
+        mime_type: str,
+        purpose: FilePurpose,
+        bytes: int | None = None,
+        part_size: int | None = None,
+        md5: str | NotGiven = NOT_GIVEN,
+    ) -> Upload:
+        """Splits a file into multiple 64MB parts and uploads them sequentially."""
+
+    @overload
+    def upload_file_chunked(
+        self,
+        *,
+        file: bytes,
+        filename: str,
+        bytes: int,
+        mime_type: str,
+        purpose: FilePurpose,
+        part_size: int | None = None,
+        md5: str | NotGiven = NOT_GIVEN,
+    ) -> Upload:
+        """Splits an in-memory file into multiple 64MB parts and uploads them sequentially."""
+
+    def upload_file_chunked(
+        self,
+        *,
+        file: os.PathLike[str] | bytes,
+        mime_type: str,
+        purpose: FilePurpose,
+        filename: str | None = None,
+        bytes: int | None = None,
+        part_size: int | None = None,
+        md5: str | NotGiven = NOT_GIVEN,
+    ) -> Upload:
+        """Splits the given file into multiple parts and uploads them sequentially.
+
+        ```py
+        from pathlib import Path
+
+        client.uploads.upload_file_chunked(
+            file=Path("my-paper.pdf"),
+            mime_type="application/pdf",
+            purpose="assistants",
+        )
+        ```
+        """
+        if isinstance(file, builtins.bytes):
+            if filename is None:
+                raise TypeError("The `filename` argument must be given for in-memory files")
+
+            if bytes is None:
+                raise TypeError("The `bytes` argument must be given for in-memory files")
+        else:
+            if not isinstance(file, Path):
+                file = Path(file)
+
+            if not filename:
+                filename = file.name
+
+            if bytes is None:
+                bytes = file.stat().st_size
+
+        upload = self.create(
+            bytes=bytes,
+            filename=filename,
+            mime_type=mime_type,
+            purpose=purpose,
+        )
+
+        part_ids: list[str] = []
+
+        if part_size is None:
+            part_size = DEFAULT_PART_SIZE
+
+        if isinstance(file, builtins.bytes):
+            buf: io.FileIO | io.BytesIO = io.BytesIO(file)
+        else:
+            buf = io.FileIO(file)
+
+        try:
+            while True:
+                data = buf.read(part_size)
+                if not data:
+                    # EOF
+                    break
+
+                part = self.parts.create(upload_id=upload.id, data=data)
+                log.info("Uploaded part %s for upload %s", part.id, upload.id)
+                part_ids.append(part.id)
+        finally:
+            # close the buffer on success as well as on failure
+            buf.close()
+
+        return self.complete(upload_id=upload.id, part_ids=part_ids, md5=md5)
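+
+    # A sketch of the in-memory variant (added for illustration; `data` is a
+    # hypothetical bytes object holding the file contents):
+    #
+    #     upload = client.uploads.upload_file_chunked(
+    #         file=data,
+    #         filename="my-paper.pdf",
+    #         bytes=len(data),
+    #         mime_type="application/pdf",
+    #         purpose="assistants",
+    #     )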
+
+    def create(
+        self,
+        *,
+        bytes: int,
+        filename: str,
+        mime_type: str,
+        purpose: FilePurpose,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Upload:
+        """
+        Creates an intermediate
+        [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object
+        that you can add
+        [Parts](https://platform.openai.com/docs/api-reference/uploads/part-object) to.
+        Currently, an Upload can accept at most 8 GB in total and expires an hour
+        after you create it.
+
+        Once you complete the Upload, we will create a
+        [File](https://platform.openai.com/docs/api-reference/files/object) object that
+        contains all the parts you uploaded. This File is usable in the rest of our
+        platform as a regular File object.
+
+        For certain `purpose` values, the correct `mime_type` must be specified. Please
+        refer to documentation for the
+        [supported MIME types for your use case](https://platform.openai.com/docs/assistants/tools/file-search#supported-files).
+
+        For guidance on the proper filename extensions for each purpose, please follow
+        the documentation on
+        [creating a File](https://platform.openai.com/docs/api-reference/files/create).
+
+        Args:
+          bytes: The number of bytes in the file you are uploading.
+
+          filename: The name of the file to upload.
+
+          mime_type: The MIME type of the file.
+
+              This must fall within the supported MIME types for your file purpose. See the
+              supported MIME types for assistants and vision.
+
+          purpose: The intended purpose of the uploaded file.
+
+              See the
+              [documentation on File purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/uploads",
+            body=maybe_transform(
+                {
+                    "bytes": bytes,
+                    "filename": filename,
+                    "mime_type": mime_type,
+                    "purpose": purpose,
+                },
+                upload_create_params.UploadCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Upload,
+        )
+
+    def cancel(
+        self,
+        upload_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Upload:
+        """Cancels the Upload.
+
+        No Parts may be added after an Upload is cancelled.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not upload_id:
+            raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")
+        return self._post(
+            f"/uploads/{upload_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Upload,
+        )
+
+    def complete(
+        self,
+        upload_id: str,
+        *,
+        part_ids: List[str],
+        md5: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Upload:
+        """
+        Completes the
+        [Upload](https://platform.openai.com/docs/api-reference/uploads/object).
+
+        Within the returned Upload object, there is a nested
+        [File](https://platform.openai.com/docs/api-reference/files/object) object that
+        is ready to use in the rest of the platform.
+
+        You can specify the order of the Parts by passing in an ordered list of the Part
+        IDs.
+
+        The number of bytes uploaded upon completion must match the number of bytes
+        initially specified when creating the Upload object. No Parts may be added after
+        an Upload is completed.
+
+        Args:
+          part_ids: The ordered list of Part IDs.
+
+          md5: The optional md5 checksum for the file contents, used to verify that the
+              bytes uploaded match what you expect.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not upload_id:
+            raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")
+        return self._post(
+            f"/uploads/{upload_id}/complete",
+            body=maybe_transform(
+                {
+                    "part_ids": part_ids,
+                    "md5": md5,
+                },
+                upload_complete_params.UploadCompleteParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Upload,
+        )
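+
+    # Putting create/parts/complete together manually (a sketch, added for
+    # illustration; `data` is a hypothetical bytes object and the MIME type
+    # is an assumption for a JSONL fine-tuning file):
+    #
+    #     upload = client.uploads.create(
+    #         bytes=len(data),
+    #         filename="training.jsonl",
+    #         mime_type="text/jsonl",
+    #         purpose="fine-tune",
+    #     )
+    #     part = client.uploads.parts.create(upload_id=upload.id, data=data)
+    #     completed = client.uploads.complete(upload_id=upload.id, part_ids=[part.id])
+    #     file = completed.file  # nested File object, ready to use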
+
+
+class AsyncUploads(AsyncAPIResource):
+    @cached_property
+    def parts(self) -> AsyncParts:
+        return AsyncParts(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncUploadsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncUploadsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncUploadsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncUploadsWithStreamingResponse(self)
+
+    @overload
+    async def upload_file_chunked(
+        self,
+        *,
+        file: os.PathLike[str],
+        mime_type: str,
+        purpose: FilePurpose,
+        bytes: int | None = None,
+        part_size: int | None = None,
+        md5: str | NotGiven = NOT_GIVEN,
+    ) -> Upload:
+        """Splits a file into multiple 64MB parts and uploads them sequentially."""
+
+    @overload
+    async def upload_file_chunked(
+        self,
+        *,
+        file: bytes,
+        filename: str,
+        bytes: int,
+        mime_type: str,
+        purpose: FilePurpose,
+        part_size: int | None = None,
+        md5: str | NotGiven = NOT_GIVEN,
+    ) -> Upload:
+        """Splits an in-memory file into multiple 64MB parts and uploads them sequentially."""
+
+    async def upload_file_chunked(
+        self,
+        *,
+        file: os.PathLike[str] | bytes,
+        mime_type: str,
+        purpose: FilePurpose,
+        filename: str | None = None,
+        bytes: int | None = None,
+        part_size: int | None = None,
+        md5: str | NotGiven = NOT_GIVEN,
+    ) -> Upload:
+        """Splits the given file into multiple parts and uploads them sequentially.
+
+        ```py
+        from pathlib import Path
+
+        await client.uploads.upload_file_chunked(
+            file=Path("my-paper.pdf"),
+            mime_type="application/pdf",
+            purpose="assistants",
+        )
+        ```
+        """
+        if isinstance(file, builtins.bytes):
+            if filename is None:
+                raise TypeError("The `filename` argument must be given for in-memory files")
+
+            if bytes is None:
+                raise TypeError("The `bytes` argument must be given for in-memory files")
+        else:
+            if not isinstance(file, anyio.Path):
+                file = anyio.Path(file)
+
+            if not filename:
+                filename = file.name
+
+            if bytes is None:
+                stat = await file.stat()
+                bytes = stat.st_size
+
+        upload = await self.create(
+            bytes=bytes,
+            filename=filename,
+            mime_type=mime_type,
+            purpose=purpose,
+        )
+
+        part_ids: list[str] = []
+
+        if part_size is None:
+            part_size = DEFAULT_PART_SIZE
+
+        if isinstance(file, anyio.Path):
+            fd = await file.open("rb")
+            async with fd:
+                while True:
+                    data = await fd.read(part_size)
+                    if not data:
+                        # EOF
+                        break
+
+                    part = await self.parts.create(upload_id=upload.id, data=data)
+                    log.info("Uploaded part %s for upload %s", part.id, upload.id)
+                    part_ids.append(part.id)
+        else:
+            buf = io.BytesIO(file)
+
+            try:
+                while True:
+                    data = buf.read(part_size)
+                    if not data:
+                        # EOF
+                        break
+
+                    part = await self.parts.create(upload_id=upload.id, data=data)
+                    log.info("Uploaded part %s for upload %s", part.id, upload.id)
+                    part_ids.append(part.id)
+            finally:
+                # close the buffer on success as well as on failure
+                buf.close()
+
+        return await self.complete(upload_id=upload.id, part_ids=part_ids, md5=md5)
+
+    async def create(
+        self,
+        *,
+        bytes: int,
+        filename: str,
+        mime_type: str,
+        purpose: FilePurpose,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Upload:
+        """
+        Creates an intermediate
+        [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object
+        that you can add
+        [Parts](https://platform.openai.com/docs/api-reference/uploads/part-object) to.
+        Currently, an Upload can accept at most 8 GB in total and expires an hour
+        after you create it.
+
+        Once you complete the Upload, we will create a
+        [File](https://platform.openai.com/docs/api-reference/files/object) object that
+        contains all the parts you uploaded. This File is usable in the rest of our
+        platform as a regular File object.
+
+        For certain `purpose` values, the correct `mime_type` must be specified. Please
+        refer to documentation for the
+        [supported MIME types for your use case](https://platform.openai.com/docs/assistants/tools/file-search#supported-files).
+
+        For guidance on the proper filename extensions for each purpose, please follow
+        the documentation on
+        [creating a File](https://platform.openai.com/docs/api-reference/files/create).
+
+        Args:
+          bytes: The number of bytes in the file you are uploading.
+
+          filename: The name of the file to upload.
+
+          mime_type: The MIME type of the file.
+
+              This must fall within the supported MIME types for your file purpose. See the
+              supported MIME types for assistants and vision.
+
+          purpose: The intended purpose of the uploaded file.
+
+              See the
+              [documentation on File purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/uploads",
+            body=await async_maybe_transform(
+                {
+                    "bytes": bytes,
+                    "filename": filename,
+                    "mime_type": mime_type,
+                    "purpose": purpose,
+                },
+                upload_create_params.UploadCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Upload,
+        )
+
+    async def cancel(
+        self,
+        upload_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Upload:
+        """Cancels the Upload.
+
+        No Parts may be added after an Upload is cancelled.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not upload_id:
+            raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")
+        return await self._post(
+            f"/uploads/{upload_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Upload,
+        )
+
+    async def complete(
+        self,
+        upload_id: str,
+        *,
+        part_ids: List[str],
+        md5: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Upload:
+        """
+        Completes the
+        [Upload](https://platform.openai.com/docs/api-reference/uploads/object).
+
+        Within the returned Upload object, there is a nested
+        [File](https://platform.openai.com/docs/api-reference/files/object) object that
+        is ready to use in the rest of the platform.
+
+        You can specify the order of the Parts by passing in an ordered list of the Part
+        IDs.
+
+        The number of bytes uploaded upon completion must match the number of bytes
+        initially specified when creating the Upload object. No Parts may be added after
+        an Upload is completed.
+
+        Args:
+          part_ids: The ordered list of Part IDs.
+
+          md5: The optional md5 checksum for the file contents, used to verify that the
+              bytes uploaded match what you expect.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not upload_id:
+            raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")
+        return await self._post(
+            f"/uploads/{upload_id}/complete",
+            body=await async_maybe_transform(
+                {
+                    "part_ids": part_ids,
+                    "md5": md5,
+                },
+                upload_complete_params.UploadCompleteParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Upload,
+        )
+
+
+class UploadsWithRawResponse:
+    def __init__(self, uploads: Uploads) -> None:
+        self._uploads = uploads
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            uploads.create,
+        )
+        self.cancel = _legacy_response.to_raw_response_wrapper(
+            uploads.cancel,
+        )
+        self.complete = _legacy_response.to_raw_response_wrapper(
+            uploads.complete,
+        )
+
+    @cached_property
+    def parts(self) -> PartsWithRawResponse:
+        return PartsWithRawResponse(self._uploads.parts)
+
+
+class AsyncUploadsWithRawResponse:
+    def __init__(self, uploads: AsyncUploads) -> None:
+        self._uploads = uploads
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            uploads.create,
+        )
+        self.cancel = _legacy_response.async_to_raw_response_wrapper(
+            uploads.cancel,
+        )
+        self.complete = _legacy_response.async_to_raw_response_wrapper(
+            uploads.complete,
+        )
+
+    @cached_property
+    def parts(self) -> AsyncPartsWithRawResponse:
+        return AsyncPartsWithRawResponse(self._uploads.parts)
+
+
+class UploadsWithStreamingResponse:
+    def __init__(self, uploads: Uploads) -> None:
+        self._uploads = uploads
+
+        self.create = to_streamed_response_wrapper(
+            uploads.create,
+        )
+        self.cancel = to_streamed_response_wrapper(
+            uploads.cancel,
+        )
+        self.complete = to_streamed_response_wrapper(
+            uploads.complete,
+        )
+
+    @cached_property
+    def parts(self) -> PartsWithStreamingResponse:
+        return PartsWithStreamingResponse(self._uploads.parts)
+
+
+class AsyncUploadsWithStreamingResponse:
+    def __init__(self, uploads: AsyncUploads) -> None:
+        self._uploads = uploads
+
+        self.create = async_to_streamed_response_wrapper(
+            uploads.create,
+        )
+        self.cancel = async_to_streamed_response_wrapper(
+            uploads.cancel,
+        )
+        self.complete = async_to_streamed_response_wrapper(
+            uploads.complete,
+        )
+
+    @cached_property
+    def parts(self) -> AsyncPartsWithStreamingResponse:
+        return AsyncPartsWithStreamingResponse(self._uploads.parts)
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/__init__.py
new file mode 100644
index 00000000..96ae16c3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/__init__.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .files import (
+    Files,
+    AsyncFiles,
+    FilesWithRawResponse,
+    AsyncFilesWithRawResponse,
+    FilesWithStreamingResponse,
+    AsyncFilesWithStreamingResponse,
+)
+from .file_batches import (
+    FileBatches,
+    AsyncFileBatches,
+    FileBatchesWithRawResponse,
+    AsyncFileBatchesWithRawResponse,
+    FileBatchesWithStreamingResponse,
+    AsyncFileBatchesWithStreamingResponse,
+)
+from .vector_stores import (
+    VectorStores,
+    AsyncVectorStores,
+    VectorStoresWithRawResponse,
+    AsyncVectorStoresWithRawResponse,
+    VectorStoresWithStreamingResponse,
+    AsyncVectorStoresWithStreamingResponse,
+)
+
+__all__ = [
+    "Files",
+    "AsyncFiles",
+    "FilesWithRawResponse",
+    "AsyncFilesWithRawResponse",
+    "FilesWithStreamingResponse",
+    "AsyncFilesWithStreamingResponse",
+    "FileBatches",
+    "AsyncFileBatches",
+    "FileBatchesWithRawResponse",
+    "AsyncFileBatchesWithRawResponse",
+    "FileBatchesWithStreamingResponse",
+    "AsyncFileBatchesWithStreamingResponse",
+    "VectorStores",
+    "AsyncVectorStores",
+    "VectorStoresWithRawResponse",
+    "AsyncVectorStoresWithRawResponse",
+    "VectorStoresWithStreamingResponse",
+    "AsyncVectorStoresWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/file_batches.py b/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/file_batches.py
new file mode 100644
index 00000000..9b4b64d3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/file_batches.py
@@ -0,0 +1,801 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import asyncio
+from typing import Dict, List, Iterable, Optional
+from typing_extensions import Union, Literal
+from concurrent.futures import Future, ThreadPoolExecutor, as_completed
+
+import httpx
+import sniffio
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
+from ..._utils import (
+    is_given,
+    maybe_transform,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...pagination import SyncCursorPage, AsyncCursorPage
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.file_object import FileObject
+from ...types.vector_stores import file_batch_create_params, file_batch_list_files_params
+from ...types.file_chunking_strategy_param import FileChunkingStrategyParam
+from ...types.vector_stores.vector_store_file import VectorStoreFile
+from ...types.vector_stores.vector_store_file_batch import VectorStoreFileBatch
+
+__all__ = ["FileBatches", "AsyncFileBatches"]
+
+
+class FileBatches(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> FileBatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return FileBatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> FileBatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return FileBatchesWithStreamingResponse(self)
+
+    def create(
+        self,
+        vector_store_id: str,
+        *,
+        file_ids: List[str],
+        attributes: Optional[Dict[str, Union[str, float, bool]]] | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """
+        Create a vector store file batch.
+
+        Args:
+          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
+              the vector store should use. Useful for tools like `file_search` that can access
+              files.
+
+          attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard. Keys are strings with a maximum
+              length of 64 characters. Values are strings with a maximum length of 512
+              characters, booleans, or numbers.
+
+          chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
+              strategy. Only applicable if `file_ids` is non-empty.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/vector_stores/{vector_store_id}/file_batches",
+            body=maybe_transform(
+                {
+                    "file_ids": file_ids,
+                    "attributes": attributes,
+                    "chunking_strategy": chunking_strategy,
+                },
+                file_batch_create_params.FileBatchCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFileBatch,
+        )
+
+    def retrieve(
+        self,
+        batch_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """
+        Retrieves a vector store file batch.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get(
+            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFileBatch,
+        )
+
+    def cancel(
+        self,
+        batch_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Cancel a vector store file batch.
+
+        This attempts to cancel the processing of
+        files in this batch as soon as possible.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFileBatch,
+        )
+
+    def create_and_poll(
+        self,
+        vector_store_id: str,
+        *,
+        file_ids: List[str],
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Create a vector store batch and poll until all files have been processed."""
+        batch = self.create(
+            vector_store_id=vector_store_id,
+            file_ids=file_ids,
+            chunking_strategy=chunking_strategy,
+        )
+        # TODO: don't poll unless necessary??
+        return self.poll(
+            batch.id,
+            vector_store_id=vector_store_id,
+            poll_interval_ms=poll_interval_ms,
+        )
+
+    def list_files(
+        self,
+        batch_id: str,
+        *,
+        vector_store_id: str,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[VectorStoreFile]:
+        """
+        Returns a list of vector store files in a batch.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
+            page=SyncCursorPage[VectorStoreFile],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "filter": filter,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    file_batch_list_files_params.FileBatchListFilesParams,
+                ),
+            ),
+            model=VectorStoreFile,
+        )
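+
+    # Illustrative usage: `SyncCursorPage` is iterable and fetches subsequent
+    # pages on demand, so the `after`/`before` cursors rarely need to be
+    # handled manually (assumes an `OpenAI()` client):
+    #
+    #   for file in client.vector_stores.file_batches.list_files(
+    #       "vsfb_abc123",
+    #       vector_store_id="vs_abc123",
+    #       filter="failed",
+    #   ):
+    #       print(file.id, file.last_error)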
+
+    def poll(
+        self,
+        batch_id: str,
+        *,
+        vector_store_id: str,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Wait for the given file batch to be processed.
+
+        Note: this will return even if one of the files failed to process; you need to
+        check `batch.file_counts.failed` to handle this case.
+        """
+        headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"}
+        if is_given(poll_interval_ms):
+            headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+        while True:
+            response = self.with_raw_response.retrieve(
+                batch_id,
+                vector_store_id=vector_store_id,
+                extra_headers=headers,
+            )
+
+            batch = response.parse()
+            if batch.file_counts.in_progress > 0:
+                if not is_given(poll_interval_ms):
+                    from_header = response.headers.get("openai-poll-after-ms")
+                    if from_header is not None:
+                        poll_interval_ms = int(from_header)
+                    else:
+                        poll_interval_ms = 1000
+
+                self._sleep(poll_interval_ms / 1000)
+                continue
+
+            return batch
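+
+    # Illustrative usage: `poll` returns once no files are left in progress,
+    # so failures still have to be checked by the caller (assumes an
+    # `OpenAI()` client):
+    #
+    #   batch = client.vector_stores.file_batches.poll(
+    #       "vsfb_abc123", vector_store_id="vs_abc123"
+    #   )
+    #   if batch.file_counts.failed:
+    #       raise RuntimeError(f"{batch.file_counts.failed} file(s) failed")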
+
+    def upload_and_poll(
+        self,
+        vector_store_id: str,
+        *,
+        files: Iterable[FileTypes],
+        max_concurrency: int = 5,
+        file_ids: List[str] = [],
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Uploads the given files concurrently and then creates a vector store file batch.
+
+        If you've already uploaded certain files that you want to include in this batch
+        then you can pass their IDs through the `file_ids` argument.
+
+        By default, if any file upload fails then an exception will be eagerly raised.
+
+        The number of concurrent uploads is configurable using the `max_concurrency`
+        parameter.
+        """
+        results: list[FileObject] = []
+
+        with ThreadPoolExecutor(max_workers=max_concurrency) as executor:
+            futures: list[Future[FileObject]] = [
+                executor.submit(
+                    self._client.files.create,
+                    file=file,
+                    purpose="assistants",
+                )
+                for file in files
+            ]
+
+        for future in as_completed(futures):
+            exc = future.exception()
+            if exc:
+                raise exc
+
+            results.append(future.result())
+
+        batch = self.create_and_poll(
+            vector_store_id=vector_store_id,
+            file_ids=[*file_ids, *(f.id for f in results)],
+            poll_interval_ms=poll_interval_ms,
+            chunking_strategy=chunking_strategy,
+        )
+        return batch
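+
+    # Illustrative usage: local files are uploaded on a thread pool and can be
+    # mixed with IDs of files uploaded earlier (assumes an `OpenAI()` client
+    # and that the paths exist):
+    #
+    #   with open("a.pdf", "rb") as a, open("b.pdf", "rb") as b:
+    #       batch = client.vector_stores.file_batches.upload_and_poll(
+    #           "vs_abc123",
+    #           files=[a, b],
+    #           file_ids=["file-abc123"],
+    #           max_concurrency=2,
+    #       )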
+
+
+class AsyncFileBatches(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncFileBatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncFileBatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncFileBatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncFileBatchesWithStreamingResponse(self)
+
+    async def create(
+        self,
+        vector_store_id: str,
+        *,
+        file_ids: List[str],
+        attributes: Optional[Dict[str, Union[str, float, bool]]] | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """
+        Create a vector store file batch.
+
+        Args:
+          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
+              the vector store should use. Useful for tools like `file_search` that can access
+              files.
+
+          attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard. Keys are strings with a maximum
+              length of 64 characters. Values are strings with a maximum length of 512
+              characters, booleans, or numbers.
+
+          chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
+              strategy. Only applicable if `file_ids` is non-empty.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/vector_stores/{vector_store_id}/file_batches",
+            body=await async_maybe_transform(
+                {
+                    "file_ids": file_ids,
+                    "attributes": attributes,
+                    "chunking_strategy": chunking_strategy,
+                },
+                file_batch_create_params.FileBatchCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFileBatch,
+        )
+
+    async def retrieve(
+        self,
+        batch_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """
+        Retrieves a vector store file batch.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._get(
+            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFileBatch,
+        )
+
+    async def cancel(
+        self,
+        batch_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Cancel a vector store file batch.
+
+        This attempts to cancel the processing of
+        files in this batch as soon as possible.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFileBatch,
+        )
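+
+    # Illustrative usage (assuming the resource is exposed as
+    # `client.vector_stores.file_batches` on an `AsyncOpenAI()` client):
+    #
+    #   batch = await client.vector_stores.file_batches.cancel(
+    #       "vsfb_abc123",
+    #       vector_store_id="vs_abc123",
+    #   )
+    #   # Cancellation is best-effort, so the returned batch may still report
+    #   # files as "in_progress" for a short while.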
+
+    async def create_and_poll(
+        self,
+        vector_store_id: str,
+        *,
+        file_ids: List[str],
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Create a vector store batch and poll until all files have been processed."""
+        batch = await self.create(
+            vector_store_id=vector_store_id,
+            file_ids=file_ids,
+            chunking_strategy=chunking_strategy,
+        )
+        # TODO: don't poll unless necessary??
+        return await self.poll(
+            batch.id,
+            vector_store_id=vector_store_id,
+            poll_interval_ms=poll_interval_ms,
+        )
+
+    def list_files(
+        self,
+        batch_id: str,
+        *,
+        vector_store_id: str,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[VectorStoreFile, AsyncCursorPage[VectorStoreFile]]:
+        """
+        Returns a list of vector store files in a batch.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
+            page=AsyncCursorPage[VectorStoreFile],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "filter": filter,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    file_batch_list_files_params.FileBatchListFilesParams,
+                ),
+            ),
+            model=VectorStoreFile,
+        )
+
+    async def poll(
+        self,
+        batch_id: str,
+        *,
+        vector_store_id: str,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Wait for the given file batch to be processed.
+
+        Note: this will return even if one of the files failed to process; you need to
+        check `batch.file_counts.failed` to handle this case.
+        """
+        headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"}
+        if is_given(poll_interval_ms):
+            headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+        while True:
+            response = await self.with_raw_response.retrieve(
+                batch_id,
+                vector_store_id=vector_store_id,
+                extra_headers=headers,
+            )
+
+            batch = response.parse()
+            if batch.file_counts.in_progress > 0:
+                if not is_given(poll_interval_ms):
+                    from_header = response.headers.get("openai-poll-after-ms")
+                    if from_header is not None:
+                        poll_interval_ms = int(from_header)
+                    else:
+                        poll_interval_ms = 1000
+
+                await self._sleep(poll_interval_ms / 1000)
+                continue
+
+            return batch
+
+    async def upload_and_poll(
+        self,
+        vector_store_id: str,
+        *,
+        files: Iterable[FileTypes],
+        max_concurrency: int = 5,
+        file_ids: List[str] = [],
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Uploads the given files concurrently and then creates a vector store file batch.
+
+        If you've already uploaded certain files that you want to include in this batch
+        then you can pass their IDs through the `file_ids` argument.
+
+        By default, if any file upload fails then an exception will be eagerly raised.
+
+        The number of concurrent uploads is configurable using the `max_concurrency`
+        parameter.
+
+        Note: this method only supports `asyncio` or `trio` as the backing async
+        runtime.
+        """
+        uploaded_files: list[FileObject] = []
+
+        async_library = sniffio.current_async_library()
+
+        if async_library == "asyncio":
+
+            async def asyncio_upload_file(semaphore: asyncio.Semaphore, file: FileTypes) -> None:
+                async with semaphore:
+                    file_obj = await self._client.files.create(
+                        file=file,
+                        purpose="assistants",
+                    )
+                    uploaded_files.append(file_obj)
+
+            semaphore = asyncio.Semaphore(max_concurrency)
+
+            tasks = [asyncio_upload_file(semaphore, file) for file in files]
+
+            await asyncio.gather(*tasks)
+        elif async_library == "trio":
+            # Only import trio if it is actually the running async library.
+            # We support Python 3.7, so we use an older version of trio that
+            # does not ship type information.
+            import trio  # type: ignore # pyright: ignore[reportMissingTypeStubs]
+
+            async def trio_upload_file(limiter: trio.CapacityLimiter, file: FileTypes) -> None:
+                async with limiter:
+                    file_obj = await self._client.files.create(
+                        file=file,
+                        purpose="assistants",
+                    )
+                    uploaded_files.append(file_obj)
+
+            limiter = trio.CapacityLimiter(max_concurrency)
+
+            async with trio.open_nursery() as nursery:
+                for file in files:
+                    nursery.start_soon(trio_upload_file, limiter, file)  # pyright: ignore [reportUnknownMemberType]
+        else:
+            raise RuntimeError(
+                f"Async runtime {async_library} is not supported yet. Only asyncio or trio is supported",
+            )
+
+        batch = await self.create_and_poll(
+            vector_store_id=vector_store_id,
+            file_ids=[*file_ids, *(f.id for f in uploaded_files)],
+            poll_interval_ms=poll_interval_ms,
+            chunking_strategy=chunking_strategy,
+        )
+        return batch
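+
+    # Illustrative usage under asyncio (trio is handled the same way via a
+    # capacity limiter); assumes an `AsyncOpenAI()` client:
+    #
+    #   async def main() -> None:
+    #       with open("a.pdf", "rb") as fh:
+    #           batch = await client.vector_stores.file_batches.upload_and_poll(
+    #               "vs_abc123",
+    #               files=[fh],
+    #           )
+    #
+    #   asyncio.run(main())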
+
+
+class FileBatchesWithRawResponse:
+    def __init__(self, file_batches: FileBatches) -> None:
+        self._file_batches = file_batches
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            file_batches.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            file_batches.retrieve,
+        )
+        self.cancel = _legacy_response.to_raw_response_wrapper(
+            file_batches.cancel,
+        )
+        self.list_files = _legacy_response.to_raw_response_wrapper(
+            file_batches.list_files,
+        )
+
+
+class AsyncFileBatchesWithRawResponse:
+    def __init__(self, file_batches: AsyncFileBatches) -> None:
+        self._file_batches = file_batches
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            file_batches.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            file_batches.retrieve,
+        )
+        self.cancel = _legacy_response.async_to_raw_response_wrapper(
+            file_batches.cancel,
+        )
+        self.list_files = _legacy_response.async_to_raw_response_wrapper(
+            file_batches.list_files,
+        )
+
+
+class FileBatchesWithStreamingResponse:
+    def __init__(self, file_batches: FileBatches) -> None:
+        self._file_batches = file_batches
+
+        self.create = to_streamed_response_wrapper(
+            file_batches.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            file_batches.retrieve,
+        )
+        self.cancel = to_streamed_response_wrapper(
+            file_batches.cancel,
+        )
+        self.list_files = to_streamed_response_wrapper(
+            file_batches.list_files,
+        )
+
+
+class AsyncFileBatchesWithStreamingResponse:
+    def __init__(self, file_batches: AsyncFileBatches) -> None:
+        self._file_batches = file_batches
+
+        self.create = async_to_streamed_response_wrapper(
+            file_batches.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            file_batches.retrieve,
+        )
+        self.cancel = async_to_streamed_response_wrapper(
+            file_batches.cancel,
+        )
+        self.list_files = async_to_streamed_response_wrapper(
+            file_batches.list_files,
+        )
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/files.py b/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/files.py
new file mode 100644
index 00000000..7d93798a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/files.py
@@ -0,0 +1,933 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Dict, Union, Optional
+from typing_extensions import Literal, assert_never
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
+from ..._utils import (
+    is_given,
+    maybe_transform,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...pagination import SyncPage, AsyncPage, SyncCursorPage, AsyncCursorPage
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.vector_stores import file_list_params, file_create_params, file_update_params
+from ...types.file_chunking_strategy_param import FileChunkingStrategyParam
+from ...types.vector_stores.vector_store_file import VectorStoreFile
+from ...types.vector_stores.file_content_response import FileContentResponse
+from ...types.vector_stores.vector_store_file_deleted import VectorStoreFileDeleted
+
+__all__ = ["Files", "AsyncFiles"]
+
+
+class Files(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> FilesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return FilesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> FilesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return FilesWithStreamingResponse(self)
+
+    def create(
+        self,
+        vector_store_id: str,
+        *,
+        file_id: str,
+        attributes: Optional[Dict[str, Union[str, float, bool]]] | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """
+        Create a vector store file by attaching a
+        [File](https://platform.openai.com/docs/api-reference/files) to a
+        [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object).
+
+        Args:
+          file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID that the
+              vector store should use. Useful for tools like `file_search` that can access
+              files.
+
+          attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard. Keys are strings with a maximum
+              length of 64 characters. Values are strings with a maximum length of 512
+              characters, booleans, or numbers.
+
+          chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
+              strategy. Only applicable if `file_ids` is non-empty.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/vector_stores/{vector_store_id}/files",
+            body=maybe_transform(
+                {
+                    "file_id": file_id,
+                    "attributes": attributes,
+                    "chunking_strategy": chunking_strategy,
+                },
+                file_create_params.FileCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFile,
+        )
+
+    def retrieve(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """
+        Retrieves a vector store file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get(
+            f"/vector_stores/{vector_store_id}/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFile,
+        )
+
+    def update(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        attributes: Optional[Dict[str, Union[str, float, bool]]],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """
+        Update attributes on a vector store file.
+
+        Args:
+          attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard. Keys are strings with a maximum
+              length of 64 characters. Values are strings with a maximum length of 512
+              characters, booleans, or numbers.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/vector_stores/{vector_store_id}/files/{file_id}",
+            body=maybe_transform({"attributes": attributes}, file_update_params.FileUpdateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFile,
+        )
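+
+    # Illustrative usage: `attributes` is a small metadata map whose values
+    # may be strings, numbers, or booleans (assumes an `OpenAI()` client):
+    #
+    #   file = client.vector_stores.files.update(
+    #       "file-abc123",
+    #       vector_store_id="vs_abc123",
+    #       attributes={"source": "crm", "reviewed": True, "version": 2},
+    #   )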
+
+    def list(
+        self,
+        vector_store_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[VectorStoreFile]:
+        """
+        Returns a list of vector store files.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/vector_stores/{vector_store_id}/files",
+            page=SyncCursorPage[VectorStoreFile],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "filter": filter,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    file_list_params.FileListParams,
+                ),
+            ),
+            model=VectorStoreFile,
+        )
+
+    def delete(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileDeleted:
+        """Delete a vector store file.
+
+        This will remove the file from the vector store but
+        the file itself will not be deleted. To delete the file, use the
+        [delete file](https://platform.openai.com/docs/api-reference/files/delete)
+        endpoint.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._delete(
+            f"/vector_stores/{vector_store_id}/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFileDeleted,
+        )
+
+    def create_and_poll(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """Attach a file to the given vector store and wait for it to be processed."""
+        self.create(vector_store_id=vector_store_id, file_id=file_id, chunking_strategy=chunking_strategy)
+
+        return self.poll(
+            file_id,
+            vector_store_id=vector_store_id,
+            poll_interval_ms=poll_interval_ms,
+        )
+
+    def poll(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """Wait for the vector store file to finish processing.
+
+        Note: this will return even if the file failed to process; you need to check
+        `file.last_error` and `file.status` to handle these cases.
+        """
+        headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"}
+        if is_given(poll_interval_ms):
+            headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+        while True:
+            response = self.with_raw_response.retrieve(
+                file_id,
+                vector_store_id=vector_store_id,
+                extra_headers=headers,
+            )
+
+            file = response.parse()
+            if file.status == "in_progress":
+                if not is_given(poll_interval_ms):
+                    from_header = response.headers.get("openai-poll-after-ms")
+                    if from_header is not None:
+                        poll_interval_ms = int(from_header)
+                    else:
+                        poll_interval_ms = 1000
+
+                self._sleep(poll_interval_ms / 1000)
+            elif file.status == "cancelled" or file.status == "completed" or file.status == "failed":
+                return file
+            else:
+                if TYPE_CHECKING:  # type: ignore[unreachable]
+                    assert_never(file.status)
+                else:
+                    return file
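+
+    # Illustrative usage: a terminal status is not necessarily a success, so
+    # inspect `status` and `last_error` after polling (assumes an `OpenAI()`
+    # client):
+    #
+    #   file = client.vector_stores.files.poll(
+    #       "file-abc123", vector_store_id="vs_abc123"
+    #   )
+    #   if file.status == "failed" and file.last_error is not None:
+    #       print(file.last_error.code, file.last_error.message)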
+
+    def upload(
+        self,
+        *,
+        vector_store_id: str,
+        file: FileTypes,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """Upload a file to the `files` API and then attach it to the given vector store.
+
+        Note the file will be asynchronously processed (you can use the alternative
+        polling helper method to wait for processing to complete).
+        """
+        file_obj = self._client.files.create(file=file, purpose="assistants")
+        return self.create(vector_store_id=vector_store_id, file_id=file_obj.id, chunking_strategy=chunking_strategy)
+
+    def upload_and_poll(
+        self,
+        *,
+        vector_store_id: str,
+        file: FileTypes,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """Add a file to a vector store and poll until processing is complete."""
+        file_obj = self._client.files.create(file=file, purpose="assistants")
+        return self.create_and_poll(
+            vector_store_id=vector_store_id,
+            file_id=file_obj.id,
+            chunking_strategy=chunking_strategy,
+            poll_interval_ms=poll_interval_ms,
+        )
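+
+    # Illustrative usage: `upload` returns as soon as the file is attached,
+    # while `upload_and_poll` also waits for processing to finish (assumes an
+    # `OpenAI()` client):
+    #
+    #   with open("notes.md", "rb") as fh:
+    #       vs_file = client.vector_stores.files.upload_and_poll(
+    #           vector_store_id="vs_abc123", file=fh
+    #       )
+    #   print(vs_file.status)  # "completed", "failed", or "cancelled"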
+
+    def content(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncPage[FileContentResponse]:
+        """
+        Retrieve the parsed contents of a vector store file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/vector_stores/{vector_store_id}/files/{file_id}/content",
+            page=SyncPage[FileContentResponse],
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            model=FileContentResponse,
+        )
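+
+    # Illustrative usage: the parsed contents come back as a plain page of
+    # content parts (assumes an `OpenAI()` client and a processed file):
+    #
+    #   for part in client.vector_stores.files.content(
+    #       "file-abc123", vector_store_id="vs_abc123"
+    #   ):
+    #       print(part.text)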
+
+
+class AsyncFiles(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncFilesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncFilesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncFilesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncFilesWithStreamingResponse(self)
+
+    async def create(
+        self,
+        vector_store_id: str,
+        *,
+        file_id: str,
+        attributes: Optional[Dict[str, Union[str, float, bool]]] | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """
+        Create a vector store file by attaching a
+        [File](https://platform.openai.com/docs/api-reference/files) to a
+        [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object).
+
+        Args:
+          file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID that the
+              vector store should use. Useful for tools like `file_search` that can access
+              files.
+
+          attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard. Keys are strings with a maximum
+              length of 64 characters. Values are strings with a maximum length of 512
+              characters, booleans, or numbers.
+
+          chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
+              strategy. Only applicable if `file_ids` is non-empty.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/vector_stores/{vector_store_id}/files",
+            body=await async_maybe_transform(
+                {
+                    "file_id": file_id,
+                    "attributes": attributes,
+                    "chunking_strategy": chunking_strategy,
+                },
+                file_create_params.FileCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFile,
+        )
+
+    async def retrieve(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """
+        Retrieves a vector store file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._get(
+            f"/vector_stores/{vector_store_id}/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFile,
+        )
+
+    async def update(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        attributes: Optional[Dict[str, Union[str, float, bool]]],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """
+        Update attributes on a vector store file.
+
+        Args:
+          attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard. Keys are strings with a maximum
+              length of 64 characters. Values are strings with a maximum length of 512
+              characters, booleans, or numbers.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/vector_stores/{vector_store_id}/files/{file_id}",
+            body=await async_maybe_transform({"attributes": attributes}, file_update_params.FileUpdateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFile,
+        )
+
+    def list(
+        self,
+        vector_store_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[VectorStoreFile, AsyncCursorPage[VectorStoreFile]]:
+        """
+        Returns a list of vector store files.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/vector_stores/{vector_store_id}/files",
+            page=AsyncCursorPage[VectorStoreFile],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "filter": filter,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    file_list_params.FileListParams,
+                ),
+            ),
+            model=VectorStoreFile,
+        )
+
+    async def delete(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileDeleted:
+        """Delete a vector store file.
+
+        This will remove the file from the vector store but
+        the file itself will not be deleted. To delete the file, use the
+        [delete file](https://platform.openai.com/docs/api-reference/files/delete)
+        endpoint.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._delete(
+            f"/vector_stores/{vector_store_id}/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFileDeleted,
+        )
+
+    async def create_and_poll(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """Attach a file to the given vector store and wait for it to be processed."""
+        await self.create(vector_store_id=vector_store_id, file_id=file_id, chunking_strategy=chunking_strategy)
+
+        return await self.poll(
+            file_id,
+            vector_store_id=vector_store_id,
+            poll_interval_ms=poll_interval_ms,
+        )
+
+    async def poll(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """Wait for the vector store file to finish processing.
+
+        Note: this will return even if the file failed to process; you need to check
+        `file.last_error` and `file.status` to handle these cases.
+        """
+        headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"}
+        if is_given(poll_interval_ms):
+            headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+        while True:
+            response = await self.with_raw_response.retrieve(
+                file_id,
+                vector_store_id=vector_store_id,
+                extra_headers=headers,
+            )
+
+            file = response.parse()
+            if file.status == "in_progress":
+                if not is_given(poll_interval_ms):
+                    from_header = response.headers.get("openai-poll-after-ms")
+                    if from_header is not None:
+                        poll_interval_ms = int(from_header)
+                    else:
+                        poll_interval_ms = 1000
+
+                await self._sleep(poll_interval_ms / 1000)
+            elif file.status == "cancelled" or file.status == "completed" or file.status == "failed":
+                return file
+            else:
+                if TYPE_CHECKING:  # type: ignore[unreachable]
+                    assert_never(file.status)
+                else:
+                    return file
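+
+    # Example (a sketch; placeholder IDs): poll an already-attached file with a
+    # custom interval instead of the header-driven default:
+    #
+    #     vs_file = await client.vector_stores.files.poll(
+    #         "file-abc123",
+    #         vector_store_id="vs_abc123",
+    #         poll_interval_ms=500,
+    #     )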
+
+    async def upload(
+        self,
+        *,
+        vector_store_id: str,
+        file: FileTypes,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """Upload a file to the `files` API and then attach it to the given vector store.
+
+        Note that the file will be processed asynchronously (you can use the
+        alternative polling helper method to wait for processing to complete).
+        """
+        file_obj = await self._client.files.create(file=file, purpose="assistants")
+        return await self.create(
+            vector_store_id=vector_store_id, file_id=file_obj.id, chunking_strategy=chunking_strategy
+        )
+
+    async def upload_and_poll(
+        self,
+        *,
+        vector_store_id: str,
+        file: FileTypes,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """Add a file to a vector store and poll until processing is complete."""
+        file_obj = await self._client.files.create(file=file, purpose="assistants")
+        return await self.create_and_poll(
+            vector_store_id=vector_store_id,
+            file_id=file_obj.id,
+            poll_interval_ms=poll_interval_ms,
+            chunking_strategy=chunking_strategy,
+        )
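+
+    # A one-call upload-then-poll sketch (assumes a local `data.pdf`; the store
+    # ID is a placeholder):
+    #
+    #     with open("data.pdf", "rb") as f:
+    #         vs_file = await client.vector_stores.files.upload_and_poll(
+    #             vector_store_id="vs_abc123",
+    #             file=f,
+    #         )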
+
+    def content(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[FileContentResponse, AsyncPage[FileContentResponse]]:
+        """
+        Retrieve the parsed contents of a vector store file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/vector_stores/{vector_store_id}/files/{file_id}/content",
+            page=AsyncPage[FileContentResponse],
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            model=FileContentResponse,
+        )
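+
+    # Usage sketch: the returned page can be iterated with `async for` to walk
+    # the parsed chunks (placeholder IDs; assumes the response items expose a
+    # `.text` field):
+    #
+    #     async for part in client.vector_stores.files.content(
+    #         "file-abc123",
+    #         vector_store_id="vs_abc123",
+    #     ):
+    #         print(part.text)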
+
+
+class FilesWithRawResponse:
+    def __init__(self, files: Files) -> None:
+        self._files = files
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            files.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            files.retrieve,
+        )
+        self.update = _legacy_response.to_raw_response_wrapper(
+            files.update,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            files.list,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            files.delete,
+        )
+        self.content = _legacy_response.to_raw_response_wrapper(
+            files.content,
+        )
+
+
+class AsyncFilesWithRawResponse:
+    def __init__(self, files: AsyncFiles) -> None:
+        self._files = files
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            files.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            files.retrieve,
+        )
+        self.update = _legacy_response.async_to_raw_response_wrapper(
+            files.update,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            files.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            files.delete,
+        )
+        self.content = _legacy_response.async_to_raw_response_wrapper(
+            files.content,
+        )
+
+
+class FilesWithStreamingResponse:
+    def __init__(self, files: Files) -> None:
+        self._files = files
+
+        self.create = to_streamed_response_wrapper(
+            files.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            files.retrieve,
+        )
+        self.update = to_streamed_response_wrapper(
+            files.update,
+        )
+        self.list = to_streamed_response_wrapper(
+            files.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            files.delete,
+        )
+        self.content = to_streamed_response_wrapper(
+            files.content,
+        )
+
+
+class AsyncFilesWithStreamingResponse:
+    def __init__(self, files: AsyncFiles) -> None:
+        self._files = files
+
+        self.create = async_to_streamed_response_wrapper(
+            files.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            files.retrieve,
+        )
+        self.update = async_to_streamed_response_wrapper(
+            files.update,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            files.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            files.delete,
+        )
+        self.content = async_to_streamed_response_wrapper(
+            files.content,
+        )
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/vector_stores.py b/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/vector_stores.py
new file mode 100644
index 00000000..aaa6ed27
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/vector_stores.py
@@ -0,0 +1,868 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from ... import _legacy_response
+from .files import (
+    Files,
+    AsyncFiles,
+    FilesWithRawResponse,
+    AsyncFilesWithRawResponse,
+    FilesWithStreamingResponse,
+    AsyncFilesWithStreamingResponse,
+)
+from ...types import (
+    vector_store_list_params,
+    vector_store_create_params,
+    vector_store_search_params,
+    vector_store_update_params,
+)
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...pagination import SyncPage, AsyncPage, SyncCursorPage, AsyncCursorPage
+from .file_batches import (
+    FileBatches,
+    AsyncFileBatches,
+    FileBatchesWithRawResponse,
+    AsyncFileBatchesWithRawResponse,
+    FileBatchesWithStreamingResponse,
+    AsyncFileBatchesWithStreamingResponse,
+)
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.vector_store import VectorStore
+from ...types.vector_store_deleted import VectorStoreDeleted
+from ...types.shared_params.metadata import Metadata
+from ...types.file_chunking_strategy_param import FileChunkingStrategyParam
+from ...types.vector_store_search_response import VectorStoreSearchResponse
+
+__all__ = ["VectorStores", "AsyncVectorStores"]
+
+
+class VectorStores(SyncAPIResource):
+    @cached_property
+    def files(self) -> Files:
+        return Files(self._client)
+
+    @cached_property
+    def file_batches(self) -> FileBatches:
+        return FileBatches(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> VectorStoresWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return VectorStoresWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> VectorStoresWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return VectorStoresWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+        expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN,
+        file_ids: List[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        name: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStore:
+        """
+        Create a vector store.
+
+        Args:
+          chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
+              strategy. Only applicable if `file_ids` is non-empty.
+
+          expires_after: The expiration policy for a vector store.
+
+          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
+              the vector store should use. Useful for tools like `file_search` that can access
+              files.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          name: The name of the vector store.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            "/vector_stores",
+            body=maybe_transform(
+                {
+                    "chunking_strategy": chunking_strategy,
+                    "expires_after": expires_after,
+                    "file_ids": file_ids,
+                    "metadata": metadata,
+                    "name": name,
+                },
+                vector_store_create_params.VectorStoreCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStore,
+        )
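+
+    # A minimal sketch of creating a store with an expiry policy (the name and
+    # policy values are illustrative; `client` is a configured `OpenAI` client):
+    #
+    #     vector_store = client.vector_stores.create(
+    #         name="Support FAQ",
+    #         expires_after={"anchor": "last_active_at", "days": 7},
+    #     )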
+
+    def retrieve(
+        self,
+        vector_store_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStore:
+        """
+        Retrieves a vector store.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get(
+            f"/vector_stores/{vector_store_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStore,
+        )
+
+    def update(
+        self,
+        vector_store_id: str,
+        *,
+        expires_after: Optional[vector_store_update_params.ExpiresAfter] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        name: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStore:
+        """
+        Modifies a vector store.
+
+        Args:
+          expires_after: The expiration policy for a vector store.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          name: The name of the vector store.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/vector_stores/{vector_store_id}",
+            body=maybe_transform(
+                {
+                    "expires_after": expires_after,
+                    "metadata": metadata,
+                    "name": name,
+                },
+                vector_store_update_params.VectorStoreUpdateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStore,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[VectorStore]:
+        """Returns a list of vector stores.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your
+              place in the list. For instance, if you make a list request and receive 100
+              objects, ending with obj_foo, your subsequent call can include after=obj_foo
+              in order to fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            "/vector_stores",
+            page=SyncCursorPage[VectorStore],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    vector_store_list_params.VectorStoreListParams,
+                ),
+            ),
+            model=VectorStore,
+        )
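+
+    # Usage sketch: the returned SyncCursorPage auto-paginates when iterated,
+    # following the `after` cursor until the listing is exhausted:
+    #
+    #     for vector_store in client.vector_stores.list(limit=20):
+    #         print(vector_store.id)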
+
+    def delete(
+        self,
+        vector_store_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreDeleted:
+        """
+        Delete a vector store.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._delete(
+            f"/vector_stores/{vector_store_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreDeleted,
+        )
+
+    def search(
+        self,
+        vector_store_id: str,
+        *,
+        query: Union[str, List[str]],
+        filters: vector_store_search_params.Filters | NotGiven = NOT_GIVEN,
+        max_num_results: int | NotGiven = NOT_GIVEN,
+        ranking_options: vector_store_search_params.RankingOptions | NotGiven = NOT_GIVEN,
+        rewrite_query: bool | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncPage[VectorStoreSearchResponse]:
+        """
+        Search a vector store for relevant chunks based on a query and file attributes
+        filter.
+
+        Args:
+          query: A query string, or a list of query strings, for the search.
+
+          filters: A filter to apply based on file attributes.
+
+          max_num_results: The maximum number of results to return. This number should be between 1 and 50
+              inclusive.
+
+          ranking_options: Ranking options for search.
+
+          rewrite_query: Whether to rewrite the natural language query for vector search.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/vector_stores/{vector_store_id}/search",
+            page=SyncPage[VectorStoreSearchResponse],
+            body=maybe_transform(
+                {
+                    "query": query,
+                    "filters": filters,
+                    "max_num_results": max_num_results,
+                    "ranking_options": ranking_options,
+                    "rewrite_query": rewrite_query,
+                },
+                vector_store_search_params.VectorStoreSearchParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            model=VectorStoreSearchResponse,
+            method="post",
+        )
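+
+    # A minimal search sketch (placeholder ID; assumes the response items expose
+    # `filename` and `score`):
+    #
+    #     results = client.vector_stores.search(
+    #         "vs_abc123",
+    #         query="How do I reset my password?",
+    #         max_num_results=5,
+    #     )
+    #     for result in results:
+    #         print(result.filename, result.score)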
+
+
+class AsyncVectorStores(AsyncAPIResource):
+    @cached_property
+    def files(self) -> AsyncFiles:
+        return AsyncFiles(self._client)
+
+    @cached_property
+    def file_batches(self) -> AsyncFileBatches:
+        return AsyncFileBatches(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncVectorStoresWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncVectorStoresWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncVectorStoresWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncVectorStoresWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+        expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN,
+        file_ids: List[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        name: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStore:
+        """
+        Create a vector store.
+
+        Args:
+          chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
+              strategy. Only applicable if `file_ids` is non-empty.
+
+          expires_after: The expiration policy for a vector store.
+
+          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
+              the vector store should use. Useful for tools like `file_search` that can access
+              files.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          name: The name of the vector store.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            "/vector_stores",
+            body=await async_maybe_transform(
+                {
+                    "chunking_strategy": chunking_strategy,
+                    "expires_after": expires_after,
+                    "file_ids": file_ids,
+                    "metadata": metadata,
+                    "name": name,
+                },
+                vector_store_create_params.VectorStoreCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStore,
+        )
+
+    async def retrieve(
+        self,
+        vector_store_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStore:
+        """
+        Retrieves a vector store.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._get(
+            f"/vector_stores/{vector_store_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStore,
+        )
+
+    async def update(
+        self,
+        vector_store_id: str,
+        *,
+        expires_after: Optional[vector_store_update_params.ExpiresAfter] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        name: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStore:
+        """
+        Modifies a vector store.
+
+        Args:
+          expires_after: The expiration policy for a vector store.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          name: The name of the vector store.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/vector_stores/{vector_store_id}",
+            body=await async_maybe_transform(
+                {
+                    "expires_after": expires_after,
+                    "metadata": metadata,
+                    "name": name,
+                },
+                vector_store_update_params.VectorStoreUpdateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStore,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[VectorStore, AsyncCursorPage[VectorStore]]:
+        """Returns a list of vector stores.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your
+              place in the list. For instance, if you make a list request and receive 100
+              objects, ending with obj_foo, your subsequent call can include after=obj_foo
+              in order to fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            "/vector_stores",
+            page=AsyncCursorPage[VectorStore],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    vector_store_list_params.VectorStoreListParams,
+                ),
+            ),
+            model=VectorStore,
+        )
+
+    async def delete(
+        self,
+        vector_store_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreDeleted:
+        """
+        Delete a vector store.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._delete(
+            f"/vector_stores/{vector_store_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreDeleted,
+        )
+
+    def search(
+        self,
+        vector_store_id: str,
+        *,
+        query: Union[str, List[str]],
+        filters: vector_store_search_params.Filters | NotGiven = NOT_GIVEN,
+        max_num_results: int | NotGiven = NOT_GIVEN,
+        ranking_options: vector_store_search_params.RankingOptions | NotGiven = NOT_GIVEN,
+        rewrite_query: bool | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[VectorStoreSearchResponse, AsyncPage[VectorStoreSearchResponse]]:
+        """
+        Search a vector store for relevant chunks based on a query and file attributes
+        filter.
+
+        Args:
+          query: A query string, or a list of query strings, for the search.
+
+          filters: A filter to apply based on file attributes.
+
+          max_num_results: The maximum number of results to return. This number should be between 1 and 50
+              inclusive.
+
+          ranking_options: Ranking options for search.
+
+          rewrite_query: Whether to rewrite the natural language query for vector search.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/vector_stores/{vector_store_id}/search",
+            page=AsyncPage[VectorStoreSearchResponse],
+            body=maybe_transform(
+                {
+                    "query": query,
+                    "filters": filters,
+                    "max_num_results": max_num_results,
+                    "ranking_options": ranking_options,
+                    "rewrite_query": rewrite_query,
+                },
+                vector_store_search_params.VectorStoreSearchParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            model=VectorStoreSearchResponse,
+            method="post",
+        )
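+
+    # The async variant mirrors the sync sketch above and can be consumed with
+    # `async for` (placeholder ID):
+    #
+    #     async for result in client.vector_stores.search(
+    #         "vs_abc123",
+    #         query="How do I reset my password?",
+    #     ):
+    #         print(result.filename, result.score)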
+
+
+class VectorStoresWithRawResponse:
+    def __init__(self, vector_stores: VectorStores) -> None:
+        self._vector_stores = vector_stores
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            vector_stores.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            vector_stores.retrieve,
+        )
+        self.update = _legacy_response.to_raw_response_wrapper(
+            vector_stores.update,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            vector_stores.list,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            vector_stores.delete,
+        )
+        self.search = _legacy_response.to_raw_response_wrapper(
+            vector_stores.search,
+        )
+
+    @cached_property
+    def files(self) -> FilesWithRawResponse:
+        return FilesWithRawResponse(self._vector_stores.files)
+
+    @cached_property
+    def file_batches(self) -> FileBatchesWithRawResponse:
+        return FileBatchesWithRawResponse(self._vector_stores.file_batches)
+
+
+class AsyncVectorStoresWithRawResponse:
+    def __init__(self, vector_stores: AsyncVectorStores) -> None:
+        self._vector_stores = vector_stores
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            vector_stores.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            vector_stores.retrieve,
+        )
+        self.update = _legacy_response.async_to_raw_response_wrapper(
+            vector_stores.update,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            vector_stores.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            vector_stores.delete,
+        )
+        self.search = _legacy_response.async_to_raw_response_wrapper(
+            vector_stores.search,
+        )
+
+    @cached_property
+    def files(self) -> AsyncFilesWithRawResponse:
+        return AsyncFilesWithRawResponse(self._vector_stores.files)
+
+    @cached_property
+    def file_batches(self) -> AsyncFileBatchesWithRawResponse:
+        return AsyncFileBatchesWithRawResponse(self._vector_stores.file_batches)
+
+
+class VectorStoresWithStreamingResponse:
+    def __init__(self, vector_stores: VectorStores) -> None:
+        self._vector_stores = vector_stores
+
+        self.create = to_streamed_response_wrapper(
+            vector_stores.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            vector_stores.retrieve,
+        )
+        self.update = to_streamed_response_wrapper(
+            vector_stores.update,
+        )
+        self.list = to_streamed_response_wrapper(
+            vector_stores.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            vector_stores.delete,
+        )
+        self.search = to_streamed_response_wrapper(
+            vector_stores.search,
+        )
+
+    @cached_property
+    def files(self) -> FilesWithStreamingResponse:
+        return FilesWithStreamingResponse(self._vector_stores.files)
+
+    @cached_property
+    def file_batches(self) -> FileBatchesWithStreamingResponse:
+        return FileBatchesWithStreamingResponse(self._vector_stores.file_batches)
+
+
+class AsyncVectorStoresWithStreamingResponse:
+    def __init__(self, vector_stores: AsyncVectorStores) -> None:
+        self._vector_stores = vector_stores
+
+        self.create = async_to_streamed_response_wrapper(
+            vector_stores.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            vector_stores.retrieve,
+        )
+        self.update = async_to_streamed_response_wrapper(
+            vector_stores.update,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            vector_stores.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            vector_stores.delete,
+        )
+        self.search = async_to_streamed_response_wrapper(
+            vector_stores.search,
+        )
+
+    @cached_property
+    def files(self) -> AsyncFilesWithStreamingResponse:
+        return AsyncFilesWithStreamingResponse(self._vector_stores.files)
+
+    @cached_property
+    def file_batches(self) -> AsyncFileBatchesWithStreamingResponse:
+        return AsyncFileBatchesWithStreamingResponse(self._vector_stores.file_batches)