diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/openai/resources | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-master.tar.gz |
Diffstat (limited to '.venv/lib/python3.12/site-packages/openai/resources')
50 files changed, 26502 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/__init__.py new file mode 100644 index 00000000..d3457cf3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/__init__.py @@ -0,0 +1,201 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .beta import ( + Beta, + AsyncBeta, + BetaWithRawResponse, + AsyncBetaWithRawResponse, + BetaWithStreamingResponse, + AsyncBetaWithStreamingResponse, +) +from .chat import ( + Chat, + AsyncChat, + ChatWithRawResponse, + AsyncChatWithRawResponse, + ChatWithStreamingResponse, + AsyncChatWithStreamingResponse, +) +from .audio import ( + Audio, + AsyncAudio, + AudioWithRawResponse, + AsyncAudioWithRawResponse, + AudioWithStreamingResponse, + AsyncAudioWithStreamingResponse, +) +from .files import ( + Files, + AsyncFiles, + FilesWithRawResponse, + AsyncFilesWithRawResponse, + FilesWithStreamingResponse, + AsyncFilesWithStreamingResponse, +) +from .images import ( + Images, + AsyncImages, + ImagesWithRawResponse, + AsyncImagesWithRawResponse, + ImagesWithStreamingResponse, + AsyncImagesWithStreamingResponse, +) +from .models import ( + Models, + AsyncModels, + ModelsWithRawResponse, + AsyncModelsWithRawResponse, + ModelsWithStreamingResponse, + AsyncModelsWithStreamingResponse, +) +from .batches import ( + Batches, + AsyncBatches, + BatchesWithRawResponse, + AsyncBatchesWithRawResponse, + BatchesWithStreamingResponse, + AsyncBatchesWithStreamingResponse, +) +from .uploads import ( + Uploads, + AsyncUploads, + UploadsWithRawResponse, + AsyncUploadsWithRawResponse, + UploadsWithStreamingResponse, + AsyncUploadsWithStreamingResponse, +) +from .responses import ( + Responses, + AsyncResponses, + ResponsesWithRawResponse, + AsyncResponsesWithRawResponse, + ResponsesWithStreamingResponse, + AsyncResponsesWithStreamingResponse, +) +from .embeddings import ( + Embeddings, + AsyncEmbeddings, + 
EmbeddingsWithRawResponse, + AsyncEmbeddingsWithRawResponse, + EmbeddingsWithStreamingResponse, + AsyncEmbeddingsWithStreamingResponse, +) +from .completions import ( + Completions, + AsyncCompletions, + CompletionsWithRawResponse, + AsyncCompletionsWithRawResponse, + CompletionsWithStreamingResponse, + AsyncCompletionsWithStreamingResponse, +) +from .fine_tuning import ( + FineTuning, + AsyncFineTuning, + FineTuningWithRawResponse, + AsyncFineTuningWithRawResponse, + FineTuningWithStreamingResponse, + AsyncFineTuningWithStreamingResponse, +) +from .moderations import ( + Moderations, + AsyncModerations, + ModerationsWithRawResponse, + AsyncModerationsWithRawResponse, + ModerationsWithStreamingResponse, + AsyncModerationsWithStreamingResponse, +) +from .vector_stores import ( + VectorStores, + AsyncVectorStores, + VectorStoresWithRawResponse, + AsyncVectorStoresWithRawResponse, + VectorStoresWithStreamingResponse, + AsyncVectorStoresWithStreamingResponse, +) + +__all__ = [ + "Completions", + "AsyncCompletions", + "CompletionsWithRawResponse", + "AsyncCompletionsWithRawResponse", + "CompletionsWithStreamingResponse", + "AsyncCompletionsWithStreamingResponse", + "Chat", + "AsyncChat", + "ChatWithRawResponse", + "AsyncChatWithRawResponse", + "ChatWithStreamingResponse", + "AsyncChatWithStreamingResponse", + "Embeddings", + "AsyncEmbeddings", + "EmbeddingsWithRawResponse", + "AsyncEmbeddingsWithRawResponse", + "EmbeddingsWithStreamingResponse", + "AsyncEmbeddingsWithStreamingResponse", + "Files", + "AsyncFiles", + "FilesWithRawResponse", + "AsyncFilesWithRawResponse", + "FilesWithStreamingResponse", + "AsyncFilesWithStreamingResponse", + "Images", + "AsyncImages", + "ImagesWithRawResponse", + "AsyncImagesWithRawResponse", + "ImagesWithStreamingResponse", + "AsyncImagesWithStreamingResponse", + "Audio", + "AsyncAudio", + "AudioWithRawResponse", + "AsyncAudioWithRawResponse", + "AudioWithStreamingResponse", + "AsyncAudioWithStreamingResponse", + "Moderations", + 
"AsyncModerations", + "ModerationsWithRawResponse", + "AsyncModerationsWithRawResponse", + "ModerationsWithStreamingResponse", + "AsyncModerationsWithStreamingResponse", + "Models", + "AsyncModels", + "ModelsWithRawResponse", + "AsyncModelsWithRawResponse", + "ModelsWithStreamingResponse", + "AsyncModelsWithStreamingResponse", + "FineTuning", + "AsyncFineTuning", + "FineTuningWithRawResponse", + "AsyncFineTuningWithRawResponse", + "FineTuningWithStreamingResponse", + "AsyncFineTuningWithStreamingResponse", + "VectorStores", + "AsyncVectorStores", + "VectorStoresWithRawResponse", + "AsyncVectorStoresWithRawResponse", + "VectorStoresWithStreamingResponse", + "AsyncVectorStoresWithStreamingResponse", + "Beta", + "AsyncBeta", + "BetaWithRawResponse", + "AsyncBetaWithRawResponse", + "BetaWithStreamingResponse", + "AsyncBetaWithStreamingResponse", + "Batches", + "AsyncBatches", + "BatchesWithRawResponse", + "AsyncBatchesWithRawResponse", + "BatchesWithStreamingResponse", + "AsyncBatchesWithStreamingResponse", + "Uploads", + "AsyncUploads", + "UploadsWithRawResponse", + "AsyncUploadsWithRawResponse", + "UploadsWithStreamingResponse", + "AsyncUploadsWithStreamingResponse", + "Responses", + "AsyncResponses", + "ResponsesWithRawResponse", + "AsyncResponsesWithRawResponse", + "ResponsesWithStreamingResponse", + "AsyncResponsesWithStreamingResponse", +] diff --git a/.venv/lib/python3.12/site-packages/openai/resources/audio/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/audio/__init__.py new file mode 100644 index 00000000..7da1d2db --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/audio/__init__.py @@ -0,0 +1,61 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .audio import ( + Audio, + AsyncAudio, + AudioWithRawResponse, + AsyncAudioWithRawResponse, + AudioWithStreamingResponse, + AsyncAudioWithStreamingResponse, +) +from .speech import ( + Speech, + AsyncSpeech, + SpeechWithRawResponse, + AsyncSpeechWithRawResponse, + SpeechWithStreamingResponse, + AsyncSpeechWithStreamingResponse, +) +from .translations import ( + Translations, + AsyncTranslations, + TranslationsWithRawResponse, + AsyncTranslationsWithRawResponse, + TranslationsWithStreamingResponse, + AsyncTranslationsWithStreamingResponse, +) +from .transcriptions import ( + Transcriptions, + AsyncTranscriptions, + TranscriptionsWithRawResponse, + AsyncTranscriptionsWithRawResponse, + TranscriptionsWithStreamingResponse, + AsyncTranscriptionsWithStreamingResponse, +) + +__all__ = [ + "Transcriptions", + "AsyncTranscriptions", + "TranscriptionsWithRawResponse", + "AsyncTranscriptionsWithRawResponse", + "TranscriptionsWithStreamingResponse", + "AsyncTranscriptionsWithStreamingResponse", + "Translations", + "AsyncTranslations", + "TranslationsWithRawResponse", + "AsyncTranslationsWithRawResponse", + "TranslationsWithStreamingResponse", + "AsyncTranslationsWithStreamingResponse", + "Speech", + "AsyncSpeech", + "SpeechWithRawResponse", + "AsyncSpeechWithRawResponse", + "SpeechWithStreamingResponse", + "AsyncSpeechWithStreamingResponse", + "Audio", + "AsyncAudio", + "AudioWithRawResponse", + "AsyncAudioWithRawResponse", + "AudioWithStreamingResponse", + "AsyncAudioWithStreamingResponse", +] diff --git a/.venv/lib/python3.12/site-packages/openai/resources/audio/audio.py b/.venv/lib/python3.12/site-packages/openai/resources/audio/audio.py new file mode 100644 index 00000000..383b7073 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/audio/audio.py @@ -0,0 +1,166 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from .speech import ( + Speech, + AsyncSpeech, + SpeechWithRawResponse, + AsyncSpeechWithRawResponse, + SpeechWithStreamingResponse, + AsyncSpeechWithStreamingResponse, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from .translations import ( + Translations, + AsyncTranslations, + TranslationsWithRawResponse, + AsyncTranslationsWithRawResponse, + TranslationsWithStreamingResponse, + AsyncTranslationsWithStreamingResponse, +) +from .transcriptions import ( + Transcriptions, + AsyncTranscriptions, + TranscriptionsWithRawResponse, + AsyncTranscriptionsWithRawResponse, + TranscriptionsWithStreamingResponse, + AsyncTranscriptionsWithStreamingResponse, +) + +__all__ = ["Audio", "AsyncAudio"] + + +class Audio(SyncAPIResource): + @cached_property + def transcriptions(self) -> Transcriptions: + return Transcriptions(self._client) + + @cached_property + def translations(self) -> Translations: + return Translations(self._client) + + @cached_property + def speech(self) -> Speech: + return Speech(self._client) + + @cached_property + def with_raw_response(self) -> AudioWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AudioWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AudioWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AudioWithStreamingResponse(self) + + +class AsyncAudio(AsyncAPIResource): + @cached_property + def transcriptions(self) -> AsyncTranscriptions: + return AsyncTranscriptions(self._client) + + @cached_property + def translations(self) -> AsyncTranslations: + return AsyncTranslations(self._client) + + @cached_property + def speech(self) -> AsyncSpeech: + return AsyncSpeech(self._client) + + @cached_property + def with_raw_response(self) -> AsyncAudioWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncAudioWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncAudioWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncAudioWithStreamingResponse(self) + + +class AudioWithRawResponse: + def __init__(self, audio: Audio) -> None: + self._audio = audio + + @cached_property + def transcriptions(self) -> TranscriptionsWithRawResponse: + return TranscriptionsWithRawResponse(self._audio.transcriptions) + + @cached_property + def translations(self) -> TranslationsWithRawResponse: + return TranslationsWithRawResponse(self._audio.translations) + + @cached_property + def speech(self) -> SpeechWithRawResponse: + return SpeechWithRawResponse(self._audio.speech) + + +class AsyncAudioWithRawResponse: + def __init__(self, audio: AsyncAudio) -> None: + self._audio = audio + + @cached_property + def transcriptions(self) -> AsyncTranscriptionsWithRawResponse: + return AsyncTranscriptionsWithRawResponse(self._audio.transcriptions) + + @cached_property + def translations(self) -> AsyncTranslationsWithRawResponse: + return AsyncTranslationsWithRawResponse(self._audio.translations) + + @cached_property + def speech(self) -> AsyncSpeechWithRawResponse: + return AsyncSpeechWithRawResponse(self._audio.speech) + + +class AudioWithStreamingResponse: + def __init__(self, audio: Audio) -> None: + self._audio = audio + + @cached_property + def transcriptions(self) -> TranscriptionsWithStreamingResponse: + return TranscriptionsWithStreamingResponse(self._audio.transcriptions) + + @cached_property + def translations(self) -> TranslationsWithStreamingResponse: + return TranslationsWithStreamingResponse(self._audio.translations) + + @cached_property + def speech(self) -> SpeechWithStreamingResponse: + return SpeechWithStreamingResponse(self._audio.speech) + + +class AsyncAudioWithStreamingResponse: + def __init__(self, audio: AsyncAudio) -> None: + self._audio = audio + + @cached_property + def transcriptions(self) -> AsyncTranscriptionsWithStreamingResponse: + return 
AsyncTranscriptionsWithStreamingResponse(self._audio.transcriptions) + + @cached_property + def translations(self) -> AsyncTranslationsWithStreamingResponse: + return AsyncTranslationsWithStreamingResponse(self._audio.translations) + + @cached_property + def speech(self) -> AsyncSpeechWithStreamingResponse: + return AsyncSpeechWithStreamingResponse(self._audio.speech) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/audio/speech.py b/.venv/lib/python3.12/site-packages/openai/resources/audio/speech.py new file mode 100644 index 00000000..529e3a47 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/audio/speech.py @@ -0,0 +1,244 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal + +import httpx + +from ... import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import ( + maybe_transform, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + StreamedBinaryAPIResponse, + AsyncStreamedBinaryAPIResponse, + to_custom_streamed_response_wrapper, + async_to_custom_streamed_response_wrapper, +) +from ...types.audio import speech_create_params +from ..._base_client import make_request_options +from ...types.audio.speech_model import SpeechModel + +__all__ = ["Speech", "AsyncSpeech"] + + +class Speech(SyncAPIResource): + @cached_property + def with_raw_response(self) -> SpeechWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return SpeechWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> SpeechWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return SpeechWithStreamingResponse(self) + + def create( + self, + *, + input: str, + model: Union[str, SpeechModel], + voice: Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"], + instructions: str | NotGiven = NOT_GIVEN, + response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN, + speed: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> _legacy_response.HttpxBinaryResponseContent: + """ + Generates audio from the input text. + + Args: + input: The text to generate audio for. The maximum length is 4096 characters. + + model: + One of the available [TTS models](https://platform.openai.com/docs/models#tts): + `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. + + voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`, + `coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the + voices are available in the + [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). + + instructions: Control the voice of your generated audio with additional instructions. Does not + work with `tts-1` or `tts-1-hd`. 
+ + response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, + `wav`, and `pcm`. + + speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is + the default. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})} + return self._post( + "/audio/speech", + body=maybe_transform( + { + "input": input, + "model": model, + "voice": voice, + "instructions": instructions, + "response_format": response_format, + "speed": speed, + }, + speech_create_params.SpeechCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=_legacy_response.HttpxBinaryResponseContent, + ) + + +class AsyncSpeech(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncSpeechWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncSpeechWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncSpeechWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncSpeechWithStreamingResponse(self) + + async def create( + self, + *, + input: str, + model: Union[str, SpeechModel], + voice: Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"], + instructions: str | NotGiven = NOT_GIVEN, + response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN, + speed: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> _legacy_response.HttpxBinaryResponseContent: + """ + Generates audio from the input text. + + Args: + input: The text to generate audio for. The maximum length is 4096 characters. + + model: + One of the available [TTS models](https://platform.openai.com/docs/models#tts): + `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. + + voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`, + `coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the + voices are available in the + [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). + + instructions: Control the voice of your generated audio with additional instructions. Does not + work with `tts-1` or `tts-1-hd`. + + response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, + `wav`, and `pcm`. + + speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is + the default. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})} + return await self._post( + "/audio/speech", + body=await async_maybe_transform( + { + "input": input, + "model": model, + "voice": voice, + "instructions": instructions, + "response_format": response_format, + "speed": speed, + }, + speech_create_params.SpeechCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=_legacy_response.HttpxBinaryResponseContent, + ) + + +class SpeechWithRawResponse: + def __init__(self, speech: Speech) -> None: + self._speech = speech + + self.create = _legacy_response.to_raw_response_wrapper( + speech.create, + ) + + +class AsyncSpeechWithRawResponse: + def __init__(self, speech: AsyncSpeech) -> None: + self._speech = speech + + self.create = _legacy_response.async_to_raw_response_wrapper( + speech.create, + ) + + +class SpeechWithStreamingResponse: + def __init__(self, speech: Speech) -> None: + self._speech = speech + + self.create = to_custom_streamed_response_wrapper( + speech.create, + StreamedBinaryAPIResponse, + ) + + +class AsyncSpeechWithStreamingResponse: + def __init__(self, speech: AsyncSpeech) -> None: + self._speech = speech + + self.create = async_to_custom_streamed_response_wrapper( + speech.create, + AsyncStreamedBinaryAPIResponse, + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/audio/transcriptions.py b/.venv/lib/python3.12/site-packages/openai/resources/audio/transcriptions.py new file mode 100644 index 00000000..2a77f91d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/audio/transcriptions.py @@ -0,0 +1,682 @@ +# File generated from our 
OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, List, Union, Mapping, Optional, cast +from typing_extensions import Literal, overload, assert_never + +import httpx + +from ... import _legacy_response +from ...types import AudioResponseFormat +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from ..._utils import ( + extract_files, + required_args, + maybe_transform, + deepcopy_minimal, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..._streaming import Stream, AsyncStream +from ...types.audio import transcription_create_params +from ..._base_client import make_request_options +from ...types.audio_model import AudioModel +from ...types.audio.transcription import Transcription +from ...types.audio_response_format import AudioResponseFormat +from ...types.audio.transcription_include import TranscriptionInclude +from ...types.audio.transcription_verbose import TranscriptionVerbose +from ...types.audio.transcription_stream_event import TranscriptionStreamEvent +from ...types.audio.transcription_create_response import TranscriptionCreateResponse + +__all__ = ["Transcriptions", "AsyncTranscriptions"] + +log: logging.Logger = logging.getLogger("openai.audio.transcriptions") + + +class Transcriptions(SyncAPIResource): + @cached_property + def with_raw_response(self) -> TranscriptionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return TranscriptionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> TranscriptionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return TranscriptionsWithStreamingResponse(self) + + @overload + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN, + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Transcription: ... + + @overload + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + response_format: Literal["verbose_json"], + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TranscriptionVerbose: ... + + @overload + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + response_format: Literal["text", "srt", "vtt"], + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> str: ... + + @overload + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + stream: Literal[True], + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[TranscriptionStreamEvent]: + """ + Transcribes audio into the input language. 
+ + Args: + file: + The audio file object (not file name) to transcribe, in one of these formats: + flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + + model: ID of the model to use. The options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + Whisper V2 model). + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + + include: Additional information to include in the transcription response. `logprobs` will + return the log probabilities of the tokens in the response to understand the + model's confidence in the transcription. `logprobs` only works with + response_format set to `json` and only with the models `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`. + + language: The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + + prompt: An optional text to guide the model's style or continue a previous audio + segment. The + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should match the audio language. + + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + the only supported format is `json`. + + temperature: The sampling temperature, between 0 and 1. 
Higher values like 0.8 will make the + output more random, while lower values like 0.2 will make it more focused and + deterministic. If set to 0, the model will use + [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + + timestamp_granularities: The timestamp granularities to populate for this transcription. + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + stream: bool, + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TranscriptionCreateResponse | Stream[TranscriptionStreamEvent]: + """ + Transcribes audio into the input language. 
+ + Args: + file: + The audio file object (not file name) to transcribe, in one of these formats: + flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + + model: ID of the model to use. The options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + Whisper V2 model). + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + + include: Additional information to include in the transcription response. `logprobs` will + return the log probabilities of the tokens in the response to understand the + model's confidence in the transcription. `logprobs` only works with + response_format set to `json` and only with the models `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`. + + language: The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + + prompt: An optional text to guide the model's style or continue a previous audio + segment. The + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should match the audio language. + + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + the only supported format is `json`. + + temperature: The sampling temperature, between 0 and 1. 
@required_args(["file", "model"], ["file", "model", "stream"])
def create(
    self,
    *,
    file: FileTypes,
    model: Union[str, AudioModel],
    include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
    language: str | NotGiven = NOT_GIVEN,
    prompt: str | NotGiven = NOT_GIVEN,
    response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
    stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
    temperature: float | NotGiven = NOT_GIVEN,
    timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> str | Transcription | TranscriptionVerbose | Stream[TranscriptionStreamEvent]:
    # Runtime implementation behind the `create` overloads: gathers the form
    # fields, pulls the upload out of them and POSTs to /audio/transcriptions.
    form_fields = {
        "file": file,
        "model": model,
        "include": include,
        "language": language,
        "prompt": prompt,
        "response_format": response_format,
        "stream": stream,
        "temperature": temperature,
        "timestamp_granularities": timestamp_granularities,
    }
    payload = deepcopy_minimal(form_fields)
    uploads = extract_files(cast(Mapping[str, object], payload), paths=[["file"]])

    # The Content-Type that actually reaches the server also carries a
    # `boundary` parameter, e.g. multipart/form-data; boundary=---abc--
    # Caller-supplied headers come last so they override the default.
    request_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}

    transformed = maybe_transform(payload, transcription_create_params.TranscriptionCreateParams)
    request_options = make_request_options(
        extra_headers=request_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    # The parsed response class follows the requested response format.
    response_type = _get_response_format_type(response_format)
    return self._post(  # type: ignore[return-value]
        "/audio/transcriptions",
        body=transformed,
        files=uploads,
        options=request_options,
        cast_to=response_type,
        stream=stream or False,
        stream_cls=Stream[TranscriptionStreamEvent],
    )
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncTranscriptionsWithStreamingResponse(self) + + @overload + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN, + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Transcription: ... + + @overload + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + response_format: Literal["verbose_json"], + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TranscriptionVerbose: ... 
+ + @overload + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + response_format: Literal["text", "srt", "vtt"], + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> str: ... + + @overload + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + stream: Literal[True], + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[TranscriptionStreamEvent]: + """ + Transcribes audio into the input language. + + Args: + file: + The audio file object (not file name) to transcribe, in one of these formats: + flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + + model: ID of the model to use. 
The options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + Whisper V2 model). + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + + include: Additional information to include in the transcription response. `logprobs` will + return the log probabilities of the tokens in the response to understand the + model's confidence in the transcription. `logprobs` only works with + response_format set to `json` and only with the models `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`. + + language: The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + + prompt: An optional text to guide the model's style or continue a previous audio + segment. The + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should match the audio language. + + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + the only supported format is `json`. + + temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + output more random, while lower values like 0.2 will make it more focused and + deterministic. 
If set to 0, the model will use + [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + + timestamp_granularities: The timestamp granularities to populate for this transcription. + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + stream: bool, + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TranscriptionCreateResponse | AsyncStream[TranscriptionStreamEvent]: + """ + Transcribes audio into the input language. + + Args: + file: + The audio file object (not file name) to transcribe, in one of these formats: + flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + + model: ID of the model to use. 
The options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + Whisper V2 model). + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + + include: Additional information to include in the transcription response. `logprobs` will + return the log probabilities of the tokens in the response to understand the + model's confidence in the transcription. `logprobs` only works with + response_format set to `json` and only with the models `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`. + + language: The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + + prompt: An optional text to guide the model's style or continue a previous audio + segment. The + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should match the audio language. + + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + the only supported format is `json`. + + temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + output more random, while lower values like 0.2 will make it more focused and + deterministic. 
@required_args(["file", "model"], ["file", "model", "stream"])
async def create(
    self,
    *,
    file: FileTypes,
    model: Union[str, AudioModel],
    include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
    language: str | NotGiven = NOT_GIVEN,
    prompt: str | NotGiven = NOT_GIVEN,
    response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
    stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
    temperature: float | NotGiven = NOT_GIVEN,
    timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Transcription | TranscriptionVerbose | str | AsyncStream[TranscriptionStreamEvent]:
    # Async runtime implementation behind the `create` overloads: gathers the
    # form fields, pulls the upload out of them and POSTs to /audio/transcriptions.
    form_fields = {
        "file": file,
        "model": model,
        "include": include,
        "language": language,
        "prompt": prompt,
        "response_format": response_format,
        "stream": stream,
        "temperature": temperature,
        "timestamp_granularities": timestamp_granularities,
    }
    payload = deepcopy_minimal(form_fields)
    uploads = extract_files(cast(Mapping[str, object], payload), paths=[["file"]])

    # The Content-Type that actually reaches the server also carries a
    # `boundary` parameter, e.g. multipart/form-data; boundary=---abc--
    # Caller-supplied headers come last so they override the default.
    request_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}

    transformed = await async_maybe_transform(payload, transcription_create_params.TranscriptionCreateParams)
    request_options = make_request_options(
        extra_headers=request_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    # The parsed response class follows the requested response format.
    response_type = _get_response_format_type(response_format)
    return await self._post(
        "/audio/transcriptions",
        body=transformed,
        files=uploads,
        options=request_options,
        cast_to=response_type,
        stream=stream or False,
        stream_cls=AsyncStream[TranscriptionStreamEvent],
    )
def _get_response_format_type(
    response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven,
) -> type[Transcription | TranscriptionVerbose | str]:
    """Pick the class used to parse a transcription response.

    Args:
        response_format: The format requested from the API, or ``NotGiven`` when the
            caller did not specify one (``None`` is tolerated as well).

    Returns:
        ``Transcription`` for ``"json"`` or an unspecified format, ``TranscriptionVerbose``
        for ``"verbose_json"``, and ``str`` for ``"text"``, ``"srt"`` and ``"vtt"``. Any
        other value is logged as a warning and falls back to ``Transcription``.
    """
    if isinstance(response_format, NotGiven) or response_format is None:  # pyright: ignore[reportUnnecessaryComparison]
        return Transcription

    if response_format == "json":
        return Transcription
    elif response_format == "verbose_json":
        return TranscriptionVerbose
    elif response_format == "srt" or response_format == "text" or response_format == "vtt":
        return str
    elif TYPE_CHECKING:  # type: ignore[unreachable]
        # Only seen by static analysis: forces type checkers to confirm every literal is handled.
        assert_never(response_format)
    else:
        # `Logger.warn` is a deprecated alias; `Logger.warning` is the supported spelling.
        log.warning("Unexpected audio response format: %s", response_format)
        return Transcription
import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from ..._utils import ( + extract_files, + maybe_transform, + deepcopy_minimal, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...types.audio import translation_create_params +from ..._base_client import make_request_options +from ...types.audio_model import AudioModel +from ...types.audio.translation import Translation +from ...types.audio_response_format import AudioResponseFormat +from ...types.audio.translation_verbose import TranslationVerbose + +__all__ = ["Translations", "AsyncTranslations"] + +log: logging.Logger = logging.getLogger("openai.audio.transcriptions") + + +class Translations(SyncAPIResource): + @cached_property + def with_raw_response(self) -> TranslationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return TranslationsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> TranslationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return TranslationsWithStreamingResponse(self) + + @overload + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
def create(
    self,
    *,
    file: FileTypes,
    model: Union[str, AudioModel],
    prompt: str | NotGiven = NOT_GIVEN,
    response_format: Union[Literal["json", "text", "srt", "verbose_json", "vtt"], NotGiven] = NOT_GIVEN,
    temperature: float | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Translation | TranslationVerbose | str:
    """
    Translates audio into English.

    Args:
      file: The audio file object (not file name) to translate, in one of these formats:
          flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.

      model: ID of the model to use. Only `whisper-1` (which is powered by our open source
          Whisper V2 model) is currently available.

      prompt: Optional text that guides the model's style or continues a previous audio
          segment. The
          [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
          should be in English.

      response_format: Output format; one of `json`, `text`, `srt`, `verbose_json`, or `vtt`.

      temperature: Sampling temperature, between 0 and 1. Higher values like 0.8 make the output
          more random, while lower values like 0.2 make it more focused and
          deterministic. If set to 0, the model will use
          [log probability](https://en.wikipedia.org/wiki/Log_probability) to
          automatically increase the temperature until certain thresholds are hit.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    form_fields = {
        "file": file,
        "model": model,
        "prompt": prompt,
        "response_format": response_format,
        "temperature": temperature,
    }
    payload = deepcopy_minimal(form_fields)
    uploads = extract_files(cast(Mapping[str, object], payload), paths=[["file"]])

    # The Content-Type that actually reaches the server also carries a
    # `boundary` parameter, e.g. multipart/form-data; boundary=---abc--
    # Caller-supplied headers come last so they override the default.
    request_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}

    transformed = maybe_transform(payload, translation_create_params.TranslationCreateParams)
    request_options = make_request_options(
        extra_headers=request_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    # The parsed response class follows the requested response format.
    response_type = _get_response_format_type(response_format)
    return self._post(  # type: ignore[return-value]
        "/audio/translations",
        body=transformed,
        files=uploads,
        options=request_options,
        cast_to=response_type,
    )
+ + @overload + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + response_format: Literal["verbose_json"], + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TranslationVerbose: ... + + @overload + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + response_format: Literal["text", "srt", "vtt"], + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> str: ... + + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + prompt: str | NotGiven = NOT_GIVEN, + response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Translation | TranslationVerbose | str: + """ + Translates audio into English. + + Args: + file: The audio file object (not file name) translate, in one of these formats: flac, + mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + + model: ID of the model to use. Only `whisper-1` (which is powered by our open source + Whisper V2 model) is currently available. + + prompt: An optional text to guide the model's style or continue a previous audio + segment. The + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should be in English. + + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. + + temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + output more random, while lower values like 0.2 will make it more focused and + deterministic. If set to 0, the model will use + [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "file": file, + "model": model, + "prompt": prompt, + "response_format": response_format, + "temperature": temperature, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
def _get_response_format_type(
    response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven,
) -> type[Translation | TranslationVerbose | str]:
    """Pick the class used to parse a translation response.

    Args:
        response_format: The format requested from the API, or ``NotGiven`` when the
            caller did not specify one (``None`` is tolerated as well).

    Returns:
        ``Translation`` for ``"json"`` or an unspecified format, ``TranslationVerbose``
        for ``"verbose_json"``, and ``str`` for ``"text"``, ``"srt"`` and ``"vtt"``. Any
        other value is logged as a warning and falls back to ``Translation``.
    """
    if isinstance(response_format, NotGiven) or response_format is None:  # pyright: ignore[reportUnnecessaryComparison]
        return Translation

    if response_format == "json":
        return Translation
    elif response_format == "verbose_json":
        return TranslationVerbose
    elif response_format == "srt" or response_format == "text" or response_format == "vtt":
        return str
    elif TYPE_CHECKING:  # type: ignore[unreachable]
        # Only seen by static analysis: forces type checkers to confirm every literal is handled.
        assert_never(response_format)
    else:
        # `Logger.warn` is a deprecated alias; `Logger.warning` is the supported spelling.
        log.warning("Unexpected audio response format: %s", response_format)
        # Fall back to the translation model: `Transcription` is not imported in this
        # module, so returning it here raised NameError instead of degrading gracefully.
        return Translation
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return BatchesWithStreamingResponse(self) + + def create( + self, + *, + completion_window: Literal["24h"], + endpoint: Literal["/v1/responses", "/v1/chat/completions", "/v1/embeddings", "/v1/completions"], + input_file_id: str, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Creates and executes a batch from an uploaded file of requests + + Args: + completion_window: The time frame within which the batch should be processed. Currently only `24h` + is supported. + + endpoint: The endpoint to be used for all requests in the batch. Currently + `/v1/responses`, `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` + are supported. Note that `/v1/embeddings` batches are also restricted to a + maximum of 50,000 embedding inputs across all requests in the batch. + + input_file_id: The ID of an uploaded file that contains requests for the new batch. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your input file must be formatted as a + [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input), + and must be uploaded with the purpose `batch`. The file can contain up to 50,000 + requests, and can be up to 200 MB in size. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. 
+ + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/batches", + body=maybe_transform( + { + "completion_window": completion_window, + "endpoint": endpoint, + "input_file_id": input_file_id, + "metadata": metadata, + }, + batch_create_params.BatchCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + def retrieve( + self, + batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Retrieves a batch. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + return self._get( + f"/batches/{batch_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[Batch]: + """List your organization's batches. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/batches", + page=SyncCursorPage[Batch], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + batch_list_params.BatchListParams, + ), + ), + model=Batch, + ) + + def cancel( + self, + batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """Cancels an in-progress batch. + + The batch will be in status `cancelling` for up to + 10 minutes, before changing to `cancelled`, where it will have partial results + (if any) available in the output file. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + return self._post( + f"/batches/{batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + +class AsyncBatches(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncBatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncBatchesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncBatchesWithStreamingResponse(self) + + async def create( + self, + *, + completion_window: Literal["24h"], + endpoint: Literal["/v1/responses", "/v1/chat/completions", "/v1/embeddings", "/v1/completions"], + input_file_id: str, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Creates and executes a batch from an uploaded file of requests + + Args: + completion_window: The time frame within which the batch should be processed. Currently only `24h` + is supported. + + endpoint: The endpoint to be used for all requests in the batch. Currently + `/v1/responses`, `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` + are supported. Note that `/v1/embeddings` batches are also restricted to a + maximum of 50,000 embedding inputs across all requests in the batch. + + input_file_id: The ID of an uploaded file that contains requests for the new batch. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your input file must be formatted as a + [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input), + and must be uploaded with the purpose `batch`. The file can contain up to 50,000 + requests, and can be up to 200 MB in size. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/batches", + body=await async_maybe_transform( + { + "completion_window": completion_window, + "endpoint": endpoint, + "input_file_id": input_file_id, + "metadata": metadata, + }, + batch_create_params.BatchCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + async def retrieve( + self, + batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Retrieves a batch. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + return await self._get( + f"/batches/{batch_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[Batch, AsyncCursorPage[Batch]]: + """List your organization's batches. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/batches", + page=AsyncCursorPage[Batch], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + batch_list_params.BatchListParams, + ), + ), + model=Batch, + ) + + async def cancel( + self, + batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """Cancels an in-progress batch. 
+ + The batch will be in status `cancelling` for up to + 10 minutes, before changing to `cancelled`, where it will have partial results + (if any) available in the output file. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + return await self._post( + f"/batches/{batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + +class BatchesWithRawResponse: + def __init__(self, batches: Batches) -> None: + self._batches = batches + + self.create = _legacy_response.to_raw_response_wrapper( + batches.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + batches.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + batches.list, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + batches.cancel, + ) + + +class AsyncBatchesWithRawResponse: + def __init__(self, batches: AsyncBatches) -> None: + self._batches = batches + + self.create = _legacy_response.async_to_raw_response_wrapper( + batches.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + batches.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + batches.list, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + batches.cancel, + ) + + +class BatchesWithStreamingResponse: + def __init__(self, batches: Batches) -> None: + self._batches = batches + + self.create = to_streamed_response_wrapper( + batches.create, + ) + self.retrieve = to_streamed_response_wrapper( + batches.retrieve, + ) + self.list = to_streamed_response_wrapper( + batches.list, + ) + self.cancel = 
to_streamed_response_wrapper( + batches.cancel, + ) + + +class AsyncBatchesWithStreamingResponse: + def __init__(self, batches: AsyncBatches) -> None: + self._batches = batches + + self.create = async_to_streamed_response_wrapper( + batches.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + batches.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + batches.list, + ) + self.cancel = async_to_streamed_response_wrapper( + batches.cancel, + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/__init__.py new file mode 100644 index 00000000..87fea252 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/__init__.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .beta import ( + Beta, + AsyncBeta, + BetaWithRawResponse, + AsyncBetaWithRawResponse, + BetaWithStreamingResponse, + AsyncBetaWithStreamingResponse, +) +from .threads import ( + Threads, + AsyncThreads, + ThreadsWithRawResponse, + AsyncThreadsWithRawResponse, + ThreadsWithStreamingResponse, + AsyncThreadsWithStreamingResponse, +) +from .assistants import ( + Assistants, + AsyncAssistants, + AssistantsWithRawResponse, + AsyncAssistantsWithRawResponse, + AssistantsWithStreamingResponse, + AsyncAssistantsWithStreamingResponse, +) + +__all__ = [ + "Assistants", + "AsyncAssistants", + "AssistantsWithRawResponse", + "AsyncAssistantsWithRawResponse", + "AssistantsWithStreamingResponse", + "AsyncAssistantsWithStreamingResponse", + "Threads", + "AsyncThreads", + "ThreadsWithRawResponse", + "AsyncThreadsWithRawResponse", + "ThreadsWithStreamingResponse", + "AsyncThreadsWithStreamingResponse", + "Beta", + "AsyncBeta", + "BetaWithRawResponse", + "AsyncBetaWithRawResponse", + "BetaWithStreamingResponse", + "AsyncBetaWithStreamingResponse", +] diff --git 
a/.venv/lib/python3.12/site-packages/openai/resources/beta/assistants.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/assistants.py new file mode 100644 index 00000000..1c7cbf37 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/assistants.py @@ -0,0 +1,1004 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal + +import httpx + +from ... import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import ( + maybe_transform, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncCursorPage, AsyncCursorPage +from ...types.beta import ( + assistant_list_params, + assistant_create_params, + assistant_update_params, +) +from ..._base_client import AsyncPaginator, make_request_options +from ...types.beta.assistant import Assistant +from ...types.shared.chat_model import ChatModel +from ...types.beta.assistant_deleted import AssistantDeleted +from ...types.shared_params.metadata import Metadata +from ...types.shared.reasoning_effort import ReasoningEffort +from ...types.beta.assistant_tool_param import AssistantToolParam +from ...types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam + +__all__ = ["Assistants", "AsyncAssistants"] + + +class Assistants(SyncAPIResource): + @cached_property + def with_raw_response(self) -> AssistantsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AssistantsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AssistantsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AssistantsWithStreamingResponse(self) + + def create( + self, + *, + model: Union[str, ChatModel], + description: Optional[str] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """ + Create an assistant with a model and instructions. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + description: The description of the assistant. 
The maximum length is 512 characters. + + instructions: The system instructions that the assistant uses. The maximum length is 256,000 + characters. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the assistant. The maximum length is 256 characters. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. 
Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per + assistant. Tools can be of types `code_interpreter`, `file_search`, or + `function`. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + "/assistants", + body=maybe_transform( + { + "model": model, + "description": description, + "instructions": instructions, + "metadata": metadata, + "name": name, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "temperature": temperature, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + }, + assistant_create_params.AssistantCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Assistant, + ) + + def retrieve( + self, + assistant_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """ + Retrieves an assistant. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/assistants/{assistant_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Assistant, + ) + + def update( + self, + assistant_id: str, + *, + description: Optional[str] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4.5-preview", + "gpt-4.5-preview-2025-02-27", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ] + | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, + top_p: 
Optional[float] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """Modifies an assistant. + + Args: + description: The description of the assistant. + + The maximum length is 512 characters. + + instructions: The system instructions that the assistant uses. The maximum length is 256,000 + characters. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + name: The name of the assistant. The maximum length is 256 characters. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. 
Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per + assistant. Tools can be of types `code_interpreter`, `file_search`, or + `function`. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. 
So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/assistants/{assistant_id}", + body=maybe_transform( + { + "description": description, + "instructions": instructions, + "metadata": metadata, + "model": model, + "name": name, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "temperature": temperature, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + }, + assistant_update_params.AssistantUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Assistant, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[Assistant]: + """Returns a list of assistants. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. 
For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + "/assistants", + page=SyncCursorPage[Assistant], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + assistant_list_params.AssistantListParams, + ), + ), + model=Assistant, + ) + + def delete( + self, + assistant_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantDeleted: + """ + Delete an assistant. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._delete( + f"/assistants/{assistant_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=AssistantDeleted, + ) + + +class AsyncAssistants(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncAssistantsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncAssistantsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncAssistantsWithStreamingResponse(self) + + async def create( + self, + *, + model: Union[str, ChatModel], + description: Optional[str] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """ + Create an assistant with a model and instructions. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + description: The description of the assistant. The maximum length is 512 characters. + + instructions: The system instructions that the assistant uses. The maximum length is 256,000 + characters. + + metadata: Set of 16 key-value pairs that can be attached to an object. 
This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the assistant. The maximum length is 256 characters. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. 
Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per + assistant. Tools can be of types `code_interpreter`, `file_search`, or + `function`. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/assistants", + body=await async_maybe_transform( + { + "model": model, + "description": description, + "instructions": instructions, + "metadata": metadata, + "name": name, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "temperature": temperature, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + }, + assistant_create_params.AssistantCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Assistant, + ) + + async def retrieve( + self, + assistant_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available 
via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """ + Retrieves an assistant. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/assistants/{assistant_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Assistant, + ) + + async def update( + self, + assistant_id: str, + *, + description: Optional[str] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4.5-preview", + "gpt-4.5-preview-2025-02-27", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ] + | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + reasoning_effort: 
Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """Modifies an assistant. + + Args: + description: The description of the assistant. + + The maximum length is 512 characters. + + instructions: The system instructions that the assistant uses. The maximum length is 256,000 + characters. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + name: The name of the assistant. The maximum length is 256 characters. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. 
Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per + assistant. Tools can be of types `code_interpreter`, `file_search`, or + `function`. 
+ + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/assistants/{assistant_id}", + body=await async_maybe_transform( + { + "description": description, + "instructions": instructions, + "metadata": metadata, + "model": model, + "name": name, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "temperature": temperature, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + }, + assistant_update_params.AssistantUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Assistant, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[Assistant, AsyncCursorPage[Assistant]]: + """Returns a list of assistants. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + "/assistants", + page=AsyncCursorPage[Assistant], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + assistant_list_params.AssistantListParams, + ), + ), + model=Assistant, + ) + + async def delete( + self, + assistant_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantDeleted: + """ + Delete an assistant. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._delete( + f"/assistants/{assistant_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=AssistantDeleted, + ) + + +class AssistantsWithRawResponse: + def __init__(self, assistants: Assistants) -> None: + self._assistants = assistants + + self.create = _legacy_response.to_raw_response_wrapper( + assistants.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + assistants.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + assistants.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + assistants.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + assistants.delete, + ) + + +class AsyncAssistantsWithRawResponse: + def __init__(self, assistants: AsyncAssistants) -> None: + self._assistants = assistants + + self.create = _legacy_response.async_to_raw_response_wrapper( + assistants.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + assistants.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + assistants.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + assistants.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + assistants.delete, + ) + + +class AssistantsWithStreamingResponse: + def __init__(self, assistants: Assistants) -> None: + self._assistants = assistants + + self.create = to_streamed_response_wrapper( + 
assistants.create, + ) + self.retrieve = to_streamed_response_wrapper( + assistants.retrieve, + ) + self.update = to_streamed_response_wrapper( + assistants.update, + ) + self.list = to_streamed_response_wrapper( + assistants.list, + ) + self.delete = to_streamed_response_wrapper( + assistants.delete, + ) + + +class AsyncAssistantsWithStreamingResponse: + def __init__(self, assistants: AsyncAssistants) -> None: + self._assistants = assistants + + self.create = async_to_streamed_response_wrapper( + assistants.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + assistants.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + assistants.update, + ) + self.list = async_to_streamed_response_wrapper( + assistants.list, + ) + self.delete = async_to_streamed_response_wrapper( + assistants.delete, + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/beta.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/beta.py new file mode 100644 index 00000000..62fc8258 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/beta.py @@ -0,0 +1,175 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from ..._compat import cached_property +from .chat.chat import Chat, AsyncChat +from .assistants import ( + Assistants, + AsyncAssistants, + AssistantsWithRawResponse, + AsyncAssistantsWithRawResponse, + AssistantsWithStreamingResponse, + AsyncAssistantsWithStreamingResponse, +) +from ..._resource import SyncAPIResource, AsyncAPIResource +from .threads.threads import ( + Threads, + AsyncThreads, + ThreadsWithRawResponse, + AsyncThreadsWithRawResponse, + ThreadsWithStreamingResponse, + AsyncThreadsWithStreamingResponse, +) +from .realtime.realtime import ( + Realtime, + AsyncRealtime, + RealtimeWithRawResponse, + AsyncRealtimeWithRawResponse, + RealtimeWithStreamingResponse, + AsyncRealtimeWithStreamingResponse, +) + +__all__ = ["Beta", "AsyncBeta"] + + +class Beta(SyncAPIResource): + @cached_property + def chat(self) -> Chat: + return Chat(self._client) + + @cached_property + def realtime(self) -> Realtime: + return Realtime(self._client) + + @cached_property + def assistants(self) -> Assistants: + return Assistants(self._client) + + @cached_property + def threads(self) -> Threads: + return Threads(self._client) + + @cached_property + def with_raw_response(self) -> BetaWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return BetaWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> BetaWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return BetaWithStreamingResponse(self) + + +class AsyncBeta(AsyncAPIResource): + @cached_property + def chat(self) -> AsyncChat: + return AsyncChat(self._client) + + @cached_property + def realtime(self) -> AsyncRealtime: + return AsyncRealtime(self._client) + + @cached_property + def assistants(self) -> AsyncAssistants: + return AsyncAssistants(self._client) + + @cached_property + def threads(self) -> AsyncThreads: + return AsyncThreads(self._client) + + @cached_property + def with_raw_response(self) -> AsyncBetaWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncBetaWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncBetaWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncBetaWithStreamingResponse(self) + + +class BetaWithRawResponse: + def __init__(self, beta: Beta) -> None: + self._beta = beta + + @cached_property + def realtime(self) -> RealtimeWithRawResponse: + return RealtimeWithRawResponse(self._beta.realtime) + + @cached_property + def assistants(self) -> AssistantsWithRawResponse: + return AssistantsWithRawResponse(self._beta.assistants) + + @cached_property + def threads(self) -> ThreadsWithRawResponse: + return ThreadsWithRawResponse(self._beta.threads) + + +class AsyncBetaWithRawResponse: + def __init__(self, beta: AsyncBeta) -> None: + self._beta = beta + + @cached_property + def realtime(self) -> AsyncRealtimeWithRawResponse: + return AsyncRealtimeWithRawResponse(self._beta.realtime) + + @cached_property + def assistants(self) -> AsyncAssistantsWithRawResponse: + return AsyncAssistantsWithRawResponse(self._beta.assistants) + + @cached_property + def threads(self) -> AsyncThreadsWithRawResponse: + return AsyncThreadsWithRawResponse(self._beta.threads) + + +class BetaWithStreamingResponse: + def __init__(self, beta: Beta) -> None: + self._beta = beta + + @cached_property + def realtime(self) -> RealtimeWithStreamingResponse: + return RealtimeWithStreamingResponse(self._beta.realtime) + + @cached_property + def assistants(self) -> AssistantsWithStreamingResponse: + return AssistantsWithStreamingResponse(self._beta.assistants) + + @cached_property + def threads(self) -> ThreadsWithStreamingResponse: + return ThreadsWithStreamingResponse(self._beta.threads) + + +class AsyncBetaWithStreamingResponse: + def __init__(self, beta: AsyncBeta) -> None: + self._beta = beta + + @cached_property + def realtime(self) -> AsyncRealtimeWithStreamingResponse: + return AsyncRealtimeWithStreamingResponse(self._beta.realtime) + + @cached_property + def assistants(self) -> AsyncAssistantsWithStreamingResponse: + return 
AsyncAssistantsWithStreamingResponse(self._beta.assistants) + + @cached_property + def threads(self) -> AsyncThreadsWithStreamingResponse: + return AsyncThreadsWithStreamingResponse(self._beta.threads) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/__init__.py new file mode 100644 index 00000000..072d7867 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/__init__.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .chat import Chat, AsyncChat +from .completions import Completions, AsyncCompletions + +__all__ = [ + "Completions", + "AsyncCompletions", + "Chat", + "AsyncChat", +] diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/chat.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/chat.py new file mode 100644 index 00000000..6afdcea3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/chat.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from ...._compat import cached_property +from .completions import Completions, AsyncCompletions +from ...._resource import SyncAPIResource, AsyncAPIResource + +__all__ = ["Chat", "AsyncChat"] + + +class Chat(SyncAPIResource): + @cached_property + def completions(self) -> Completions: + return Completions(self._client) + + +class AsyncChat(AsyncAPIResource): + @cached_property + def completions(self) -> AsyncCompletions: + return AsyncCompletions(self._client) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/completions.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/completions.py new file mode 100644 index 00000000..545a3f40 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/completions.py @@ -0,0 +1,634 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Type, Union, Iterable, Optional, cast +from functools import partial +from typing_extensions import Literal + +import httpx + +from .... 
from typing import Dict, List, Type, Union, Iterable, Optional, cast
from functools import partial
from typing_extensions import Literal

import httpx

from .... import _legacy_response
from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from ...._utils import maybe_transform, async_maybe_transform
from ...._compat import cached_property
from ...._resource import SyncAPIResource, AsyncAPIResource
from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ...._streaming import Stream
from ....types.chat import completion_create_params
from ...._base_client import make_request_options
from ....lib._parsing import (
    ResponseFormatT,
    validate_input_tools as _validate_input_tools,
    parse_chat_completion as _parse_chat_completion,
    type_to_response_format_param as _type_to_response_format,
)
from ....types.chat_model import ChatModel
from ....lib.streaming.chat import ChatCompletionStreamManager, AsyncChatCompletionStreamManager
from ....types.shared_params import Metadata, ReasoningEffort
from ....types.chat.chat_completion import ChatCompletion
from ....types.chat.chat_completion_chunk import ChatCompletionChunk
from ....types.chat.parsed_chat_completion import ParsedChatCompletion
from ....types.chat.chat_completion_tool_param import ChatCompletionToolParam
from ....types.chat.chat_completion_audio_param import ChatCompletionAudioParam
from ....types.chat.chat_completion_message_param import ChatCompletionMessageParam
from ....types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
from ....types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam
from ....types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam

__all__ = ["Completions", "AsyncCompletions"]


def _materialize_tools(
    tools: Iterable[ChatCompletionToolParam] | NotGiven,
) -> Iterable[ChatCompletionToolParam] | NotGiven:
    """Return `tools` in a form that can safely be iterated more than once.

    The helper methods in this module hand the same `tools` object to several
    independent consumers (the input validator, the request body, and the
    response parser / stream manager). `tools` is only required to be an
    `Iterable`, so a one-shot iterable such as a generator would be exhausted
    by the first consumer that iterates it. Copying it into a list once makes
    every consumer see the same items.

    Args:
        tools: The caller-supplied tools, or `NOT_GIVEN`.

    Returns:
        `tools` unchanged when it is `NOT_GIVEN` or `None`; otherwise a new
        list holding the items of `tools`.
    """
    # `None` is outside the declared type; it is passed through untouched so that
    # whatever the downstream code does with it today is not altered here.
    if isinstance(tools, NotGiven) or tools is None:
        return tools
    return list(tools)


class Completions(SyncAPIResource):
    """Synchronous `beta.chat.completions` helpers: `parse()` and `stream()`."""

    @cached_property
    def with_raw_response(self) -> CompletionsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return CompletionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return CompletionsWithStreamingResponse(self)

    def parse(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
        response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
        n: Optional[int] | NotGiven = NOT_GIVEN,
        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
        seed: Optional[int] | NotGiven = NOT_GIVEN,
        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
        store: Optional[bool] | NotGiven = NOT_GIVEN,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        user: str | NotGiven = NOT_GIVEN,
        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ParsedChatCompletion[ResponseFormatT]:
        """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
        & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.

        You can pass a pydantic model to this method and it will automatically convert the model
        into a JSON schema, send it to the API and parse the response content back into the given model.

        This method will also automatically parse `function` tool calls if:
        - You use the `openai.pydantic_function_tool()` helper method
        - You mark your tool schema with `"strict": True`

        `tools` may be any iterable; it is copied into a list once so that the
        validator, the request body and the response parser all see the same items.

        Example usage:
        ```py
        from typing import List

        from pydantic import BaseModel
        from openai import OpenAI


        class Step(BaseModel):
            explanation: str
            output: str


        class MathResponse(BaseModel):
            steps: List[Step]
            final_answer: str


        client = OpenAI()
        completion = client.beta.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=[
                {"role": "system", "content": "You are a helpful math tutor."},
                {"role": "user", "content": "solve 8x + 31 = 2"},
            ],
            response_format=MathResponse,
        )

        message = completion.choices[0].message
        if message.parsed:
            print(message.parsed.steps)
            print("answer: ", message.parsed.final_answer)
        ```
        """
        # `tools` is consumed three times below (validator, request body, parser closure).
        tools = _materialize_tools(tools)
        _validate_input_tools(tools)

        # Caller-supplied headers come last so they win over the helper marker.
        extra_headers = {
            "X-Stainless-Helper-Method": "beta.chat.completions.parse",
            **(extra_headers or {}),
        }

        def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
            return _parse_chat_completion(
                response_format=response_format,
                chat_completion=raw_completion,
                input_tools=tools,
            )

        return self._post(
            "/chat/completions",
            body=maybe_transform(
                {
                    "messages": messages,
                    "model": model,
                    "audio": audio,
                    "frequency_penalty": frequency_penalty,
                    "function_call": function_call,
                    "functions": functions,
                    "logit_bias": logit_bias,
                    "logprobs": logprobs,
                    "max_completion_tokens": max_completion_tokens,
                    "max_tokens": max_tokens,
                    "metadata": metadata,
                    "modalities": modalities,
                    "n": n,
                    "parallel_tool_calls": parallel_tool_calls,
                    "prediction": prediction,
                    "presence_penalty": presence_penalty,
                    "reasoning_effort": reasoning_effort,
                    "response_format": _type_to_response_format(response_format),
                    "seed": seed,
                    "service_tier": service_tier,
                    "stop": stop,
                    "store": store,
                    "stream": False,
                    "stream_options": stream_options,
                    "temperature": temperature,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "top_logprobs": top_logprobs,
                    "top_p": top_p,
                    "user": user,
                    "web_search_options": web_search_options,
                },
                completion_create_params.CompletionCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                post_parser=parser,
            ),
            # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
            # in the `parser` function above
            cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
            stream=False,
        )

    def stream(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
        response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
        n: Optional[int] | NotGiven = NOT_GIVEN,
        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
        seed: Optional[int] | NotGiven = NOT_GIVEN,
        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
        store: Optional[bool] | NotGiven = NOT_GIVEN,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        user: str | NotGiven = NOT_GIVEN,
        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ChatCompletionStreamManager[ResponseFormatT]:
        """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
        and automatic accumulation of each delta.

        This also supports all of the parsing utilities that `.parse()` does.

        Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:

        ```py
        with client.beta.chat.completions.stream(
            model="gpt-4o-2024-08-06",
            messages=[...],
        ) as stream:
            for event in stream:
                if event.type == "content.delta":
                    print(event.delta, flush=True, end="")
        ```

        When the context manager is entered, a `ChatCompletionStream` instance is returned which, like `.create(stream=True)` is an iterator. The full list of events that are yielded by the iterator are outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).

        When the context manager exits, the response will be closed, however the `stream` instance is still available outside
        the context manager.
        """
        # `tools` is handed both to the deferred `create()` call and to the stream manager.
        tools = _materialize_tools(tools)

        # NOTE(review): unlike `parse()` and `AsyncCompletions.stream()`, this method does not
        # call `_validate_input_tools(tools)`. Left as-is because adding the check might reject
        # inputs that are accepted today -- confirm which behaviour is intended.

        # Caller-supplied headers come last so they win over the helper marker.
        extra_headers = {
            "X-Stainless-Helper-Method": "beta.chat.completions.stream",
            **(extra_headers or {}),
        }

        # The request is wrapped in a `partial` rather than issued here; the returned
        # manager receives the callable and decides when to invoke it.
        api_request: partial[Stream[ChatCompletionChunk]] = partial(
            self._client.chat.completions.create,
            messages=messages,
            model=model,
            audio=audio,
            stream=True,
            response_format=_type_to_response_format(response_format),
            frequency_penalty=frequency_penalty,
            function_call=function_call,
            functions=functions,
            logit_bias=logit_bias,
            logprobs=logprobs,
            max_completion_tokens=max_completion_tokens,
            max_tokens=max_tokens,
            metadata=metadata,
            modalities=modalities,
            n=n,
            parallel_tool_calls=parallel_tool_calls,
            prediction=prediction,
            presence_penalty=presence_penalty,
            reasoning_effort=reasoning_effort,
            seed=seed,
            service_tier=service_tier,
            store=store,
            stop=stop,
            stream_options=stream_options,
            temperature=temperature,
            tool_choice=tool_choice,
            tools=tools,
            top_logprobs=top_logprobs,
            top_p=top_p,
            user=user,
            web_search_options=web_search_options,
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return ChatCompletionStreamManager(
            api_request,
            response_format=response_format,
            input_tools=tools,
        )


class AsyncCompletions(AsyncAPIResource):
    """Asynchronous `beta.chat.completions` helpers: `parse()` and `stream()`."""

    @cached_property
    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncCompletionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncCompletionsWithStreamingResponse(self)

    async def parse(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
        response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
        n: Optional[int] | NotGiven = NOT_GIVEN,
        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
        seed: Optional[int] | NotGiven = NOT_GIVEN,
        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
        store: Optional[bool] | NotGiven = NOT_GIVEN,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        user: str | NotGiven = NOT_GIVEN,
        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ParsedChatCompletion[ResponseFormatT]:
        """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
        & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.

        You can pass a pydantic model to this method and it will automatically convert the model
        into a JSON schema, send it to the API and parse the response content back into the given model.

        This method will also automatically parse `function` tool calls if:
        - You use the `openai.pydantic_function_tool()` helper method
        - You mark your tool schema with `"strict": True`

        `tools` may be any iterable; it is copied into a list once so that the
        validator, the request body and the response parser all see the same items.

        Example usage:
        ```py
        from typing import List

        from pydantic import BaseModel
        from openai import AsyncOpenAI


        class Step(BaseModel):
            explanation: str
            output: str


        class MathResponse(BaseModel):
            steps: List[Step]
            final_answer: str


        client = AsyncOpenAI()
        completion = await client.beta.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=[
                {"role": "system", "content": "You are a helpful math tutor."},
                {"role": "user", "content": "solve 8x + 31 = 2"},
            ],
            response_format=MathResponse,
        )

        message = completion.choices[0].message
        if message.parsed:
            print(message.parsed.steps)
            print("answer: ", message.parsed.final_answer)
        ```
        """
        # `tools` is consumed three times below (validator, request body, parser closure).
        tools = _materialize_tools(tools)
        _validate_input_tools(tools)

        # Caller-supplied headers come last so they win over the helper marker.
        extra_headers = {
            "X-Stainless-Helper-Method": "beta.chat.completions.parse",
            **(extra_headers or {}),
        }

        def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
            return _parse_chat_completion(
                response_format=response_format,
                chat_completion=raw_completion,
                input_tools=tools,
            )

        return await self._post(
            "/chat/completions",
            body=await async_maybe_transform(
                {
                    "messages": messages,
                    "model": model,
                    "audio": audio,
                    "frequency_penalty": frequency_penalty,
                    "function_call": function_call,
                    "functions": functions,
                    "logit_bias": logit_bias,
                    "logprobs": logprobs,
                    "max_completion_tokens": max_completion_tokens,
                    "max_tokens": max_tokens,
                    "metadata": metadata,
                    "modalities": modalities,
                    "n": n,
                    "parallel_tool_calls": parallel_tool_calls,
                    "prediction": prediction,
                    "presence_penalty": presence_penalty,
                    "reasoning_effort": reasoning_effort,
                    "response_format": _type_to_response_format(response_format),
                    "seed": seed,
                    "service_tier": service_tier,
                    "store": store,
                    "stop": stop,
                    "stream": False,
                    "stream_options": stream_options,
                    "temperature": temperature,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "top_logprobs": top_logprobs,
                    "top_p": top_p,
                    "user": user,
                    "web_search_options": web_search_options,
                },
                completion_create_params.CompletionCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                post_parser=parser,
            ),
            # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
            # in the `parser` function above
            cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
            stream=False,
        )

    def stream(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
        response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
        n: Optional[int] | NotGiven = NOT_GIVEN,
        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
        seed: Optional[int] | NotGiven = NOT_GIVEN,
        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
        store: Optional[bool] | NotGiven = NOT_GIVEN,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        user: str | NotGiven = NOT_GIVEN,
        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> AsyncChatCompletionStreamManager[ResponseFormatT]:
        """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
        and automatic accumulation of each delta.

        This also supports all of the parsing utilities that `.parse()` does.

        Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:

        ```py
        async with client.beta.chat.completions.stream(
            model="gpt-4o-2024-08-06",
            messages=[...],
        ) as stream:
            async for event in stream:
                if event.type == "content.delta":
                    print(event.delta, flush=True, end="")
        ```

        When the context manager is entered, an `AsyncChatCompletionStream` instance is returned which, like `.create(stream=True)` is an async iterator. The full list of events that are yielded by the iterator are outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).

        When the context manager exits, the response will be closed, however the `stream` instance is still available outside
        the context manager.
        """
        # `tools` is consumed three times below (validator, `create()` call, stream manager).
        tools = _materialize_tools(tools)
        _validate_input_tools(tools)

        # Caller-supplied headers come last so they win over the helper marker.
        extra_headers = {
            "X-Stainless-Helper-Method": "beta.chat.completions.stream",
            **(extra_headers or {}),
        }

        # This method is not `async`, so the result of `create(...)` is not awaited here;
        # it is handed to the stream manager as-is.
        api_request = self._client.chat.completions.create(
            messages=messages,
            model=model,
            audio=audio,
            stream=True,
            response_format=_type_to_response_format(response_format),
            frequency_penalty=frequency_penalty,
            function_call=function_call,
            functions=functions,
            logit_bias=logit_bias,
            logprobs=logprobs,
            max_completion_tokens=max_completion_tokens,
            max_tokens=max_tokens,
            metadata=metadata,
            modalities=modalities,
            n=n,
            parallel_tool_calls=parallel_tool_calls,
            prediction=prediction,
            presence_penalty=presence_penalty,
            reasoning_effort=reasoning_effort,
            seed=seed,
            service_tier=service_tier,
            stop=stop,
            store=store,
            stream_options=stream_options,
            temperature=temperature,
            tool_choice=tool_choice,
            tools=tools,
            top_logprobs=top_logprobs,
            top_p=top_p,
            user=user,
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
            web_search_options=web_search_options,
        )
        return AsyncChatCompletionStreamManager(
            api_request,
            response_format=response_format,
            input_tools=tools,
        )


class CompletionsWithRawResponse:
    """View of `Completions` whose `parse` is wrapped by `to_raw_response_wrapper`."""

    def __init__(self, completions: Completions) -> None:
        self._completions = completions

        self.parse = _legacy_response.to_raw_response_wrapper(
            completions.parse,
        )


class AsyncCompletionsWithRawResponse:
    """View of `AsyncCompletions` whose `parse` is wrapped by `async_to_raw_response_wrapper`."""

    def __init__(self, completions: AsyncCompletions) -> None:
        self._completions = completions

        self.parse = _legacy_response.async_to_raw_response_wrapper(
            completions.parse,
        )


class CompletionsWithStreamingResponse:
    """View of `Completions` whose `parse` is wrapped by `to_streamed_response_wrapper`."""

    def __init__(self, completions: Completions) -> None:
        self._completions = completions

        self.parse = to_streamed_response_wrapper(
            completions.parse,
        )


class AsyncCompletionsWithStreamingResponse:
    """View of `AsyncCompletions` whose `parse` is wrapped by `async_to_streamed_response_wrapper`."""

    def __init__(self, completions: AsyncCompletions) -> None:
        self._completions = completions

        self.parse = async_to_streamed_response_wrapper(
            completions.parse,
        )
a/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/realtime.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/realtime.py new file mode 100644 index 00000000..76e57f8c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/realtime.py @@ -0,0 +1,1066 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import json +import logging +from types import TracebackType +from typing import TYPE_CHECKING, Any, Iterator, cast +from typing_extensions import AsyncIterator + +import httpx +from pydantic import BaseModel + +from .sessions import ( + Sessions, + AsyncSessions, + SessionsWithRawResponse, + AsyncSessionsWithRawResponse, + SessionsWithStreamingResponse, + AsyncSessionsWithStreamingResponse, +) +from ...._types import NOT_GIVEN, Query, Headers, NotGiven +from ...._utils import ( + is_azure_client, + maybe_transform, + strip_not_given, + async_maybe_transform, + is_async_azure_client, +) +from ...._compat import cached_property +from ...._models import construct_type_unchecked +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._exceptions import OpenAIError +from ...._base_client import _merge_mappings +from ....types.beta.realtime import ( + session_update_event_param, + response_create_event_param, + transcription_session_update_param, +) +from .transcription_sessions import ( + TranscriptionSessions, + AsyncTranscriptionSessions, + TranscriptionSessionsWithRawResponse, + AsyncTranscriptionSessionsWithRawResponse, + TranscriptionSessionsWithStreamingResponse, + AsyncTranscriptionSessionsWithStreamingResponse, +) +from ....types.websocket_connection_options import WebsocketConnectionOptions +from ....types.beta.realtime.realtime_client_event import RealtimeClientEvent +from ....types.beta.realtime.realtime_server_event import RealtimeServerEvent +from ....types.beta.realtime.conversation_item_param 
import ConversationItemParam +from ....types.beta.realtime.realtime_client_event_param import RealtimeClientEventParam + +if TYPE_CHECKING: + from websockets.sync.client import ClientConnection as WebsocketConnection + from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection + + from ...._client import OpenAI, AsyncOpenAI + +__all__ = ["Realtime", "AsyncRealtime"] + +log: logging.Logger = logging.getLogger(__name__) + + +class Realtime(SyncAPIResource): + @cached_property + def sessions(self) -> Sessions: + return Sessions(self._client) + + @cached_property + def transcription_sessions(self) -> TranscriptionSessions: + return TranscriptionSessions(self._client) + + @cached_property + def with_raw_response(self) -> RealtimeWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return RealtimeWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> RealtimeWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return RealtimeWithStreamingResponse(self) + + def connect( + self, + *, + model: str, + extra_query: Query = {}, + extra_headers: Headers = {}, + websocket_connection_options: WebsocketConnectionOptions = {}, + ) -> RealtimeConnectionManager: + """ + The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling. + + Some notable benefits of the API include: + + - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output. 
+ - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction. + - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback. + + The Realtime API is a stateful, event-based API that communicates over a WebSocket. + """ + return RealtimeConnectionManager( + client=self._client, + extra_query=extra_query, + extra_headers=extra_headers, + websocket_connection_options=websocket_connection_options, + model=model, + ) + + +class AsyncRealtime(AsyncAPIResource): + @cached_property + def sessions(self) -> AsyncSessions: + return AsyncSessions(self._client) + + @cached_property + def transcription_sessions(self) -> AsyncTranscriptionSessions: + return AsyncTranscriptionSessions(self._client) + + @cached_property + def with_raw_response(self) -> AsyncRealtimeWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncRealtimeWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncRealtimeWithStreamingResponse(self) + + def connect( + self, + *, + model: str, + extra_query: Query = {}, + extra_headers: Headers = {}, + websocket_connection_options: WebsocketConnectionOptions = {}, + ) -> AsyncRealtimeConnectionManager: + """ + The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling. 
+ + Some notable benefits of the API include: + + - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output. + - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction. + - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback. + + The Realtime API is a stateful, event-based API that communicates over a WebSocket. + """ + return AsyncRealtimeConnectionManager( + client=self._client, + extra_query=extra_query, + extra_headers=extra_headers, + websocket_connection_options=websocket_connection_options, + model=model, + ) + + +class RealtimeWithRawResponse: + def __init__(self, realtime: Realtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> SessionsWithRawResponse: + return SessionsWithRawResponse(self._realtime.sessions) + + @cached_property + def transcription_sessions(self) -> TranscriptionSessionsWithRawResponse: + return TranscriptionSessionsWithRawResponse(self._realtime.transcription_sessions) + + +class AsyncRealtimeWithRawResponse: + def __init__(self, realtime: AsyncRealtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> AsyncSessionsWithRawResponse: + return AsyncSessionsWithRawResponse(self._realtime.sessions) + + @cached_property + def transcription_sessions(self) -> AsyncTranscriptionSessionsWithRawResponse: + return AsyncTranscriptionSessionsWithRawResponse(self._realtime.transcription_sessions) + + +class RealtimeWithStreamingResponse: + def __init__(self, realtime: Realtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> SessionsWithStreamingResponse: + return SessionsWithStreamingResponse(self._realtime.sessions) + + @cached_property + def transcription_sessions(self) -> TranscriptionSessionsWithStreamingResponse: + return 
TranscriptionSessionsWithStreamingResponse(self._realtime.transcription_sessions) + + +class AsyncRealtimeWithStreamingResponse: + def __init__(self, realtime: AsyncRealtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> AsyncSessionsWithStreamingResponse: + return AsyncSessionsWithStreamingResponse(self._realtime.sessions) + + @cached_property + def transcription_sessions(self) -> AsyncTranscriptionSessionsWithStreamingResponse: + return AsyncTranscriptionSessionsWithStreamingResponse(self._realtime.transcription_sessions) + + +class AsyncRealtimeConnection: + """Represents a live websocket connection to the Realtime API""" + + session: AsyncRealtimeSessionResource + response: AsyncRealtimeResponseResource + input_audio_buffer: AsyncRealtimeInputAudioBufferResource + conversation: AsyncRealtimeConversationResource + transcription_session: AsyncRealtimeTranscriptionSessionResource + + _connection: AsyncWebsocketConnection + + def __init__(self, connection: AsyncWebsocketConnection) -> None: + self._connection = connection + + self.session = AsyncRealtimeSessionResource(self) + self.response = AsyncRealtimeResponseResource(self) + self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self) + self.conversation = AsyncRealtimeConversationResource(self) + self.transcription_session = AsyncRealtimeTranscriptionSessionResource(self) + + async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]: + """ + An infinite-iterator that will continue to yield events until + the connection is closed. + """ + from websockets.exceptions import ConnectionClosedOK + + try: + while True: + yield await self.recv() + except ConnectionClosedOK: + return + + async def recv(self) -> RealtimeServerEvent: + """ + Receive the next message from the connection and parses it into a `RealtimeServerEvent` object. + + Canceling this method is safe. There's no risk of losing data. 
+ """ + return self.parse_event(await self.recv_bytes()) + + async def recv_bytes(self) -> bytes: + """Receive the next message from the connection as raw bytes. + + Canceling this method is safe. There's no risk of losing data. + + If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does, + then you can call `.parse_event(data)`. + """ + message = await self._connection.recv(decode=False) + log.debug(f"Received websocket message: %s", message) + if not isinstance(message, bytes): + # passing `decode=False` should always result in us getting `bytes` back + raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}") + + return message + + async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None: + data = ( + event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True) + if isinstance(event, BaseModel) + else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam)) + ) + await self._connection.send(data) + + async def close(self, *, code: int = 1000, reason: str = "") -> None: + await self._connection.close(code=code, reason=reason) + + def parse_event(self, data: str | bytes) -> RealtimeServerEvent: + """ + Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object. + + This is helpful if you're using `.recv_bytes()`. + """ + return cast( + RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent)) + ) + + +class AsyncRealtimeConnectionManager: + """ + Context manager over a `AsyncRealtimeConnection` that is returned by `beta.realtime.connect()` + + This context manager ensures that the connection will be closed when it exits. + + --- + + Note that if your application doesn't work well with the context manager approach then you + can call the `.enter()` method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. 
+ + ```py + connection = await client.beta.realtime.connect(...).enter() + # ... + await connection.close() + ``` + """ + + def __init__( + self, + *, + client: AsyncOpenAI, + model: str, + extra_query: Query, + extra_headers: Headers, + websocket_connection_options: WebsocketConnectionOptions, + ) -> None: + self.__client = client + self.__model = model + self.__connection: AsyncRealtimeConnection | None = None + self.__extra_query = extra_query + self.__extra_headers = extra_headers + self.__websocket_connection_options = websocket_connection_options + + async def __aenter__(self) -> AsyncRealtimeConnection: + """ + 👋 If your application doesn't work well with the context manager approach then you + can call this method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = await client.beta.realtime.connect(...).enter() + # ... + await connection.close() + ``` + """ + try: + from websockets.asyncio.client import connect + except ImportError as exc: + raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc + + extra_query = self.__extra_query + auth_headers = self.__client.auth_headers + if is_async_azure_client(self.__client): + url, auth_headers = await self.__client._configure_realtime(self.__model, extra_query) + else: + url = self._prepare_url().copy_with( + params={ + **self.__client.base_url.params, + "model": self.__model, + **extra_query, + }, + ) + log.debug("Connecting to %s", url) + if self.__websocket_connection_options: + log.debug("Connection options: %s", self.__websocket_connection_options) + + self.__connection = AsyncRealtimeConnection( + await connect( + str(url), + user_agent_header=self.__client.user_agent, + additional_headers=_merge_mappings( + { + **auth_headers, + "OpenAI-Beta": "realtime=v1", + }, + self.__extra_headers, + ), + **self.__websocket_connection_options, + ) + ) + + return self.__connection + + enter = __aenter__ 
+ + def _prepare_url(self) -> httpx.URL: + if self.__client.websocket_base_url is not None: + base_url = httpx.URL(self.__client.websocket_base_url) + else: + base_url = self.__client._base_url.copy_with(scheme="wss") + + merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime" + return base_url.copy_with(raw_path=merge_raw_path) + + async def __aexit__( + self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None + ) -> None: + if self.__connection is not None: + await self.__connection.close() + + +class RealtimeConnection: + """Represents a live websocket connection to the Realtime API""" + + session: RealtimeSessionResource + response: RealtimeResponseResource + input_audio_buffer: RealtimeInputAudioBufferResource + conversation: RealtimeConversationResource + transcription_session: RealtimeTranscriptionSessionResource + + _connection: WebsocketConnection + + def __init__(self, connection: WebsocketConnection) -> None: + self._connection = connection + + self.session = RealtimeSessionResource(self) + self.response = RealtimeResponseResource(self) + self.input_audio_buffer = RealtimeInputAudioBufferResource(self) + self.conversation = RealtimeConversationResource(self) + self.transcription_session = RealtimeTranscriptionSessionResource(self) + + def __iter__(self) -> Iterator[RealtimeServerEvent]: + """ + An infinite-iterator that will continue to yield events until + the connection is closed. + """ + from websockets.exceptions import ConnectionClosedOK + + try: + while True: + yield self.recv() + except ConnectionClosedOK: + return + + def recv(self) -> RealtimeServerEvent: + """ + Receive the next message from the connection and parses it into a `RealtimeServerEvent` object. + + Canceling this method is safe. There's no risk of losing data. + """ + return self.parse_event(self.recv_bytes()) + + def recv_bytes(self) -> bytes: + """Receive the next message from the connection as raw bytes. 
+ + Canceling this method is safe. There's no risk of losing data. + + If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does, + then you can call `.parse_event(data)`. + """ + message = self._connection.recv(decode=False) + log.debug(f"Received websocket message: %s", message) + if not isinstance(message, bytes): + # passing `decode=False` should always result in us getting `bytes` back + raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}") + + return message + + def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None: + data = ( + event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True) + if isinstance(event, BaseModel) + else json.dumps(maybe_transform(event, RealtimeClientEventParam)) + ) + self._connection.send(data) + + def close(self, *, code: int = 1000, reason: str = "") -> None: + self._connection.close(code=code, reason=reason) + + def parse_event(self, data: str | bytes) -> RealtimeServerEvent: + """ + Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object. + + This is helpful if you're using `.recv_bytes()`. + """ + return cast( + RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent)) + ) + + +class RealtimeConnectionManager: + """ + Context manager over a `RealtimeConnection` that is returned by `beta.realtime.connect()` + + This context manager ensures that the connection will be closed when it exits. + + --- + + Note that if your application doesn't work well with the context manager approach then you + can call the `.enter()` method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = client.beta.realtime.connect(...).enter() + # ... 
+ connection.close() + ``` + """ + + def __init__( + self, + *, + client: OpenAI, + model: str, + extra_query: Query, + extra_headers: Headers, + websocket_connection_options: WebsocketConnectionOptions, + ) -> None: + self.__client = client + self.__model = model + self.__connection: RealtimeConnection | None = None + self.__extra_query = extra_query + self.__extra_headers = extra_headers + self.__websocket_connection_options = websocket_connection_options + + def __enter__(self) -> RealtimeConnection: + """ + 👋 If your application doesn't work well with the context manager approach then you + can call this method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = client.beta.realtime.connect(...).enter() + # ... + connection.close() + ``` + """ + try: + from websockets.sync.client import connect + except ImportError as exc: + raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc + + extra_query = self.__extra_query + auth_headers = self.__client.auth_headers + if is_azure_client(self.__client): + url, auth_headers = self.__client._configure_realtime(self.__model, extra_query) + else: + url = self._prepare_url().copy_with( + params={ + **self.__client.base_url.params, + "model": self.__model, + **extra_query, + }, + ) + log.debug("Connecting to %s", url) + if self.__websocket_connection_options: + log.debug("Connection options: %s", self.__websocket_connection_options) + + self.__connection = RealtimeConnection( + connect( + str(url), + user_agent_header=self.__client.user_agent, + additional_headers=_merge_mappings( + { + **auth_headers, + "OpenAI-Beta": "realtime=v1", + }, + self.__extra_headers, + ), + **self.__websocket_connection_options, + ) + ) + + return self.__connection + + enter = __enter__ + + def _prepare_url(self) -> httpx.URL: + if self.__client.websocket_base_url is not None: + base_url = httpx.URL(self.__client.websocket_base_url) 
+ else: + base_url = self.__client._base_url.copy_with(scheme="wss") + + merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime" + return base_url.copy_with(raw_path=merge_raw_path) + + def __exit__( + self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None + ) -> None: + if self.__connection is not None: + self.__connection.close() + + +class BaseRealtimeConnectionResource: + def __init__(self, connection: RealtimeConnection) -> None: + self._connection = connection + + +class RealtimeSessionResource(BaseRealtimeConnectionResource): + def update(self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN) -> None: + """ + Send this event to update the session’s default configuration. + The client may send this event at any time to update any field, + except for `voice`. However, note that once a session has been + initialized with a particular `model`, it can’t be changed to + another model using `session.update`. + + When the server receives a `session.update`, it will respond + with a `session.updated` event showing the full, effective configuration. + Only the fields that are present are updated. To clear a field like + `instructions`, pass an empty string. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "session.update", "session": session, "event_id": event_id}), + ) + ) + + +class RealtimeResponseResource(BaseRealtimeConnectionResource): + def create( + self, + *, + event_id: str | NotGiven = NOT_GIVEN, + response: response_create_event_param.Response | NotGiven = NOT_GIVEN, + ) -> None: + """ + This event instructs the server to create a Response, which means triggering + model inference. When in Server VAD mode, the server will create Responses + automatically. + + A Response will include at least one Item, and may have two, in which case + the second will be a function call. 
These Items will be appended to the + conversation history. + + The server will respond with a `response.created` event, events for Items + and content created, and finally a `response.done` event to indicate the + Response is complete. + + The `response.create` event includes inference configuration like + `instructions`, and `temperature`. These fields will override the Session's + configuration for this Response only. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.create", "event_id": event_id, "response": response}), + ) + ) + + def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to cancel an in-progress response. + + The server will respond + with a `response.cancelled` event or an error if there is no response to + cancel. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}), + ) + ) + + +class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource): + def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to clear the audio bytes in the buffer. + + The server will + respond with an `input_audio_buffer.cleared` event. + """ + self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id})) + ) + + def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """ + Send this event to commit the user input audio buffer, which will create a + new user message item in the conversation. This event will produce an error + if the input audio buffer is empty. When in Server VAD mode, the client does + not need to send this event, the server will commit the audio buffer + automatically. 
+ + Committing the input audio buffer will trigger input audio transcription + (if enabled in session configuration), but it will not create a response + from the model. The server will respond with an `input_audio_buffer.committed` + event. + """ + self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id})) + ) + + def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to append audio bytes to the input audio buffer. + + The audio + buffer is temporary storage you can write to and later commit. In Server VAD + mode, the audio buffer is used to detect speech and the server will decide + when to commit. When Server VAD is disabled, you must commit the audio buffer + manually. + + The client may choose how much audio to place in each event up to a maximum + of 15 MiB, for example streaming smaller chunks from the client may allow the + VAD to be more responsive. Unlike most other client events, the server will + not send a confirmation response to this event. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}), + ) + ) + + +class RealtimeConversationResource(BaseRealtimeConnectionResource): + @cached_property + def item(self) -> RealtimeConversationItemResource: + return RealtimeConversationItemResource(self._connection) + + +class RealtimeConversationItemResource(BaseRealtimeConnectionResource): + def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event when you want to remove any item from the conversation + history. + + The server will respond with a `conversation.item.deleted` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error.
+ """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}), + ) + ) + + def create( + self, + *, + item: ConversationItemParam, + event_id: str | NotGiven = NOT_GIVEN, + previous_item_id: str | NotGiven = NOT_GIVEN, + ) -> None: + """ + Add a new Item to the Conversation's context, including messages, function + calls, and function call responses. This event can be used both to populate a + "history" of the conversation and to add new items mid-stream, but has the + current limitation that it cannot populate assistant audio messages. + + If successful, the server will respond with a `conversation.item.created` + event, otherwise an `error` event will be sent. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.create", + "item": item, + "event_id": event_id, + "previous_item_id": previous_item_id, + } + ), + ) + ) + + def truncate( + self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN + ) -> None: + """Send this event to truncate a previous assistant message’s audio. + + The server + will produce audio faster than realtime, so this event is useful when the user + interrupts to truncate audio that has already been sent to the client but not + yet played. This will synchronize the server's understanding of the audio with + the client's playback. + + Truncating audio will delete the server-side text transcript to ensure there + is not text in the context that hasn't been heard by the user. + + If successful, the server will respond with a `conversation.item.truncated` + event. 
+ """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.truncate", + "audio_end_ms": audio_end_ms, + "content_index": content_index, + "item_id": item_id, + "event_id": event_id, + } + ), + ) + ) + + def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """ + Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD. + The server will respond with a `conversation.item.retrieved` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}), + ) + ) + + +class RealtimeTranscriptionSessionResource(BaseRealtimeConnectionResource): + def update( + self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN + ) -> None: + """Send this event to update a transcription session.""" + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}), + ) + ) + + +class BaseAsyncRealtimeConnectionResource: + def __init__(self, connection: AsyncRealtimeConnection) -> None: + self._connection = connection + + +class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource): + async def update( + self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN + ) -> None: + """ + Send this event to update the session’s default configuration. + The client may send this event at any time to update any field, + except for `voice`. 
However, note that once a session has been + initialized with a particular `model`, it can’t be changed to + another model using `session.update`. + + When the server receives a `session.update`, it will respond + with a `session.updated` event showing the full, effective configuration. + Only the fields that are present are updated. To clear a field like + `instructions`, pass an empty string. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "session.update", "session": session, "event_id": event_id}), + ) + ) + + +class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource): + async def create( + self, + *, + event_id: str | NotGiven = NOT_GIVEN, + response: response_create_event_param.Response | NotGiven = NOT_GIVEN, + ) -> None: + """ + This event instructs the server to create a Response, which means triggering + model inference. When in Server VAD mode, the server will create Responses + automatically. + + A Response will include at least one Item, and may have two, in which case + the second will be a function call. These Items will be appended to the + conversation history. + + The server will respond with a `response.created` event, events for Items + and content created, and finally a `response.done` event to indicate the + Response is complete. + + The `response.create` event includes inference configuration like + `instructions`, and `temperature`. These fields will override the Session's + configuration for this Response only. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.create", "event_id": event_id, "response": response}), + ) + ) + + async def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to cancel an in-progress response. + + The server will respond + with a `response.cancelled` event or an error if there is no response to + cancel. 
+ """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}), + ) + ) + + +class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource): + async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to clear the audio bytes in the buffer. + + The server will + respond with an `input_audio_buffer.cleared` event. + """ + await self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id})) + ) + + async def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """ + Send this event to commit the user input audio buffer, which will create a + new user message item in the conversation. This event will produce an error + if the input audio buffer is empty. When in Server VAD mode, the client does + not need to send this event, the server will commit the audio buffer + automatically. + + Committing the input audio buffer will trigger input audio transcription + (if enabled in session configuration), but it will not create a response + from the model. The server will respond with an `input_audio_buffer.committed` + event. + """ + await self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id})) + ) + + async def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to append audio bytes to the input audio buffer. + + The audio + buffer is temporary storage you can write to and later commit. In Server VAD + mode, the audio buffer is used to detect speech and the server will decide + when to commit. When Server VAD is disabled, you must commit the audio buffer + manually. 
+ + The client may choose how much audio to place in each event up to a maximum + of 15 MiB, for example streaming smaller chunks from the client may allow the + VAD to be more responsive. Unlike most other client events, the server will + not send a confirmation response to this event. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}), + ) + ) + + +class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource): + @cached_property + def item(self) -> AsyncRealtimeConversationItemResource: + return AsyncRealtimeConversationItemResource(self._connection) + + +class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource): + async def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event when you want to remove any item from the conversation + history. + + The server will respond with a `conversation.item.deleted` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}), + ) + ) + + async def create( + self, + *, + item: ConversationItemParam, + event_id: str | NotGiven = NOT_GIVEN, + previous_item_id: str | NotGiven = NOT_GIVEN, + ) -> None: + """ + Add a new Item to the Conversation's context, including messages, function + calls, and function call responses. This event can be used both to populate a + "history" of the conversation and to add new items mid-stream, but has the + current limitation that it cannot populate assistant audio messages. + + If successful, the server will respond with a `conversation.item.created` + event, otherwise an `error` event will be sent.
+ """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.create", + "item": item, + "event_id": event_id, + "previous_item_id": previous_item_id, + } + ), + ) + ) + + async def truncate( + self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN + ) -> None: + """Send this event to truncate a previous assistant message’s audio. + + The server + will produce audio faster than realtime, so this event is useful when the user + interrupts to truncate audio that has already been sent to the client but not + yet played. This will synchronize the server's understanding of the audio with + the client's playback. + + Truncating audio will delete the server-side text transcript to ensure there + is not text in the context that hasn't been heard by the user. + + If successful, the server will respond with a `conversation.item.truncated` + event. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.truncate", + "audio_end_ms": audio_end_ms, + "content_index": content_index, + "item_id": item_id, + "event_id": event_id, + } + ), + ) + ) + + async def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """ + Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD. + The server will respond with a `conversation.item.retrieved` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. 
+ """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}), + ) + ) + + +class AsyncRealtimeTranscriptionSessionResource(BaseAsyncRealtimeConnectionResource): + async def update( + self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN + ) -> None: + """Send this event to update a transcription session.""" + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}), + ) + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py new file mode 100644 index 00000000..5884e54d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py @@ -0,0 +1,383 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal + +import httpx + +from .... 
import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._base_client import make_request_options +from ....types.beta.realtime import session_create_params +from ....types.beta.realtime.session_create_response import SessionCreateResponse + +__all__ = ["Sessions", "AsyncSessions"] + + +class Sessions(SyncAPIResource): + @cached_property + def with_raw_response(self) -> SessionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return SessionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> SessionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return SessionsWithStreamingResponse(self) + + def create( + self, + *, + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + input_audio_noise_reduction: session_create_params.InputAudioNoiseReduction | NotGiven = NOT_GIVEN, + input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN, + instructions: str | NotGiven = NOT_GIVEN, + max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN, + modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN, + model: Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + | NotGiven = NOT_GIVEN, + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: str | NotGiven = NOT_GIVEN, + tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN, + turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN, + voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SessionCreateResponse: + """ + Create an ephemeral API token for use in client-side applications with the + Realtime API. Can be configured with the same session parameters as the + `session.update` client event. 
+ + It responds with a session object, plus a `client_secret` key which contains a + usable ephemeral API token that can be used to authenticate browser clients for + the Realtime API. + + Args: + input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For + `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel + (mono), and little-endian byte order. + + input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn + off. Noise reduction filters audio added to the input audio buffer before it is + sent to VAD and the model. Filtering the audio can improve VAD and turn + detection accuracy (reducing false positives) and model performance by improving + perception of the input audio. + + input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through + [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + and should be treated as guidance of input audio content rather than precisely + what the model heard. The client can optionally set the language and prompt for + transcription, these offer additional guidance to the transcription service. + + instructions: The default system instructions (i.e. system message) prepended to model calls. + This field allows the client to guide the model on desired responses. The model + can be instructed on response content and format, (e.g. "be extremely succinct", + "act friendly", "here are examples of good responses") and on audio behavior + (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The + instructions are not guaranteed to be followed by the model, but they provide + guidance to the model on the desired behavior. 
+ + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + + max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + + modalities: The set of modalities the model can respond with. To disable audio, set this to + ["text"]. + + model: The Realtime model used for this session. + + output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + For `pcm16`, output audio is sampled at a rate of 24kHz. + + temperature: Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a + temperature of 0.8 is highly recommended for best performance. + + tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify + a function. + + tools: Tools (functions) available to the model. + + turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be + set to `null` to turn off, in which case the client must manually trigger model + response. Server VAD means that the model will detect the start and end of + speech based on audio volume and respond at the end of user speech. Semantic VAD + is more advanced and uses a turn detection model (in conjunction with VAD) to + semantically estimate whether the user has finished speaking, then dynamically + sets a timeout based on this probability. For example, if user audio trails off + with "uhhm", the model will score a low probability of turn end and wait longer + for the user to continue speaking. This can be useful for more natural + conversations, but may have a higher latency. + + voice: The voice the model uses to respond.
Voice cannot be changed during the session + once the model has responded with audio at least once. Current voice options are + `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer` and `verse`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + "/realtime/sessions", + body=maybe_transform( + { + "input_audio_format": input_audio_format, + "input_audio_noise_reduction": input_audio_noise_reduction, + "input_audio_transcription": input_audio_transcription, + "instructions": instructions, + "max_response_output_tokens": max_response_output_tokens, + "modalities": modalities, + "model": model, + "output_audio_format": output_audio_format, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "turn_detection": turn_detection, + "voice": voice, + }, + session_create_params.SessionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=SessionCreateResponse, + ) + + +class AsyncSessions(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncSessionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncSessionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncSessionsWithStreamingResponse(self) + + async def create( + self, + *, + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + input_audio_noise_reduction: session_create_params.InputAudioNoiseReduction | NotGiven = NOT_GIVEN, + input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN, + instructions: str | NotGiven = NOT_GIVEN, + max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN, + modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN, + model: Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + | NotGiven = NOT_GIVEN, + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: str | NotGiven = NOT_GIVEN, + tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN, + turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN, + voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SessionCreateResponse: + """ + Create an ephemeral API token for use in client-side applications with the + Realtime API. Can be configured with the same session parameters as the + `session.update` client event. 
+ + It responds with a session object, plus a `client_secret` key which contains a + usable ephemeral API token that can be used to authenticate browser clients for + the Realtime API. + + Args: + input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For + `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel + (mono), and little-endian byte order. + + input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn + off. Noise reduction filters audio added to the input audio buffer before it is + sent to VAD and the model. Filtering the audio can improve VAD and turn + detection accuracy (reducing false positives) and model performance by improving + perception of the input audio. + + input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through + [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + and should be treated as guidance of input audio content rather than precisely + what the model heard. The client can optionally set the language and prompt for + transcription, these offer additional guidance to the transcription service. + + instructions: The default system instructions (i.e. system message) prepended to model calls. + This field allows the client to guide the model on desired responses. The model + can be instructed on response content and format, (e.g. "be extremely succinct", + "act friendly", "here are examples of good responses") and on audio behavior + (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The + instructions are not guaranteed to be followed by the model, but they provide + guidance to the model on the desired behavior. 
+ + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + + max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + + modalities: The set of modalities the model can respond with. To disable audio, set this to + ["text"]. + + model: The Realtime model used for this session. + + output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + For `pcm16`, output audio is sampled at a rate of 24kHz. + + temperature: Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a + temperature of 0.8 is highly recommended for best performance. + + tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify + a function. + + tools: Tools (functions) available to the model. + + turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be + set to `null` to turn off, in which case the client must manually trigger model + response. Server VAD means that the model will detect the start and end of + speech based on audio volume and respond at the end of user speech. Semantic VAD + is more advanced and uses a turn detection model (in conjunction with VAD) to + semantically estimate whether the user has finished speaking, then dynamically + sets a timeout based on this probability. For example, if user audio trails off + with "uhhm", the model will score a low probability of turn end and wait longer + for the user to continue speaking. This can be useful for more natural + conversations, but may have a higher latency. + + voice: The voice the model uses to respond.
Voice cannot be changed during the session + once the model has responded with audio at least once. Current voice options are + `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer` and `verse`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/realtime/sessions", + body=await async_maybe_transform( + { + "input_audio_format": input_audio_format, + "input_audio_noise_reduction": input_audio_noise_reduction, + "input_audio_transcription": input_audio_transcription, + "instructions": instructions, + "max_response_output_tokens": max_response_output_tokens, + "modalities": modalities, + "model": model, + "output_audio_format": output_audio_format, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "turn_detection": turn_detection, + "voice": voice, + }, + session_create_params.SessionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=SessionCreateResponse, + ) + + +class SessionsWithRawResponse: + def __init__(self, sessions: Sessions) -> None: + self._sessions = sessions + + self.create = _legacy_response.to_raw_response_wrapper( + sessions.create, + ) + + +class AsyncSessionsWithRawResponse: + def __init__(self, sessions: AsyncSessions) -> None: + self._sessions = sessions + + self.create = _legacy_response.async_to_raw_response_wrapper( + sessions.create, + ) + + +class SessionsWithStreamingResponse: + def __init__(self, sessions: Sessions) -> None: + self._sessions = sessions + + self.create = to_streamed_response_wrapper( + sessions.create, + ) + + +class AsyncSessionsWithStreamingResponse: + def __init__(self,
sessions: AsyncSessions) -> None: + self._sessions = sessions + + self.create = async_to_streamed_response_wrapper( + sessions.create, + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/transcription_sessions.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/transcription_sessions.py new file mode 100644 index 00000000..0917da71 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/transcription_sessions.py @@ -0,0 +1,277 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Literal + +import httpx + +from .... import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._base_client import make_request_options +from ....types.beta.realtime import transcription_session_create_params +from ....types.beta.realtime.transcription_session import TranscriptionSession + +__all__ = ["TranscriptionSessions", "AsyncTranscriptionSessions"] + + +class TranscriptionSessions(SyncAPIResource): + @cached_property + def with_raw_response(self) -> TranscriptionSessionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return TranscriptionSessionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> TranscriptionSessionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return TranscriptionSessionsWithStreamingResponse(self) + + def create( + self, + *, + include: List[str] | NotGiven = NOT_GIVEN, + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + input_audio_noise_reduction: transcription_session_create_params.InputAudioNoiseReduction + | NotGiven = NOT_GIVEN, + input_audio_transcription: transcription_session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN, + modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN, + turn_detection: transcription_session_create_params.TurnDetection | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TranscriptionSession: + """ + Create an ephemeral API token for use in client-side applications with the + Realtime API specifically for realtime transcriptions. Can be configured with + the same session parameters as the `transcription_session.update` client event. + + It responds with a session object, plus a `client_secret` key which contains a + usable ephemeral API token that can be used to authenticate browser clients for + the Realtime API. 
+ + Args: + include: + The set of items to include in the transcription. Current available items are: + + - `item.input_audio_transcription.logprobs` + + input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For + `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel + (mono), and little-endian byte order. + + input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn + off. Noise reduction filters audio added to the input audio buffer before it is + sent to VAD and the model. Filtering the audio can improve VAD and turn + detection accuracy (reducing false positives) and model performance by improving + perception of the input audio. + + input_audio_transcription: Configuration for input audio transcription. The client can optionally set the + language and prompt for transcription, these offer additional guidance to the + transcription service. + + modalities: The set of modalities the model can respond with. To disable audio, set this to + ["text"]. + + turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be + set to `null` to turn off, in which case the client must manually trigger model + response. Server VAD means that the model will detect the start and end of + speech based on audio volume and respond at the end of user speech. Semantic VAD + is more advanced and uses a turn detection model (in conjunction with VAD) to + semantically estimate whether the user has finished speaking, then dynamically + sets a timeout based on this probability. For example, if user audio trails off + with "uhhm", the model will score a low probability of turn end and wait longer + for the user to continue speaking. This can be useful for more natural + conversations, but may have a higher latency.
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + "/realtime/transcription_sessions", + body=maybe_transform( + { + "include": include, + "input_audio_format": input_audio_format, + "input_audio_noise_reduction": input_audio_noise_reduction, + "input_audio_transcription": input_audio_transcription, + "modalities": modalities, + "turn_detection": turn_detection, + }, + transcription_session_create_params.TranscriptionSessionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=TranscriptionSession, + ) + + +class AsyncTranscriptionSessions(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncTranscriptionSessionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncTranscriptionSessionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncTranscriptionSessionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncTranscriptionSessionsWithStreamingResponse(self) + + async def create( + self, + *, + include: List[str] | NotGiven = NOT_GIVEN, + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + input_audio_noise_reduction: transcription_session_create_params.InputAudioNoiseReduction + | NotGiven = NOT_GIVEN, + input_audio_transcription: transcription_session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN, + modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN, + turn_detection: transcription_session_create_params.TurnDetection | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TranscriptionSession: + """ + Create an ephemeral API token for use in client-side applications with the + Realtime API specifically for realtime transcriptions. Can be configured with + the same session parameters as the `transcription_session.update` client event. + + It responds with a session object, plus a `client_secret` key which contains a + usable ephemeral API token that can be used to authenticate browser clients for + the Realtime API. + + Args: + include: + The set of items to include in the transcription. Current available items are: + + - `item.input_audio_transcription.logprobs` + + input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For + `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel + (mono), and little-endian byte order. 
+ + input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn + off. Noise reduction filters audio added to the input audio buffer before it is + sent to VAD and the model. Filtering the audio can improve VAD and turn + detection accuracy (reducing false positives) and model performance by improving + perception of the input audio. + + input_audio_transcription: Configuration for input audio transcription. The client can optionally set the + language and prompt for transcription, these offer additional guidance to the + transcription service. + + modalities: The set of modalities the model can respond with. To disable audio, set this to + ["text"]. + + turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be + set to `null` to turn off, in which case the client must manually trigger model + response. Server VAD means that the model will detect the start and end of + speech based on audio volume and respond at the end of user speech. Semantic VAD + is more advanced and uses a turn detection model (in conjunction with VAD) to + semantically estimate whether the user has finished speaking, then dynamically + sets a timeout based on this probability. For example, if user audio trails off + with "uhhm", the model will score a low probability of turn end and wait longer + for the user to continue speaking. This can be useful for more natural + conversations, but may have a higher latency.
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/realtime/transcription_sessions", + body=await async_maybe_transform( + { + "include": include, + "input_audio_format": input_audio_format, + "input_audio_noise_reduction": input_audio_noise_reduction, + "input_audio_transcription": input_audio_transcription, + "modalities": modalities, + "turn_detection": turn_detection, + }, + transcription_session_create_params.TranscriptionSessionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=TranscriptionSession, + ) + + +class TranscriptionSessionsWithRawResponse: + def __init__(self, transcription_sessions: TranscriptionSessions) -> None: + self._transcription_sessions = transcription_sessions + + self.create = _legacy_response.to_raw_response_wrapper( + transcription_sessions.create, + ) + + +class AsyncTranscriptionSessionsWithRawResponse: + def __init__(self, transcription_sessions: AsyncTranscriptionSessions) -> None: + self._transcription_sessions = transcription_sessions + + self.create = _legacy_response.async_to_raw_response_wrapper( + transcription_sessions.create, + ) + + +class TranscriptionSessionsWithStreamingResponse: + def __init__(self, transcription_sessions: TranscriptionSessions) -> None: + self._transcription_sessions = transcription_sessions + + self.create = to_streamed_response_wrapper( + transcription_sessions.create, + ) + + +class AsyncTranscriptionSessionsWithStreamingResponse: + def __init__(self, transcription_sessions: AsyncTranscriptionSessions) -> None: + self._transcription_sessions = transcription_sessions + + self.create = 
async_to_streamed_response_wrapper( + transcription_sessions.create, + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/__init__.py new file mode 100644 index 00000000..a66e445b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/__init__.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, +) +from .threads import ( + Threads, + AsyncThreads, + ThreadsWithRawResponse, + AsyncThreadsWithRawResponse, + ThreadsWithStreamingResponse, + AsyncThreadsWithStreamingResponse, +) +from .messages import ( + Messages, + AsyncMessages, + MessagesWithRawResponse, + AsyncMessagesWithRawResponse, + MessagesWithStreamingResponse, + AsyncMessagesWithStreamingResponse, +) + +__all__ = [ + "Runs", + "AsyncRuns", + "RunsWithRawResponse", + "AsyncRunsWithRawResponse", + "RunsWithStreamingResponse", + "AsyncRunsWithStreamingResponse", + "Messages", + "AsyncMessages", + "MessagesWithRawResponse", + "AsyncMessagesWithRawResponse", + "MessagesWithStreamingResponse", + "AsyncMessagesWithStreamingResponse", + "Threads", + "AsyncThreads", + "ThreadsWithRawResponse", + "AsyncThreadsWithRawResponse", + "ThreadsWithStreamingResponse", + "AsyncThreadsWithStreamingResponse", +] diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/messages.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/messages.py new file mode 100644 index 00000000..e3374aba --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/messages.py @@ -0,0 +1,670 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal + +import httpx + +from .... import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import ( + AsyncPaginator, + make_request_options, +) +from ....types.beta.threads import message_list_params, message_create_params, message_update_params +from ....types.beta.threads.message import Message +from ....types.shared_params.metadata import Metadata +from ....types.beta.threads.message_deleted import MessageDeleted +from ....types.beta.threads.message_content_part_param import MessageContentPartParam + +__all__ = ["Messages", "AsyncMessages"] + + +class Messages(SyncAPIResource): + @cached_property + def with_raw_response(self) -> MessagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return MessagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> MessagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return MessagesWithStreamingResponse(self) + + def create( + self, + thread_id: str, + *, + content: Union[str, Iterable[MessageContentPartParam]], + role: Literal["user", "assistant"], + attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Message: + """ + Create a message. + + Args: + content: The text contents of the message. + + role: + The role of the entity that is creating the message. Allowed values include: + + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. + + attachments: A list of files attached to the message, and the tools they should be added to. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/threads/{thread_id}/messages", + body=maybe_transform( + { + "content": content, + "role": role, + "attachments": attachments, + "metadata": metadata, + }, + message_create_params.MessageCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Message, + ) + + def retrieve( + self, + message_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Message: + """ + Retrieve a message. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not message_id: + raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/threads/{thread_id}/messages/{message_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Message, + ) + + def update( + self, + message_id: str, + *, + thread_id: str, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Message: + """ + Modifies a message. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not message_id: + raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/threads/{thread_id}/messages/{message_id}", + body=maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Message, + ) + + def list( + self, + thread_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + run_id: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[Message]: + """ + Returns a list of messages for a given thread. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. 
`before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + run_id: Filter messages by the run ID that generated them. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/threads/{thread_id}/messages", + page=SyncCursorPage[Message], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + "run_id": run_id, + }, + message_list_params.MessageListParams, + ), + ), + model=Message, + ) + + def delete( + self, + message_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> MessageDeleted: + """ + Deletes a message. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not message_id: + raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._delete( + f"/threads/{thread_id}/messages/{message_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=MessageDeleted, + ) + + +class AsyncMessages(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncMessagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncMessagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncMessagesWithStreamingResponse(self) + + async def create( + self, + thread_id: str, + *, + content: Union[str, Iterable[MessageContentPartParam]], + role: Literal["user", "assistant"], + attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Message: + """ + Create a message. + + Args: + content: The text contents of the message. + + role: + The role of the entity that is creating the message. Allowed values include: + + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. + + attachments: A list of files attached to the message, and the tools they should be added to. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/messages", + body=await async_maybe_transform( + { + "content": content, + "role": role, + "attachments": attachments, + "metadata": metadata, + }, + message_create_params.MessageCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Message, + ) + + async def retrieve( + self, + message_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Message: + """ + Retrieve a message. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not message_id: + raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/threads/{thread_id}/messages/{message_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Message, + ) + + async def update( + self, + message_id: str, + *, + thread_id: str, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Message: + """ + Modifies a message. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not message_id: + raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/messages/{message_id}", + body=await async_maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Message, + ) + + def list( + self, + thread_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + run_id: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[Message, AsyncCursorPage[Message]]: + """ + Returns a list of messages for a given thread. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. 
`before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + run_id: Filter messages by the run ID that generated them. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/threads/{thread_id}/messages", + page=AsyncCursorPage[Message], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + "run_id": run_id, + }, + message_list_params.MessageListParams, + ), + ), + model=Message, + ) + + async def delete( + self, + message_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> MessageDeleted: + """ + Deletes a message. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not message_id: + raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._delete( + f"/threads/{thread_id}/messages/{message_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=MessageDeleted, + ) + + +class MessagesWithRawResponse: + def __init__(self, messages: Messages) -> None: + self._messages = messages + + self.create = _legacy_response.to_raw_response_wrapper( + messages.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + messages.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + messages.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + messages.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + messages.delete, + ) + + +class AsyncMessagesWithRawResponse: + def __init__(self, messages: AsyncMessages) -> None: + self._messages = messages + + self.create = _legacy_response.async_to_raw_response_wrapper( + messages.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + messages.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + messages.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + messages.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + messages.delete, + ) + + +class MessagesWithStreamingResponse: + def __init__(self, messages: Messages) -> None: + self._messages = 
messages + + self.create = to_streamed_response_wrapper( + messages.create, + ) + self.retrieve = to_streamed_response_wrapper( + messages.retrieve, + ) + self.update = to_streamed_response_wrapper( + messages.update, + ) + self.list = to_streamed_response_wrapper( + messages.list, + ) + self.delete = to_streamed_response_wrapper( + messages.delete, + ) + + +class AsyncMessagesWithStreamingResponse: + def __init__(self, messages: AsyncMessages) -> None: + self._messages = messages + + self.create = async_to_streamed_response_wrapper( + messages.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + messages.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + messages.update, + ) + self.list = async_to_streamed_response_wrapper( + messages.list, + ) + self.delete = async_to_streamed_response_wrapper( + messages.delete, + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/__init__.py new file mode 100644 index 00000000..50aa9fae --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, +) +from .steps import ( + Steps, + AsyncSteps, + StepsWithRawResponse, + AsyncStepsWithRawResponse, + StepsWithStreamingResponse, + AsyncStepsWithStreamingResponse, +) + +__all__ = [ + "Steps", + "AsyncSteps", + "StepsWithRawResponse", + "AsyncStepsWithRawResponse", + "StepsWithStreamingResponse", + "AsyncStepsWithStreamingResponse", + "Runs", + "AsyncRuns", + "RunsWithRawResponse", + "AsyncRunsWithRawResponse", + "RunsWithStreamingResponse", + "AsyncRunsWithStreamingResponse", +] diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/runs.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/runs.py new file mode 100644 index 00000000..acb1c9b2 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/runs.py @@ -0,0 +1,2989 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import typing_extensions +from typing import List, Union, Iterable, Optional +from functools import partial +from typing_extensions import Literal, overload + +import httpx + +from ..... 
import _legacy_response +from .steps import ( + Steps, + AsyncSteps, + StepsWithRawResponse, + AsyncStepsWithRawResponse, + StepsWithStreamingResponse, + AsyncStepsWithStreamingResponse, +) +from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ....._utils import ( + is_given, + required_args, + maybe_transform, + async_maybe_transform, +) +from ....._compat import cached_property +from ....._resource import SyncAPIResource, AsyncAPIResource +from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....._streaming import Stream, AsyncStream +from .....pagination import SyncCursorPage, AsyncCursorPage +from ....._base_client import AsyncPaginator, make_request_options +from .....lib.streaming import ( + AssistantEventHandler, + AssistantEventHandlerT, + AssistantStreamManager, + AsyncAssistantEventHandler, + AsyncAssistantEventHandlerT, + AsyncAssistantStreamManager, +) +from .....types.beta.threads import ( + run_list_params, + run_create_params, + run_update_params, + run_submit_tool_outputs_params, +) +from .....types.beta.threads.run import Run +from .....types.shared.chat_model import ChatModel +from .....types.shared_params.metadata import Metadata +from .....types.shared.reasoning_effort import ReasoningEffort +from .....types.beta.assistant_tool_param import AssistantToolParam +from .....types.beta.assistant_stream_event import AssistantStreamEvent +from .....types.beta.threads.runs.run_step_include import RunStepInclude +from .....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam +from .....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam + +__all__ = ["Runs", "AsyncRuns"] + + +class Runs(SyncAPIResource): + @cached_property + def steps(self) -> Steps: + return Steps(self._client) + + @cached_property + def with_raw_response(self) -> RunsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call 
to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return RunsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> RunsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return RunsWithStreamingResponse(self) + + @overload + def create( + self, + thread_id: str, + *, + assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. 
+ + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. 
Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the initial context window of the run. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + thread_id: str, + *, + assistant_id: str, + stream: Literal[True], + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[AssistantStreamEvent]: + """ + Create a run. 
+ + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. 
+ + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. 
Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
@overload
def create(
    self,
    thread_id: str,
    *,
    assistant_id: str,
    stream: bool,
    include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
    additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
    additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run | Stream[AssistantStreamEvent]:
    """
    Create a run.

    Typing overload for callers that pass `stream` as a plain `bool` (i.e. a value
    that is not statically known to be `True` or `False`). Because the choice is
    only known at runtime, the declared result is the union
    `Run | Stream[AssistantStreamEvent]`. This stub has no body of its own.

    Args:
      thread_id: The ID of the thread the run is created on. It is required, is the only
          positional argument, and the implementation interpolates it into the
          `/threads/{thread_id}/runs` request path.

      assistant_id: The ID of the
          [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
          execute this run.

      stream: If `true`, returns a stream of events that happen during the Run as server-sent
          events, terminating when the Run enters a terminal state with a `data: [DONE]`
          message.

      include: A list of additional fields to include in the response. Currently the only
          supported value is `step_details.tool_calls[*].file_search.results[*].content`
          to fetch the file search result content.

          See the
          [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
          for more information.

      additional_instructions: Appends additional instructions at the end of the instructions for the run. This
          is useful for modifying the behavior on a per-run basis without overriding other
          instructions.

      additional_messages: Adds additional messages to the thread before creating the run.

      instructions: Overrides the
          [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
          of the assistant. This is useful for modifying the behavior on a per-run basis.

      max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
          run. The run will make a best effort to use only the number of completion tokens
          specified, across multiple turns of the run. If the run exceeds the number of
          completion tokens specified, the run will end with status `incomplete`. See
          `incomplete_details` for more info.

      max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
          The run will make a best effort to use only the number of prompt tokens
          specified, across multiple turns of the run. If the run exceeds the number of
          prompt tokens specified, the run will end with status `incomplete`. See
          `incomplete_details` for more info.

      metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
          for storing additional information about the object in a structured format, and
          querying for objects via API or the dashboard.

          Keys are strings with a maximum length of 64 characters. Values are strings with
          a maximum length of 512 characters.

      model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
          be used to execute this run. If a value is provided here, it will override the
          model associated with the assistant. If not, the model associated with the
          assistant will be used.

      parallel_tool_calls: Whether to enable
          [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
          during tool use.

      reasoning_effort: **o-series models only**

          Constrains effort on reasoning for
          [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
          supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
          result in faster responses and fewer tokens used on reasoning in a response.

      response_format: Specifies the format that the model must output. Compatible with
          [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
          [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
          and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

          Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
          Outputs which ensures the model will match your supplied JSON schema. Learn more
          in the
          [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

          Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
          message the model generates is valid JSON.

          **Important:** when using JSON mode, you **must** also instruct the model to
          produce JSON yourself via a system or user message. Without this, the model may
          generate an unending stream of whitespace until the generation reaches the token
          limit, resulting in a long-running and seemingly "stuck" request. Also note that
          the message content may be partially cut off if `finish_reason="length"`, which
          indicates the generation exceeded `max_tokens` or the conversation exceeded the
          max context length.

      temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
          make the output more random, while lower values like 0.2 will make it more
          focused and deterministic.

      tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
          not call any tools and instead generates a message. `auto` is the default value
          and means the model can pick between generating a message or calling one or more
          tools. `required` means the model must call one or more tools before responding
          to the user. Specifying a particular tool like `{"type": "file_search"}` or
          `{"type": "function", "function": {"name": "my_function"}}` forces the model to
          call that tool.

      tools: Override the tools the assistant can use for this run. This is useful for
          modifying the behavior on a per-run basis.

      top_p: An alternative to sampling with temperature, called nucleus sampling, where the
          model considers the results of the tokens with top_p probability mass. So 0.1
          means only the tokens comprising the top 10% probability mass are considered.

          We generally recommend altering this or temperature but not both.

      truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
          control the initial context window of the run.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...
@required_args(["assistant_id"], ["assistant_id", "stream"])
def create(
    self,
    thread_id: str,
    *,
    assistant_id: str,
    include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
    additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
    additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run | Stream[AssistantStreamEvent]:
    """
    Create a run (runtime implementation behind the typed overloads).

    POSTs to `/threads/{thread_id}/runs`. Every run parameter except `include` is
    sent in the JSON body; `include` is sent as a query parameter. Per-parameter
    documentation lives on the `@overload` signatures.

    Returns:
      Whatever `self._post` yields for `cast_to=Run` with
      `stream_cls=Stream[AssistantStreamEvent]`; streaming is requested with
      `stream or False`, so any falsy `stream` is passed on as `False`.

    Raises:
      ValueError: If `thread_id` is empty (falsy).
    """
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

    # Caller-supplied headers are spread last, so they can override the beta header.
    beta_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    endpoint = f"/threads/{thread_id}/runs"

    payload = maybe_transform(
        {
            "assistant_id": assistant_id,
            "additional_instructions": additional_instructions,
            "additional_messages": additional_messages,
            "instructions": instructions,
            "max_completion_tokens": max_completion_tokens,
            "max_prompt_tokens": max_prompt_tokens,
            "metadata": metadata,
            "model": model,
            "parallel_tool_calls": parallel_tool_calls,
            "reasoning_effort": reasoning_effort,
            "response_format": response_format,
            "stream": stream,
            "temperature": temperature,
            "tool_choice": tool_choice,
            "tools": tools,
            "top_p": top_p,
            "truncation_strategy": truncation_strategy,
        },
        run_create_params.RunCreateParams,
    )
    # `include` is the only run parameter that travels in the query string.
    request_options = make_request_options(
        extra_headers=beta_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
        query=maybe_transform({"include": include}, run_create_params.RunCreateParams),
    )
    return self._post(
        endpoint,
        body=payload,
        options=request_options,
        cast_to=Run,
        stream=stream or False,
        stream_cls=Stream[AssistantStreamEvent],
    )

def retrieve(
    self,
    run_id: str,
    *,
    thread_id: str,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run:
    """
    Retrieves a run.

    Issues a GET to `/threads/{thread_id}/runs/{run_id}`.

    Args:
      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `thread_id` or `run_id` is empty; `thread_id` is checked first.
    """
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
    if not run_id:
        raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")

    beta_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    endpoint = f"/threads/{thread_id}/runs/{run_id}"
    request_options = make_request_options(
        extra_headers=beta_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return self._get(endpoint, options=request_options, cast_to=Run)

def update(
    self,
    run_id: str,
    *,
    thread_id: str,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run:
    """
    Modifies a run.

    Issues a POST to `/threads/{thread_id}/runs/{run_id}` whose body carries only
    `metadata`.

    Args:
      metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
          for storing additional information about the object in a structured format, and
          querying for objects via API or the dashboard.

          Keys are strings with a maximum length of 64 characters. Values are strings with
          a maximum length of 512 characters.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `thread_id` or `run_id` is empty; `thread_id` is checked first.
    """
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
    if not run_id:
        raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")

    beta_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    endpoint = f"/threads/{thread_id}/runs/{run_id}"
    payload = maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams)
    request_options = make_request_options(
        extra_headers=beta_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return self._post(endpoint, body=payload, options=request_options, cast_to=Run)

def list(
    self,
    thread_id: str,
    *,
    after: str | NotGiven = NOT_GIVEN,
    before: str | NotGiven = NOT_GIVEN,
    limit: int | NotGiven = NOT_GIVEN,
    order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> SyncCursorPage[Run]:
    """
    Returns a list of runs belonging to a thread.

    Issues a paginated GET to `/threads/{thread_id}/runs`; `after`, `before`,
    `limit` and `order` are sent as query parameters.

    Args:
      after: A cursor for use in pagination. `after` is an object ID that defines your place
          in the list. For instance, if you make a list request and receive 100 objects,
          ending with obj_foo, your subsequent call can include after=obj_foo in order to
          fetch the next page of the list.

      before: A cursor for use in pagination. `before` is an object ID that defines your place
          in the list. For instance, if you make a list request and receive 100 objects,
          starting with obj_foo, your subsequent call can include before=obj_foo in order
          to fetch the previous page of the list.

      limit: A limit on the number of objects to be returned. Limit can range between 1 and
          100, and the default is 20.

      order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
          order and `desc` for descending order.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `thread_id` is empty.
    """
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

    beta_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    endpoint = f"/threads/{thread_id}/runs"
    pagination_query = maybe_transform(
        {
            "after": after,
            "before": before,
            "limit": limit,
            "order": order,
        },
        run_list_params.RunListParams,
    )
    request_options = make_request_options(
        extra_headers=beta_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
        query=pagination_query,
    )
    return self._get_api_list(
        endpoint,
        page=SyncCursorPage[Run],
        options=request_options,
        model=Run,
    )

def cancel(
    self,
    run_id: str,
    *,
    thread_id: str,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run:
    """
    Cancels a run that is `in_progress`.

    Issues a body-less POST to `/threads/{thread_id}/runs/{run_id}/cancel`.

    Args:
      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `thread_id` or `run_id` is empty; `thread_id` is checked first.
    """
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
    if not run_id:
        raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")

    beta_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    endpoint = f"/threads/{thread_id}/runs/{run_id}/cancel"
    request_options = make_request_options(
        extra_headers=beta_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return self._post(endpoint, options=request_options, cast_to=Run)

def create_and_poll(
    self,
    *,
    assistant_id: str,
    include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
    additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
    additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    poll_interval_ms: int | NotGiven = NOT_GIVEN,
    thread_id: str,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run:
    """
    A helper to create a run and poll for a terminal state. More information on Run
    lifecycles can be found here:
    https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps

    The run is created through `self.create(...)` with `stream=False`, then the id
    of the created run is handed to `self.poll(...)`, whose result is returned.
    The same `extra_headers`, `extra_query`, `extra_body` and `timeout` are
    forwarded to both calls; `poll_interval_ms` is forwarded to `poll` only.
    """
    created_run = self.create(
        thread_id=thread_id,
        assistant_id=assistant_id,
        # Polling and streaming are mutually exclusive here, so streaming is forced off.
        stream=False,
        include=include,
        additional_instructions=additional_instructions,
        additional_messages=additional_messages,
        instructions=instructions,
        max_completion_tokens=max_completion_tokens,
        max_prompt_tokens=max_prompt_tokens,
        metadata=metadata,
        model=model,
        parallel_tool_calls=parallel_tool_calls,
        reasoning_effort=reasoning_effort,
        response_format=response_format,
        temperature=temperature,
        tool_choice=tool_choice,
        tools=tools,
        top_p=top_p,
        truncation_strategy=truncation_strategy,
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return self.poll(
        created_run.id,
        thread_id=thread_id,
        poll_interval_ms=poll_interval_ms,
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )

@overload
@typing_extensions.deprecated("use `stream` instead")
def create_and_stream(
    self,
    *,
    assistant_id: str,
    additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
    additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    thread_id: str,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AssistantStreamManager[AssistantEventHandler]:
    """Create a Run stream.

    Deprecated: use `stream` instead. This is the typing overload for calls made
    without an `event_handler`; the returned manager is typed with the default
    `AssistantEventHandler`.
    """
    ...
@overload
@typing_extensions.deprecated("use `stream` instead")
def create_and_stream(
    self,
    *,
    assistant_id: str,
    additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
    additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    thread_id: str,
    event_handler: AssistantEventHandlerT,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AssistantStreamManager[AssistantEventHandlerT]:
    """Create a Run stream.

    Deprecated: use `stream` instead.

    Typing overload for calls that supply their own `event_handler`: the returned
    `AssistantStreamManager` is parameterised with the handler's own type
    (`AssistantEventHandlerT`). All arguments are keyword-only, and `thread_id`
    and `assistant_id` are required. This stub has no body of its own.
    """
    ...
@typing_extensions.deprecated("use `stream` instead")
def create_and_stream(
    self,
    *,
    assistant_id: str,
    additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
    additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    thread_id: str,
    event_handler: AssistantEventHandlerT | None = None,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
    """Create a Run stream (deprecated: use `stream` instead).

    Builds a `POST /threads/{thread_id}/runs` request with `"stream": True` in
    the body, wraps it in a `partial`, and hands it to an
    `AssistantStreamManager`.

    Args:
      event_handler: Handler given to the stream manager. When omitted, a
          default `AssistantEventHandler()` is created.

    Raises:
      ValueError: If `thread_id` is empty.
    """
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

    # Caller-supplied headers are unpacked last so they override the defaults.
    extra_headers = {
        "OpenAI-Beta": "assistants=v2",
        "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
        "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
        **(extra_headers or {}),
    }
    make_request = partial(
        self._post,
        f"/threads/{thread_id}/runs",
        body=maybe_transform(
            {
                "assistant_id": assistant_id,
                "additional_instructions": additional_instructions,
                "additional_messages": additional_messages,
                "instructions": instructions,
                "max_completion_tokens": max_completion_tokens,
                "max_prompt_tokens": max_prompt_tokens,
                "metadata": metadata,
                "model": model,
                "response_format": response_format,
                "temperature": temperature,
                "tool_choice": tool_choice,
                "stream": True,
                "tools": tools,
                "truncation_strategy": truncation_strategy,
                "parallel_tool_calls": parallel_tool_calls,
                "reasoning_effort": reasoning_effort,
                "top_p": top_p,
            },
            run_create_params.RunCreateParams,
        ),
        options=make_request_options(
            extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
        ),
        cast_to=Run,
        stream=True,
        stream_cls=Stream[AssistantStreamEvent],
    )
    return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())

def poll(
    self,
    run_id: str,
    thread_id: str,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    poll_interval_ms: int | NotGiven = NOT_GIVEN,
) -> Run:
    """
    A helper to poll a run status until it reaches a terminal state. More
    information on Run lifecycles can be found here:
    https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps

    Args:
      poll_interval_ms: Fixed delay between retrievals, in milliseconds. When
          omitted, the delay is taken from the `openai-poll-after-ms` header of
          each response, falling back to 1000 ms when the header is absent.

    Returns:
      The first retrieved `Run` whose status is one of `requires_action`,
      `cancelled`, `completed`, `failed`, `expired` or `incomplete`.
    """
    extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})}

    if is_given(poll_interval_ms):
        extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)

    terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"}
    while True:
        response = self.with_raw_response.retrieve(
            thread_id=thread_id,
            run_id=run_id,
            extra_headers=extra_headers,
            extra_body=extra_body,
            extra_query=extra_query,
            timeout=timeout,
        )

        run = response.parse()
        # Return if we reached a terminal state
        if run.status in terminal_states:
            return run

        # Keep the per-iteration delay in its own local. Writing it back into
        # `poll_interval_ms` would make the argument look caller-supplied from
        # the second iteration on, so later server hints would be ignored.
        if is_given(poll_interval_ms):
            sleep_ms = poll_interval_ms
        else:
            from_header = response.headers.get("openai-poll-after-ms")
            sleep_ms = int(from_header) if from_header is not None else 1000

        self._sleep(sleep_ms / 1000)

@overload
def stream(
    self,
    *,
    assistant_id: str,
    include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
    additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
    additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    thread_id: str,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AssistantStreamManager[AssistantEventHandler]:
    """Create a Run stream.

    Typing-only overload for calls without an `event_handler`.
    """
    ...

@overload
def stream(
    self,
    *,
    assistant_id: str,
    include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
    additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
    additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    thread_id: str,
    event_handler: AssistantEventHandlerT,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AssistantStreamManager[AssistantEventHandlerT]:
    """Create a Run stream.

    Typing-only overload for calls that pass their own `event_handler`.
    """
    ...

def stream(
    self,
    *,
    assistant_id: str,
    include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
    additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
    additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    thread_id: str,
    event_handler: AssistantEventHandlerT | None = None,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
    """Create a Run stream.

    Builds a `POST /threads/{thread_id}/runs` request with `"stream": True` in
    the body and `include` as a query parameter, wraps it in a `partial`, and
    hands it to an `AssistantStreamManager`.

    Args:
      event_handler: Handler given to the stream manager. When omitted, a
          default `AssistantEventHandler()` is created.

    Raises:
      ValueError: If `thread_id` is empty.
    """
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

    # Caller-supplied headers are unpacked last so they override the defaults.
    # NOTE(review): the helper tag below still reads `create_and_stream`;
    # left untouched because it is sent on the wire - confirm it is intended.
    extra_headers = {
        "OpenAI-Beta": "assistants=v2",
        "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
        "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
        **(extra_headers or {}),
    }
    make_request = partial(
        self._post,
        f"/threads/{thread_id}/runs",
        body=maybe_transform(
            {
                "assistant_id": assistant_id,
                "additional_instructions": additional_instructions,
                "additional_messages": additional_messages,
                "instructions": instructions,
                "max_completion_tokens": max_completion_tokens,
                "max_prompt_tokens": max_prompt_tokens,
                "metadata": metadata,
                "model": model,
                "response_format": response_format,
                "temperature": temperature,
                "tool_choice": tool_choice,
                "stream": True,
                "tools": tools,
                "parallel_tool_calls": parallel_tool_calls,
                "reasoning_effort": reasoning_effort,
                "truncation_strategy": truncation_strategy,
                "top_p": top_p,
            },
            run_create_params.RunCreateParams,
        ),
        options=make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
            query=maybe_transform({"include": include}, run_create_params.RunCreateParams),
        ),
        cast_to=Run,
        stream=True,
        stream_cls=Stream[AssistantStreamEvent],
    )
    return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())

@overload
def submit_tool_outputs(
    self,
    run_id: str,
    *,
    thread_id: str,
    tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
    stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run:
    """
    When a run has the `status: "requires_action"` and `required_action.type` is
    `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
    tool calls once they're all completed. All outputs must be submitted in a single
    request.

    Args:
      tool_outputs: A list of tools for which the outputs are being submitted.

      stream: If `true`, returns a stream of events that happen during the Run as server-sent
          events, terminating when the Run enters a terminal state with a `data: [DONE]`
          message.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...

@overload
def submit_tool_outputs(
    self,
    run_id: str,
    *,
    thread_id: str,
    stream: Literal[True],
    tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Stream[AssistantStreamEvent]:
    """
    When a run has the `status: "requires_action"` and `required_action.type` is
    `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
    tool calls once they're all completed. All outputs must be submitted in a single
    request.

    Args:
      stream: If `true`, returns a stream of events that happen during the Run as server-sent
          events, terminating when the Run enters a terminal state with a `data: [DONE]`
          message.

      tool_outputs: A list of tools for which the outputs are being submitted.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...

@overload
def submit_tool_outputs(
    self,
    run_id: str,
    *,
    thread_id: str,
    stream: bool,
    tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run | Stream[AssistantStreamEvent]:
    """
    When a run has the `status: "requires_action"` and `required_action.type` is
    `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
    tool calls once they're all completed. All outputs must be submitted in a single
    request.

    Args:
      stream: If `true`, returns a stream of events that happen during the Run as server-sent
          events, terminating when the Run enters a terminal state with a `data: [DONE]`
          message.

      tool_outputs: A list of tools for which the outputs are being submitted.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...

@required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"])
def submit_tool_outputs(
    self,
    run_id: str,
    *,
    thread_id: str,
    tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
    stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run | Stream[AssistantStreamEvent]:
    """Submit tool outputs for a run.

    Sends `POST /threads/{thread_id}/runs/{run_id}/submit_tool_outputs` with
    `tool_outputs` and `stream` in the body.

    Raises:
      ValueError: If `thread_id` or `run_id` is empty.
    """
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
    if not run_id:
        raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
    extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    return self._post(
        f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
        body=maybe_transform(
            {
                "tool_outputs": tool_outputs,
                "stream": stream,
            },
            run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
        ),
        options=make_request_options(
            extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
        ),
        cast_to=Run,
        # Any falsy `stream` (e.g. `None`) collapses to a plain `False` here.
        stream=stream or False,
        stream_cls=Stream[AssistantStreamEvent],
    )

def submit_tool_outputs_and_poll(
    self,
    *,
    tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
    run_id: str,
    thread_id: str,
    poll_interval_ms: int | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run:
    """
    A helper to submit a tool output to a run and poll for a terminal run state.
    More information on Run lifecycles can be found here:
    https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps

    The same `extra_headers`, `extra_query`, `extra_body` and `timeout` are
    passed to both the submit call and the subsequent `poll` call.
    """
    run = self.submit_tool_outputs(
        run_id=run_id,
        thread_id=thread_id,
        tool_outputs=tool_outputs,
        stream=False,
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    # Poll using the id of the run returned by the submit call.
    return self.poll(
        run_id=run.id,
        thread_id=thread_id,
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
        poll_interval_ms=poll_interval_ms,
    )

@overload
def submit_tool_outputs_stream(
    self,
    *,
    tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
    run_id: str,
    thread_id: str,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AssistantStreamManager[AssistantEventHandler]:
    """
    Submit the tool outputs from a previous run and stream the run to a terminal
    state. More information on Run lifecycles can be found here:
    https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
    """
    ...

@overload
def submit_tool_outputs_stream(
    self,
    *,
    tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
    run_id: str,
    thread_id: str,
    event_handler: AssistantEventHandlerT,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AssistantStreamManager[AssistantEventHandlerT]:
    """
    Submit the tool outputs from a previous run and stream the run to a terminal
    state. More information on Run lifecycles can be found here:
    https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
    """
    ...

def submit_tool_outputs_stream(
    self,
    *,
    tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
    run_id: str,
    thread_id: str,
    event_handler: AssistantEventHandlerT | None = None,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
    """
    Submit the tool outputs from a previous run and stream the run to a terminal
    state. More information on Run lifecycles can be found here:
    https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps

    Args:
      event_handler: Handler given to the stream manager. When omitted, a
          default `AssistantEventHandler()` is created.

    Raises:
      ValueError: If `run_id` or `thread_id` is empty.
    """
    if not run_id:
        raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")

    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

    # Caller-supplied headers are unpacked last so they override the defaults.
    extra_headers = {
        "OpenAI-Beta": "assistants=v2",
        "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream",
        "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
        **(extra_headers or {}),
    }
    request = partial(
        self._post,
        f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
        body=maybe_transform(
            {
                "tool_outputs": tool_outputs,
                "stream": True,
            },
            run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
        ),
        options=make_request_options(
            extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
        ),
        cast_to=Run,
        stream=True,
        stream_cls=Stream[AssistantStreamEvent],
    )
    return AssistantStreamManager(request, event_handler=event_handler or AssistantEventHandler())


class AsyncRuns(AsyncAPIResource):
    @cached_property
    def steps(self) -> AsyncSteps:
        """Return an `AsyncSteps` resource bound to this resource's client."""
        return AsyncSteps(self._client)

    @cached_property
    def with_raw_response(self) -> AsyncRunsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncRunsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncRunsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncRunsWithStreamingResponse(self)

    @overload
    async def create(
        self,
        thread_id: str,
        *,
        assistant_id: str,
        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
        instructions: Optional[str] | NotGiven = NOT_GIVEN,
        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Run:
        """
        Create a run.

        Args:
          assistant_id: The ID of the
              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
              execute this run.

          include: A list of additional fields to include in the response. Currently the only
              supported value is `step_details.tool_calls[*].file_search.results[*].content`
              to fetch the file search result content.

              See the
              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
              for more information.

          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
              is useful for modifying the behavior on a per-run basis without overriding other
              instructions.

          additional_messages: Adds additional messages to the thread before creating the run.

          instructions: Overrides the
              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
              of the assistant. This is useful for modifying the behavior on a per-run basis.

          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
              run. The run will make a best effort to use only the number of completion tokens
              specified, across multiple turns of the run. If the run exceeds the number of
              completion tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
              The run will make a best effort to use only the number of prompt tokens
              specified, across multiple turns of the run. If the run exceeds the number of
              prompt tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
              be used to execute this run. If a value is provided here, it will override the
              model associated with the assistant. If not, the model associated with the
              assistant will be used.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          reasoning_effort: **o-series models only**

              Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
              result in faster responses and fewer tokens used on reasoning in a response.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          stream: If `true`, returns a stream of events that happen during the Run as server-sent
              events, terminating when the Run enters a terminal state with a `data: [DONE]`
              message.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tools and instead generates a message. `auto` is the default value
              and means the model can pick between generating a message or calling one or more
              tools. `required` means the model must call one or more tools before responding
              to the user. Specifying a particular tool like `{"type": "file_search"}` or
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

          tools: Override the tools the assistant can use for this run. This is useful for
              modifying the behavior on a per-run basis.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
              control the initial context window of the run.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...
@overload
async def create(
    self,
    thread_id: str,
    *,
    assistant_id: str,
    stream: Literal[True],
    include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
    additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
    additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AsyncStream[AssistantStreamEvent]:
    """
    Create a run.

    Typing-only overload for `stream=True`: the call is typed as returning an
    `AsyncStream[AssistantStreamEvent]`.

    Args:
      assistant_id: The ID of the
          [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
          execute this run.

      stream: If `true`, returns a stream of events that happen during the Run as server-sent
          events, terminating when the Run enters a terminal state with a `data: [DONE]`
          message.

      include: A list of additional fields to include in the response. Currently the only
          supported value is `step_details.tool_calls[*].file_search.results[*].content`
          to fetch the file search result content.

          See the
          [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
          for more information.

      additional_instructions: Appends additional instructions at the end of the instructions for the run. This
          is useful for modifying the behavior on a per-run basis without overriding other
          instructions.

      additional_messages: Adds additional messages to the thread before creating the run.

      instructions: Overrides the
          [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
          of the assistant. This is useful for modifying the behavior on a per-run basis.

      max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
          run. The run will make a best effort to use only the number of completion tokens
          specified, across multiple turns of the run. If the run exceeds the number of
          completion tokens specified, the run will end with status `incomplete`. See
          `incomplete_details` for more info.

      max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
          The run will make a best effort to use only the number of prompt tokens
          specified, across multiple turns of the run. If the run exceeds the number of
          prompt tokens specified, the run will end with status `incomplete`. See
          `incomplete_details` for more info.

      metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
          for storing additional information about the object in a structured format, and
          querying for objects via API or the dashboard.

          Keys are strings with a maximum length of 64 characters. Values are strings with
          a maximum length of 512 characters.

      model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
          be used to execute this run. If a value is provided here, it will override the
          model associated with the assistant. If not, the model associated with the
          assistant will be used.

      parallel_tool_calls: Whether to enable
          [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
          during tool use.

      reasoning_effort: **o-series models only**

          Constrains effort on reasoning for
          [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
          supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
          result in faster responses and fewer tokens used on reasoning in a response.

      response_format: Specifies the format that the model must output. Compatible with
          [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
          [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
          and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

          Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
          Outputs which ensures the model will match your supplied JSON schema. Learn more
          in the
          [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

          Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
          message the model generates is valid JSON.

          **Important:** when using JSON mode, you **must** also instruct the model to
          produce JSON yourself via a system or user message. Without this, the model may
          generate an unending stream of whitespace until the generation reaches the token
          limit, resulting in a long-running and seemingly "stuck" request. Also note that
          the message content may be partially cut off if `finish_reason="length"`, which
          indicates the generation exceeded `max_tokens` or the conversation exceeded the
          max context length.

      temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
          make the output more random, while lower values like 0.2 will make it more
          focused and deterministic.

      tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
          not call any tools and instead generates a message. `auto` is the default value
          and means the model can pick between generating a message or calling one or more
          tools. `required` means the model must call one or more tools before responding
          to the user. Specifying a particular tool like `{"type": "file_search"}` or
          `{"type": "function", "function": {"name": "my_function"}}` forces the model to
          call that tool.

      tools: Override the tools the assistant can use for this run. This is useful for
          modifying the behavior on a per-run basis.

      top_p: An alternative to sampling with temperature, called nucleus sampling, where the
          model considers the results of the tokens with top_p probability mass. So 0.1
          means only the tokens comprising the top 10% probability mass are considered.

          We generally recommend altering this or temperature but not both.

      truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
          control the initial context window of the run.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...
@overload
async def create(
    self,
    thread_id: str,
    *,
    assistant_id: str,
    stream: bool,
    include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
    additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
    additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run | AsyncStream[AssistantStreamEvent]:
    """
    Create a run.

    This is the typing overload for calls that pass `stream` as a plain `bool`
    whose value is not known statically; it is annotated as returning either a
    `Run` or an `AsyncStream[AssistantStreamEvent]`.

    Args:
      assistant_id: The ID of the
          [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
          execute this run.

      stream: If `true`, returns a stream of events that happen during the Run as server-sent
          events, terminating when the Run enters a terminal state with a `data: [DONE]`
          message.

      include: A list of additional fields to include in the response. Currently the only
          supported value is `step_details.tool_calls[*].file_search.results[*].content`
          to fetch the file search result content.

          See the
          [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
          for more information.

      additional_instructions: Appends additional instructions at the end of the instructions for the run. This
          is useful for modifying the behavior on a per-run basis without overriding other
          instructions.

      additional_messages: Adds additional messages to the thread before creating the run.

      instructions: Overrides the
          [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
          of the assistant. This is useful for modifying the behavior on a per-run basis.

      max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
          run. The run will make a best effort to use only the number of completion tokens
          specified, across multiple turns of the run. If the run exceeds the number of
          completion tokens specified, the run will end with status `incomplete`. See
          `incomplete_details` for more info.

      max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
          The run will make a best effort to use only the number of prompt tokens
          specified, across multiple turns of the run. If the run exceeds the number of
          prompt tokens specified, the run will end with status `incomplete`. See
          `incomplete_details` for more info.

      metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
          for storing additional information about the object in a structured format, and
          querying for objects via API or the dashboard.

          Keys are strings with a maximum length of 64 characters. Values are strings with
          a maximum length of 512 characters.

      model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
          be used to execute this run. If a value is provided here, it will override the
          model associated with the assistant. If not, the model associated with the
          assistant will be used.

      parallel_tool_calls: Whether to enable
          [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
          during tool use.

      reasoning_effort: **o-series models only**

          Constrains effort on reasoning for
          [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
          supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
          result in faster responses and fewer tokens used on reasoning in a response.

      response_format: Specifies the format that the model must output. Compatible with
          [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
          [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
          and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

          Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
          Outputs which ensures the model will match your supplied JSON schema. Learn more
          in the
          [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

          Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
          message the model generates is valid JSON.

          **Important:** when using JSON mode, you **must** also instruct the model to
          produce JSON yourself via a system or user message. Without this, the model may
          generate an unending stream of whitespace until the generation reaches the token
          limit, resulting in a long-running and seemingly "stuck" request. Also note that
          the message content may be partially cut off if `finish_reason="length"`, which
          indicates the generation exceeded `max_tokens` or the conversation exceeded the
          max context length.

      temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
          make the output more random, while lower values like 0.2 will make it more
          focused and deterministic.

      tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
          not call any tools and instead generates a message. `auto` is the default value
          and means the model can pick between generating a message or calling one or more
          tools. `required` means the model must call one or more tools before responding
          to the user. Specifying a particular tool like `{"type": "file_search"}` or
          `{"type": "function", "function": {"name": "my_function"}}` forces the model to
          call that tool.

      tools: Override the tools the assistant can use for this run. This is useful for
          modifying the behavior on a per-run basis.

      top_p: An alternative to sampling with temperature, called nucleus sampling, where the
          model considers the results of the tokens with top_p probability mass. So 0.1
          means only the tokens comprising the top 10% probability mass are considered.

          We generally recommend altering this or temperature but not both.

      truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
          control the initial context window of the run.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...
@required_args(["assistant_id"], ["assistant_id", "stream"])
async def create(
    self,
    thread_id: str,
    *,
    assistant_id: str,
    include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
    additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
    additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run | AsyncStream[AssistantStreamEvent]:
    """
    Create a run on a thread by POSTing to `/threads/{thread_id}/runs`.

    Every run parameter except `include` is sent in the request body;
    `include` is sent as a query parameter. The `OpenAI-Beta: assistants=v2`
    header is always added, and entries in `extra_headers` override it.

    Raises:
      ValueError: If `thread_id` is empty.
    """
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

    # Caller-supplied headers are spread last so they win over the beta header.
    beta_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}

    run_payload = await async_maybe_transform(
        {
            "assistant_id": assistant_id,
            "additional_instructions": additional_instructions,
            "additional_messages": additional_messages,
            "instructions": instructions,
            "max_completion_tokens": max_completion_tokens,
            "max_prompt_tokens": max_prompt_tokens,
            "metadata": metadata,
            "model": model,
            "parallel_tool_calls": parallel_tool_calls,
            "reasoning_effort": reasoning_effort,
            "response_format": response_format,
            "stream": stream,
            "temperature": temperature,
            "tool_choice": tool_choice,
            "tools": tools,
            "top_p": top_p,
            "truncation_strategy": truncation_strategy,
        },
        run_create_params.RunCreateParams,
    )
    include_query = await async_maybe_transform({"include": include}, run_create_params.RunCreateParams)
    request_options = make_request_options(
        extra_headers=beta_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
        query=include_query,
    )
    return await self._post(
        f"/threads/{thread_id}/runs",
        body=run_payload,
        options=request_options,
        cast_to=Run,
        # A falsy `stream` (None / NOT_GIVEN / False) is passed on as False.
        stream=stream or False,
        stream_cls=AsyncStream[AssistantStreamEvent],
    )

async def retrieve(
    self,
    run_id: str,
    *,
    thread_id: str,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run:
    """
    Retrieves a run via GET `/threads/{thread_id}/runs/{run_id}`.

    Args:
      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `thread_id` or `run_id` is empty.
    """
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
    if not run_id:
        raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")

    beta_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    request_options = make_request_options(
        extra_headers=beta_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._get(
        f"/threads/{thread_id}/runs/{run_id}",
        options=request_options,
        cast_to=Run,
    )

async def update(
    self,
    run_id: str,
    *,
    thread_id: str,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run:
    """
    Modifies a run via POST `/threads/{thread_id}/runs/{run_id}`.

    Args:
      metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
          for storing additional information about the object in a structured format, and
          querying for objects via API or the dashboard.

          Keys are strings with a maximum length of 64 characters. Values are strings with
          a maximum length of 512 characters.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `thread_id` or `run_id` is empty.
    """
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
    if not run_id:
        raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")

    beta_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    update_payload = await async_maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams)
    request_options = make_request_options(
        extra_headers=beta_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._post(
        f"/threads/{thread_id}/runs/{run_id}",
        body=update_payload,
        options=request_options,
        cast_to=Run,
    )

def list(
    self,
    thread_id: str,
    *,
    after: str | NotGiven = NOT_GIVEN,
    before: str | NotGiven = NOT_GIVEN,
    limit: int | NotGiven = NOT_GIVEN,
    order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AsyncPaginator[Run, AsyncCursorPage[Run]]:
    """
    Returns a list of runs belonging to a thread.

    This method is a plain `def`: it returns the paginator built by
    `self._get_api_list` without awaiting anything itself.

    Args:
      after: A cursor for use in pagination. `after` is an object ID that defines your place
          in the list. For instance, if you make a list request and receive 100 objects,
          ending with obj_foo, your subsequent call can include after=obj_foo in order to
          fetch the next page of the list.

      before: A cursor for use in pagination. `before` is an object ID that defines your place
          in the list. For instance, if you make a list request and receive 100 objects,
          starting with obj_foo, your subsequent call can include before=obj_foo in order
          to fetch the previous page of the list.

      limit: A limit on the number of objects to be returned. Limit can range between 1 and
          100, and the default is 20.

      order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
          order and `desc` for descending order.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `thread_id` is empty.
    """
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

    beta_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    cursor_query = maybe_transform(
        {
            "after": after,
            "before": before,
            "limit": limit,
            "order": order,
        },
        run_list_params.RunListParams,
    )
    request_options = make_request_options(
        extra_headers=beta_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
        query=cursor_query,
    )
    return self._get_api_list(
        f"/threads/{thread_id}/runs",
        page=AsyncCursorPage[Run],
        options=request_options,
        model=Run,
    )

async def cancel(
    self,
    run_id: str,
    *,
    thread_id: str,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run:
    """
    Cancels a run that is `in_progress` via POST
    `/threads/{thread_id}/runs/{run_id}/cancel`.

    Args:
      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `thread_id` or `run_id` is empty.
    """
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
    if not run_id:
        raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")

    beta_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    request_options = make_request_options(
        extra_headers=beta_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._post(
        f"/threads/{thread_id}/runs/{run_id}/cancel",
        options=request_options,
        cast_to=Run,
    )

async def create_and_poll(
    self,
    *,
    assistant_id: str,
    include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
    additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
    additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    poll_interval_ms: int | NotGiven = NOT_GIVEN,
    thread_id: str,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run:
    """
    A helper to create a run and poll for a terminal state. More information on Run
    lifecycles can be found here:
    https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps

    The run is created through `self.create` with `stream=False`, then its id
    is handed to `self.poll` together with the same thread id, extra request
    arguments, timeout and `poll_interval_ms`.
    """
    created_run = await self.create(
        thread_id=thread_id,
        assistant_id=assistant_id,
        include=include,
        additional_instructions=additional_instructions,
        additional_messages=additional_messages,
        instructions=instructions,
        max_completion_tokens=max_completion_tokens,
        max_prompt_tokens=max_prompt_tokens,
        metadata=metadata,
        model=model,
        response_format=response_format,
        temperature=temperature,
        tool_choice=tool_choice,
        parallel_tool_calls=parallel_tool_calls,
        reasoning_effort=reasoning_effort,
        # We assume we are not streaming when polling
        stream=False,
        tools=tools,
        truncation_strategy=truncation_strategy,
        top_p=top_p,
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    final_run = await self.poll(
        created_run.id,
        thread_id=thread_id,
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        poll_interval_ms=poll_interval_ms,
        timeout=timeout,
    )
    return final_run

@overload
@typing_extensions.deprecated("use `stream` instead")
def create_and_stream(
    self,
    *,
    assistant_id: str,
    additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
    additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    thread_id: str,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
    """Create a Run stream (deprecated; use `stream` instead).

    Typing overload for calls that pass no `event_handler`: the returned
    manager is typed with the default `AsyncAssistantEventHandler`.
    """
    ...
@overload
@typing_extensions.deprecated("use `stream` instead")
def create_and_stream(
    self,
    *,
    assistant_id: str,
    additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
    additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    thread_id: str,
    event_handler: AsyncAssistantEventHandlerT,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
    """Create a Run stream.

    Deprecated: the decorator directs callers to use `stream` instead.

    Typing overload for calls that supply their own `event_handler`; the
    returned stream manager is parameterized by that handler's type.
    """
    ...
+ + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AsyncAssistantStreamManager[AsyncAssistantEventHandler] + | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] + ): + """Create a Run stream""" + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = self._post( + f"/threads/{thread_id}/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, + "tools": tools, + "truncation_strategy": truncation_strategy, + "top_p": top_p, + "parallel_tool_calls": parallel_tool_calls, + }, + run_create_params.RunCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=True, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) + + async def poll( + self, + run_id: str, + thread_id: str, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to poll a run 
status until it reaches a terminal state. More + information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})} + + if is_given(poll_interval_ms): + extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"} + while True: + response = await self.with_raw_response.retrieve( + thread_id=thread_id, + run_id=run_id, + extra_headers=extra_headers, + extra_body=extra_body, + extra_query=extra_query, + timeout=timeout, + ) + + run = response.parse() + # Return if we reached a terminal state + if run.status in terminal_states: + return run + + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + await self._sleep(poll_interval_ms / 1000) + + @overload + def stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + 
top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: + """Create a Run stream""" + ... + + @overload + def stream( + self, + *, + assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: + """Create a Run stream""" + ... + + def stream( + self, + *, + assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AsyncAssistantStreamManager[AsyncAssistantEventHandler] + | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] + ): + """Create a Run stream""" + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = self._post( + f"/threads/{thread_id}/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, + "tools": tools, + "parallel_tool_calls": parallel_tool_calls, + "reasoning_effort": reasoning_effort, + "truncation_strategy": truncation_strategy, + "top_p": top_p, + }, + run_create_params.RunCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"include": include}, run_create_params.RunCreateParams), + ), + cast_to=Run, + stream=True, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) + + @overload + async def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + stream: Optional[Literal[False]] | 
NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. + + Args: + tool_outputs: A list of tools for which the outputs are being submitted. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + stream: Literal[True], + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[AssistantStreamEvent]: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. + + Args: + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + tool_outputs: A list of tools for which the outputs are being submitted. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + stream: bool, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. 
+ + Args: + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + tool_outputs: A list of tools for which the outputs are being submitted. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"]) + async def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", + body=await async_maybe_transform( + { + "tool_outputs": tool_outputs, + "stream": stream, + }, + run_submit_tool_outputs_params.RunSubmitToolOutputsParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=stream or False, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + + async def submit_tool_outputs_and_poll( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to submit a tool output to a run and poll for a terminal run state. 
+ More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = await self.submit_tool_outputs( + run_id=run_id, + thread_id=thread_id, + tool_outputs=tool_outputs, + stream=False, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return await self.poll( + run_id=run.id, + thread_id=thread_id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + poll_interval_ms=poll_interval_ms, + ) + + @overload + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + ... + + @overload + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + ... + + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AsyncAssistantStreamManager[AsyncAssistantEventHandler] + | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] + ): + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. 
More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = self._post( + f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", + body=maybe_transform( + { + "tool_outputs": tool_outputs, + "stream": True, + }, + run_submit_tool_outputs_params.RunSubmitToolOutputsParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=True, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) + + +class RunsWithRawResponse: + def __init__(self, runs: Runs) -> None: + self._runs = runs + + self.create = _legacy_response.to_raw_response_wrapper( + runs.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + runs.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + runs.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + runs.list, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + runs.cancel, + ) + self.submit_tool_outputs = _legacy_response.to_raw_response_wrapper( + runs.submit_tool_outputs, + ) + + @cached_property + def steps(self) -> StepsWithRawResponse: + return StepsWithRawResponse(self._runs.steps) + + +class AsyncRunsWithRawResponse: + def __init__(self, runs: AsyncRuns) -> None: + self._runs = runs + + self.create = 
_legacy_response.async_to_raw_response_wrapper( + runs.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + runs.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + runs.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + runs.list, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + runs.cancel, + ) + self.submit_tool_outputs = _legacy_response.async_to_raw_response_wrapper( + runs.submit_tool_outputs, + ) + + @cached_property + def steps(self) -> AsyncStepsWithRawResponse: + return AsyncStepsWithRawResponse(self._runs.steps) + + +class RunsWithStreamingResponse: + def __init__(self, runs: Runs) -> None: + self._runs = runs + + self.create = to_streamed_response_wrapper( + runs.create, + ) + self.retrieve = to_streamed_response_wrapper( + runs.retrieve, + ) + self.update = to_streamed_response_wrapper( + runs.update, + ) + self.list = to_streamed_response_wrapper( + runs.list, + ) + self.cancel = to_streamed_response_wrapper( + runs.cancel, + ) + self.submit_tool_outputs = to_streamed_response_wrapper( + runs.submit_tool_outputs, + ) + + @cached_property + def steps(self) -> StepsWithStreamingResponse: + return StepsWithStreamingResponse(self._runs.steps) + + +class AsyncRunsWithStreamingResponse: + def __init__(self, runs: AsyncRuns) -> None: + self._runs = runs + + self.create = async_to_streamed_response_wrapper( + runs.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + runs.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + runs.update, + ) + self.list = async_to_streamed_response_wrapper( + runs.list, + ) + self.cancel = async_to_streamed_response_wrapper( + runs.cancel, + ) + self.submit_tool_outputs = async_to_streamed_response_wrapper( + runs.submit_tool_outputs, + ) + + @cached_property + def steps(self) -> AsyncStepsWithStreamingResponse: + return AsyncStepsWithStreamingResponse(self._runs.steps) diff --git 
a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/steps.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/steps.py new file mode 100644 index 00000000..709c729d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/steps.py @@ -0,0 +1,381 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Literal + +import httpx + +from ..... import _legacy_response +from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ....._utils import ( + maybe_transform, + async_maybe_transform, +) +from ....._compat import cached_property +from ....._resource import SyncAPIResource, AsyncAPIResource +from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .....pagination import SyncCursorPage, AsyncCursorPage +from ....._base_client import AsyncPaginator, make_request_options +from .....types.beta.threads.runs import step_list_params, step_retrieve_params +from .....types.beta.threads.runs.run_step import RunStep +from .....types.beta.threads.runs.run_step_include import RunStepInclude + +__all__ = ["Steps", "AsyncSteps"] + + +class Steps(SyncAPIResource): + @cached_property + def with_raw_response(self) -> StepsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return StepsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> StepsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return StepsWithStreamingResponse(self) + + def retrieve( + self, + step_id: str, + *, + thread_id: str, + run_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> RunStep: + """ + Retrieves a run step. + + Args: + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + if not step_id: + raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams), + ), + cast_to=RunStep, + ) + + def list( + self, + run_id: str, + *, + thread_id: str, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[RunStep]: + """ + Returns a list of run steps belonging to a run. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. 
For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/threads/{thread_id}/runs/{run_id}/steps", + page=SyncCursorPage[RunStep], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "include": include, + "limit": limit, + "order": order, + }, + step_list_params.StepListParams, + ), + ), + model=RunStep, + ) + + +class AsyncSteps(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncStepsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncStepsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncStepsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncStepsWithStreamingResponse(self) + + async def retrieve( + self, + step_id: str, + *, + thread_id: str, + run_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> RunStep: + """ + Retrieves a run step. + + Args: + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + if not step_id: + raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams), + ), + cast_to=RunStep, + ) + + def list( + self, + run_id: str, + *, + thread_id: str, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + 
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[RunStep, AsyncCursorPage[RunStep]]: + """ + Returns a list of run steps belonging to a run. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/threads/{thread_id}/runs/{run_id}/steps", + page=AsyncCursorPage[RunStep], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "include": include, + "limit": limit, + "order": order, + }, + step_list_params.StepListParams, + ), + ), + model=RunStep, + ) + + +class StepsWithRawResponse: + def __init__(self, steps: Steps) -> None: + self._steps = steps + + self.retrieve = _legacy_response.to_raw_response_wrapper( + steps.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + steps.list, + ) + + +class AsyncStepsWithRawResponse: + def __init__(self, steps: AsyncSteps) -> None: + self._steps = steps + + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + steps.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + steps.list, + ) + + +class StepsWithStreamingResponse: + def __init__(self, steps: Steps) -> None: + self._steps = steps + + self.retrieve = to_streamed_response_wrapper( + steps.retrieve, + ) + self.list = to_streamed_response_wrapper( + steps.list, + ) + + +class AsyncStepsWithStreamingResponse: + def __init__(self, steps: AsyncSteps) -> None: + self._steps = steps + + self.retrieve = async_to_streamed_response_wrapper( + steps.retrieve, + ) + self.list = 
async_to_streamed_response_wrapper( + steps.list, + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/threads.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/threads.py new file mode 100644 index 00000000..d88559bd --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/threads.py @@ -0,0 +1,1875 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable, Optional +from functools import partial +from typing_extensions import Literal, overload + +import httpx + +from .... import _legacy_response +from .messages import ( + Messages, + AsyncMessages, + MessagesWithRawResponse, + AsyncMessagesWithRawResponse, + MessagesWithStreamingResponse, + AsyncMessagesWithStreamingResponse, +) +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + required_args, + maybe_transform, + async_maybe_transform, +) +from .runs.runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._streaming import Stream, AsyncStream +from ....types.beta import ( + thread_create_params, + thread_update_params, + thread_create_and_run_params, +) +from ...._base_client import make_request_options +from ....lib.streaming import ( + AssistantEventHandler, + AssistantEventHandlerT, + AssistantStreamManager, + AsyncAssistantEventHandler, + AsyncAssistantEventHandlerT, + AsyncAssistantStreamManager, +) +from ....types.beta.thread import Thread +from ....types.beta.threads.run import Run +from ....types.shared.chat_model import ChatModel +from ....types.beta.thread_deleted import ThreadDeleted 
from ....types.shared_params.metadata import Metadata
from ....types.beta.assistant_stream_event import AssistantStreamEvent
from ....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
from ....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam

__all__ = ["Threads", "AsyncThreads"]


class Threads(SyncAPIResource):
    # Resource built on `SyncAPIResource`; the methods below issue requests
    # against `/threads`, `/threads/{thread_id}` and `/threads/runs`, always
    # sending the `OpenAI-Beta: assistants=v2` header unless the caller
    # overrides it through `extra_headers`.

    @cached_property
    def runs(self) -> Runs:
        """Return a `Runs` resource constructed from this resource's client."""
        return Runs(self._client)

    @cached_property
    def messages(self) -> Messages:
        """Return a `Messages` resource constructed from this resource's client."""
        return Messages(self._client)

    @cached_property
    def with_raw_response(self) -> ThreadsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return ThreadsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> ThreadsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return ThreadsWithStreamingResponse(self)

    def create(
        self,
        *,
        messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN,
        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
        tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Thread:
        """
        Create a thread.

        Args:
          messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
              start the thread with.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          tool_resources: A set of resources that are made available to the assistant's tools in this
              thread. The resources are specific to the type of tool. For example, the
              `code_interpreter` tool requires a list of file IDs, while the `file_search`
              tool requires a list of vector store IDs.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Returns:
          The result of the POST to `/threads`, requested as a `Thread`.
        """
        # The beta header is listed first so that a caller-supplied
        # `OpenAI-Beta` entry in `extra_headers` overrides it.
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._post(
            "/threads",
            body=maybe_transform(
                {
                    "messages": messages,
                    "metadata": metadata,
                    "tool_resources": tool_resources,
                },
                thread_create_params.ThreadCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Thread,
        )

    def retrieve(
        self,
        thread_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Thread:
        """
        Retrieves a thread.

        Args:
          thread_id: Identifier interpolated into the request path `/threads/{thread_id}`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Returns:
          The result of the GET to `/threads/{thread_id}`, requested as a `Thread`.

        Raises:
          ValueError: If `thread_id` is empty (falsy).
        """
        # Reject an empty id up front; it would otherwise produce the path `/threads/`.
        if not thread_id:
            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
        # The beta header is listed first so that a caller-supplied
        # `OpenAI-Beta` entry in `extra_headers` overrides it.
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._get(
            f"/threads/{thread_id}",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Thread,
        )

    def update(
        self,
        thread_id: str,
        *,
        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
        tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Thread:
        """
        Modifies a thread.

        Args:
          thread_id: Identifier interpolated into the request path `/threads/{thread_id}`.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          tool_resources: A set of resources that are made available to the assistant's tools in this
              thread. The resources are specific to the type of tool. For example, the
              `code_interpreter` tool requires a list of file IDs, while the `file_search`
              tool requires a list of vector store IDs.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Returns:
          The result of the POST to `/threads/{thread_id}`, requested as a `Thread`.

        Raises:
          ValueError: If `thread_id` is empty (falsy).
        """
        # Reject an empty id up front; it would otherwise produce the path `/threads/`.
        if not thread_id:
            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
        # The beta header is listed first so that a caller-supplied
        # `OpenAI-Beta` entry in `extra_headers` overrides it.
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._post(
            f"/threads/{thread_id}",
            body=maybe_transform(
                {
                    "metadata": metadata,
                    "tool_resources": tool_resources,
                },
                thread_update_params.ThreadUpdateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Thread,
        )

    def delete(
        self,
        thread_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ThreadDeleted:
        """
        Delete a thread.

        Args:
          thread_id: Identifier interpolated into the request path `/threads/{thread_id}`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Returns:
          The result of the DELETE to `/threads/{thread_id}`, requested as a `ThreadDeleted`.

        Raises:
          ValueError: If `thread_id` is empty (falsy).
        """
        # Reject an empty id up front; it would otherwise produce the path `/threads/`.
        if not thread_id:
            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
        # The beta header is listed first so that a caller-supplied
        # `OpenAI-Beta` entry in `extra_headers` overrides it.
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._delete(
            f"/threads/{thread_id}",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=ThreadDeleted,
        )

    @overload
    def create_and_run(
        self,
        *,
        assistant_id: str,
        instructions: Optional[str] | NotGiven = NOT_GIVEN,
        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Run:
        """
        Create a thread and run it in one request.

        This overload covers the non-streaming call shape (`stream` omitted, `None`
        or `False`) and is annotated as returning a `Run`.

        Args:
          assistant_id: The ID of the
              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
              execute this run.

          instructions: Override the default system message of the assistant. This is useful for
              modifying the behavior on a per-run basis.

          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
              run. The run will make a best effort to use only the number of completion tokens
              specified, across multiple turns of the run. If the run exceeds the number of
              completion tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
              The run will make a best effort to use only the number of prompt tokens
              specified, across multiple turns of the run. If the run exceeds the number of
              prompt tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
              be used to execute this run. If a value is provided here, it will override the
              model associated with the assistant. If not, the model associated with the
              assistant will be used.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          stream: If `true`, returns a stream of events that happen during the Run as server-sent
              events, terminating when the Run enters a terminal state with a `data: [DONE]`
              message.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          thread: Options to create a new thread. If no thread is provided when running a request,
              an empty thread will be created.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tools and instead generates a message. `auto` is the default value
              and means the model can pick between generating a message or calling one or more
              tools. `required` means the model must call one or more tools before responding
              to the user. Specifying a particular tool like `{"type": "file_search"}` or
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

          tool_resources: A set of resources that are used by the assistant's tools. The resources are
              specific to the type of tool. For example, the `code_interpreter` tool requires
              a list of file IDs, while the `file_search` tool requires a list of vector store
              IDs.

          tools: Override the tools the assistant can use for this run. This is useful for
              modifying the behavior on a per-run basis.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
              control the initial context window of the run.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...
@overload
def create_and_run(
    self,
    *,
    assistant_id: str,
    stream: Literal[True],
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Stream[AssistantStreamEvent]:
    """
    Create a thread and run it in one request.

    This overload covers the streaming call shape (`stream=True` is required)
    and is annotated as returning a `Stream[AssistantStreamEvent]`.

    Args:
      assistant_id: The ID of the
          [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
          execute this run.

      stream: If `true`, returns a stream of events that happen during the Run as server-sent
          events, terminating when the Run enters a terminal state with a `data: [DONE]`
          message.

      instructions: Override the default system message of the assistant. This is useful for
          modifying the behavior on a per-run basis.

      max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
          run. The run will make a best effort to use only the number of completion tokens
          specified, across multiple turns of the run. If the run exceeds the number of
          completion tokens specified, the run will end with status `incomplete`. See
          `incomplete_details` for more info.

      max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
          The run will make a best effort to use only the number of prompt tokens
          specified, across multiple turns of the run. If the run exceeds the number of
          prompt tokens specified, the run will end with status `incomplete`. See
          `incomplete_details` for more info.

      metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
          for storing additional information about the object in a structured format, and
          querying for objects via API or the dashboard.

          Keys are strings with a maximum length of 64 characters. Values are strings with
          a maximum length of 512 characters.

      model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
          be used to execute this run. If a value is provided here, it will override the
          model associated with the assistant. If not, the model associated with the
          assistant will be used.

      parallel_tool_calls: Whether to enable
          [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
          during tool use.

      response_format: Specifies the format that the model must output. Compatible with
          [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
          [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
          and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

          Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
          Outputs which ensures the model will match your supplied JSON schema. Learn more
          in the
          [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

          Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
          message the model generates is valid JSON.

          **Important:** when using JSON mode, you **must** also instruct the model to
          produce JSON yourself via a system or user message. Without this, the model may
          generate an unending stream of whitespace until the generation reaches the token
          limit, resulting in a long-running and seemingly "stuck" request. Also note that
          the message content may be partially cut off if `finish_reason="length"`, which
          indicates the generation exceeded `max_tokens` or the conversation exceeded the
          max context length.

      temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
          make the output more random, while lower values like 0.2 will make it more
          focused and deterministic.

      thread: Options to create a new thread. If no thread is provided when running a request,
          an empty thread will be created.

      tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
          not call any tools and instead generates a message. `auto` is the default value
          and means the model can pick between generating a message or calling one or more
          tools. `required` means the model must call one or more tools before responding
          to the user. Specifying a particular tool like `{"type": "file_search"}` or
          `{"type": "function", "function": {"name": "my_function"}}` forces the model to
          call that tool.

      tool_resources: A set of resources that are used by the assistant's tools. The resources are
          specific to the type of tool. For example, the `code_interpreter` tool requires
          a list of file IDs, while the `file_search` tool requires a list of vector store
          IDs.

      tools: Override the tools the assistant can use for this run. This is useful for
          modifying the behavior on a per-run basis.

      top_p: An alternative to sampling with temperature, called nucleus sampling, where the
          model considers the results of the tokens with top_p probability mass. So 0.1
          means only the tokens comprising the top 10% probability mass are considered.

          We generally recommend altering this or temperature but not both.

      truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
          control the initial context window of the run.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...

@overload
def create_and_run(
    self,
    *,
    assistant_id: str,
    stream: bool,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run | Stream[AssistantStreamEvent]:
    """
    Create a thread and run it in one request.

    This overload covers callers that pass `stream` as a plain `bool` whose
    value is not known statically; it is annotated as returning either a
    `Run` or a `Stream[AssistantStreamEvent]`.

    Args:
      assistant_id: The ID of the
          [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
          execute this run.

      stream: If `true`, returns a stream of events that happen during the Run as server-sent
          events, terminating when the Run enters a terminal state with a `data: [DONE]`
          message.

      instructions: Override the default system message of the assistant. This is useful for
          modifying the behavior on a per-run basis.

      max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
          run. The run will make a best effort to use only the number of completion tokens
          specified, across multiple turns of the run. If the run exceeds the number of
          completion tokens specified, the run will end with status `incomplete`. See
          `incomplete_details` for more info.

      max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
          The run will make a best effort to use only the number of prompt tokens
          specified, across multiple turns of the run. If the run exceeds the number of
          prompt tokens specified, the run will end with status `incomplete`. See
          `incomplete_details` for more info.

      metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
          for storing additional information about the object in a structured format, and
          querying for objects via API or the dashboard.

          Keys are strings with a maximum length of 64 characters. Values are strings with
          a maximum length of 512 characters.

      model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
          be used to execute this run. If a value is provided here, it will override the
          model associated with the assistant. If not, the model associated with the
          assistant will be used.

      parallel_tool_calls: Whether to enable
          [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
          during tool use.

      response_format: Specifies the format that the model must output. Compatible with
          [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
          [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
          and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

          Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
          Outputs which ensures the model will match your supplied JSON schema. Learn more
          in the
          [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

          Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
          message the model generates is valid JSON.

          **Important:** when using JSON mode, you **must** also instruct the model to
          produce JSON yourself via a system or user message. Without this, the model may
          generate an unending stream of whitespace until the generation reaches the token
          limit, resulting in a long-running and seemingly "stuck" request. Also note that
          the message content may be partially cut off if `finish_reason="length"`, which
          indicates the generation exceeded `max_tokens` or the conversation exceeded the
          max context length.

      temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
          make the output more random, while lower values like 0.2 will make it more
          focused and deterministic.

      thread: Options to create a new thread. If no thread is provided when running a request,
          an empty thread will be created.

      tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
          not call any tools and instead generates a message. `auto` is the default value
          and means the model can pick between generating a message or calling one or more
          tools. `required` means the model must call one or more tools before responding
          to the user. Specifying a particular tool like `{"type": "file_search"}` or
          `{"type": "function", "function": {"name": "my_function"}}` forces the model to
          call that tool.

      tool_resources: A set of resources that are used by the assistant's tools. The resources are
          specific to the type of tool. For example, the `code_interpreter` tool requires
          a list of file IDs, while the `file_search` tool requires a list of vector store
          IDs.

      tools: Override the tools the assistant can use for this run. This is useful for
          modifying the behavior on a per-run basis.

      top_p: An alternative to sampling with temperature, called nucleus sampling, where the
          model considers the results of the tokens with top_p probability mass. So 0.1
          means only the tokens comprising the top 10% probability mass are considered.

          We generally recommend altering this or temperature but not both.

      truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
          control the initial context window of the run.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...
@required_args(["assistant_id"], ["assistant_id", "stream"])
def create_and_run(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run | Stream[AssistantStreamEvent]:
    """
    Create a thread and run it in one request.

    Runtime implementation shared by the `create_and_run` overloads: it POSTs
    the run parameters to `/threads/runs` and asks for the result as a `Run`,
    or as a `Stream[AssistantStreamEvent]` when `stream` is truthy. Parameter
    semantics are documented on the overloads.
    """
    # Beta header first, so a caller-supplied `OpenAI-Beta` entry wins.
    merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}

    run_params = {
        "assistant_id": assistant_id,
        "instructions": instructions,
        "max_completion_tokens": max_completion_tokens,
        "max_prompt_tokens": max_prompt_tokens,
        "metadata": metadata,
        "model": model,
        "parallel_tool_calls": parallel_tool_calls,
        "response_format": response_format,
        "stream": stream,
        "temperature": temperature,
        "thread": thread,
        "tool_choice": tool_choice,
        "tool_resources": tool_resources,
        "tools": tools,
        "top_p": top_p,
        "truncation_strategy": truncation_strategy,
    }
    request_body = maybe_transform(run_params, thread_create_and_run_params.ThreadCreateAndRunParams)
    request_options = make_request_options(
        extra_headers=merged_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )

    return self._post(
        "/threads/runs",
        body=request_body,
        options=request_options,
        cast_to=Run,
        # Any falsy `stream` value is collapsed to a plain `False` here.
        stream=stream or False,
        stream_cls=Stream[AssistantStreamEvent],
    )

def create_and_run_poll(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    poll_interval_ms: int | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run:
    """
    Helper that creates a thread, starts a run on it, and then polls that run
    until it reaches a terminal state.

    The run is started through `create_and_run` with `stream=False`; the same
    `extra_*` and `timeout` values are then handed to `self.runs.poll`
    together with `poll_interval_ms`.

    More information on Run lifecycles can be found here:
    https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
    """
    started_run = self.create_and_run(
        assistant_id=assistant_id,
        stream=False,
        instructions=instructions,
        max_completion_tokens=max_completion_tokens,
        max_prompt_tokens=max_prompt_tokens,
        metadata=metadata,
        model=model,
        parallel_tool_calls=parallel_tool_calls,
        response_format=response_format,
        temperature=temperature,
        thread=thread,
        tool_choice=tool_choice,
        tool_resources=tool_resources,
        tools=tools,
        top_p=top_p,
        truncation_strategy=truncation_strategy,
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    # Arguments stay positional, in the same order as the original call.
    return self.runs.poll(
        started_run.id,
        started_run.thread_id,
        extra_headers,
        extra_query,
        extra_body,
        timeout,
        poll_interval_ms,
    )

@overload
def create_and_run_stream(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AssistantStreamManager[AssistantEventHandler]:
    """Create a thread and stream the run back.

    Typing overload for calls that pass no `event_handler`; it is annotated as
    returning an `AssistantStreamManager[AssistantEventHandler]`.
    """
    ...
@overload
def create_and_run_stream(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    event_handler: AssistantEventHandlerT,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AssistantStreamManager[AssistantEventHandlerT]:
    """Create a thread and stream the run back.

    Typing overload for calls that supply `event_handler` (required here): the
    returned `AssistantStreamManager` is parameterised by the handler's own
    type, `AssistantEventHandlerT`.
    """
    ...
def create_and_run_stream(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    event_handler: AssistantEventHandlerT | None = None,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
    """Create a thread and stream the run back.

    The POST to `/threads/runs` is not issued here: it is wrapped in a
    `functools.partial` (always with `stream=True`) and handed to an
    `AssistantStreamManager`, together with `event_handler` or, when that is
    falsy, a fresh `AssistantEventHandler()`.
    """
    # Helper-identifying headers come first so caller-supplied entries can override them.
    stream_headers = {
        "OpenAI-Beta": "assistants=v2",
        "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
        "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
        **(extra_headers or {}),
    }
    payload = maybe_transform(
        {
            "assistant_id": assistant_id,
            "instructions": instructions,
            "max_completion_tokens": max_completion_tokens,
            "max_prompt_tokens": max_prompt_tokens,
            "metadata": metadata,
            "model": model,
            "parallel_tool_calls": parallel_tool_calls,
            "response_format": response_format,
            "temperature": temperature,
            "tool_choice": tool_choice,
            "stream": True,
            "thread": thread,
            "tools": tools,
            "tool_resources": tool_resources,
            "truncation_strategy": truncation_strategy,
            "top_p": top_p,
        },
        thread_create_and_run_params.ThreadCreateAndRunParams,
    )
    request_options = make_request_options(
        extra_headers=stream_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    deferred_request = partial(
        self._post,
        "/threads/runs",
        body=payload,
        options=request_options,
        cast_to=Run,
        stream=True,
        stream_cls=Stream[AssistantStreamEvent],
    )
    handler = event_handler or AssistantEventHandler()
    return AssistantStreamManager(deferred_request, event_handler=handler)


class AsyncThreads(AsyncAPIResource):
    @cached_property
    def runs(self) -> AsyncRuns:
        """`AsyncRuns` resource built from this resource's client."""
        return AsyncRuns(self._client)

    @cached_property
    def messages(self) -> AsyncMessages:
        """`AsyncMessages` resource built from this resource's client."""
        return AsyncMessages(self._client)

    @cached_property
    def with_raw_response(self) -> AsyncThreadsWithRawResponse:
        """
        Prefix for any HTTP method call that should return the raw response
        object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncThreadsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse:
        """
        Like `.with_raw_response`, but the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncThreadsWithStreamingResponse(self)

    async def create(
        self,
        *,
        messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN,
        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
        tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Thread:
        """
        Create a thread (POST `/threads`).

        Args:
          messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
              start the thread with.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          tool_resources: A set of resources that are made available to the assistant's tools in this
              thread. The resources are specific to the type of tool. For example, the
              `code_interpreter` tool requires a list of file IDs, while the `file_search`
              tool requires a list of vector store IDs.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # Caller-supplied headers are unpacked last so they win over the beta default.
        merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        payload = await async_maybe_transform(
            {
                "messages": messages,
                "metadata": metadata,
                "tool_resources": tool_resources,
            },
            thread_create_params.ThreadCreateParams,
        )
        request_options = make_request_options(
            extra_headers=merged_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._post(
            "/threads",
            body=payload,
            options=request_options,
            cast_to=Thread,
        )

    async def retrieve(
        self,
        thread_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Thread:
        """
        Retrieve a thread (GET `/threads/{thread_id}`).

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `thread_id` is empty (falsy).
        """
        if not thread_id:
            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

        merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        request_options = make_request_options(
            extra_headers=merged_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._get(
            f"/threads/{thread_id}",
            options=request_options,
            cast_to=Thread,
        )

    async def update(
        self,
        thread_id: str,
        *,
        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
        tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Thread:
        """
        Modify a thread (POST `/threads/{thread_id}`).

        Args:
          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          tool_resources: A set of resources that are made available to the assistant's tools in this
              thread. The resources are specific to the type of tool. For example, the
              `code_interpreter` tool requires a list of file IDs, while the `file_search`
              tool requires a list of vector store IDs.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `thread_id` is empty (falsy).
        """
        if not thread_id:
            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

        merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        payload = await async_maybe_transform(
            {
                "metadata": metadata,
                "tool_resources": tool_resources,
            },
            thread_update_params.ThreadUpdateParams,
        )
        request_options = make_request_options(
            extra_headers=merged_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._post(
            f"/threads/{thread_id}",
            body=payload,
            options=request_options,
            cast_to=Thread,
        )

    async def delete(
        self,
        thread_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ThreadDeleted:
        """
        Delete a thread (DELETE `/threads/{thread_id}`).

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `thread_id` is empty (falsy).
        """
        if not thread_id:
            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

        merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        request_options = make_request_options(
            extra_headers=merged_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._delete(
            f"/threads/{thread_id}",
            options=request_options,
            cast_to=ThreadDeleted,
        )

    @overload
    async def create_and_run(
        self,
        *,
        assistant_id: str,
        instructions: Optional[str] | NotGiven = NOT_GIVEN,
        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Run:
        """
        Create a thread and run it in one request.

        Typing overload for non-streaming calls (`stream` omitted, `None` or
        `False`); the result is typed as `Run`.

        Args:
          assistant_id: The ID of the
              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
              execute this run.

          instructions: Override the default system message of the assistant. This is useful for
              modifying the behavior on a per-run basis.

          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
              run. The run will make a best effort to use only the number of completion tokens
              specified, across multiple turns of the run. If the run exceeds the number of
              completion tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
              The run will make a best effort to use only the number of prompt tokens
              specified, across multiple turns of the run. If the run exceeds the number of
              prompt tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
              be used to execute this run. If a value is provided here, it will override the
              model associated with the assistant. If not, the model associated with the
              assistant will be used.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          stream: If `true`, returns a stream of events that happen during the Run as server-sent
              events, terminating when the Run enters a terminal state with a `data: [DONE]`
              message.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          thread: Options to create a new thread. If no thread is provided when running a request,
              an empty thread will be created.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tools and instead generates a message. `auto` is the default value
              and means the model can pick between generating a message or calling one or more
              tools. `required` means the model must call one or more tools before responding
              to the user. Specifying a particular tool like `{"type": "file_search"}` or
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

          tool_resources: A set of resources that are used by the assistant's tools. The resources are
              specific to the type of tool. For example, the `code_interpreter` tool requires
              a list of file IDs, while the `file_search` tool requires a list of vector store
              IDs.

          tools: Override the tools the assistant can use for this run. This is useful for
              modifying the behavior on a per-run basis.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
              control the initial context window of the run.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...
@overload
async def create_and_run(
    self,
    *,
    assistant_id: str,
    stream: Literal[True],
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AsyncStream[AssistantStreamEvent]:
    """
    Create a thread and run it in one request.

    Typing overload for `stream=True` (required here); the result is typed as
    `AsyncStream[AssistantStreamEvent]`.

    Args:
      assistant_id: The ID of the
          [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
          execute this run.

      stream: If `true`, returns a stream of events that happen during the Run as server-sent
          events, terminating when the Run enters a terminal state with a `data: [DONE]`
          message.

      instructions: Override the default system message of the assistant. This is useful for
          modifying the behavior on a per-run basis.

      max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
          run. The run will make a best effort to use only the number of completion tokens
          specified, across multiple turns of the run. If the run exceeds the number of
          completion tokens specified, the run will end with status `incomplete`. See
          `incomplete_details` for more info.

      max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
          The run will make a best effort to use only the number of prompt tokens
          specified, across multiple turns of the run. If the run exceeds the number of
          prompt tokens specified, the run will end with status `incomplete`. See
          `incomplete_details` for more info.

      metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
          for storing additional information about the object in a structured format, and
          querying for objects via API or the dashboard.

          Keys are strings with a maximum length of 64 characters. Values are strings with
          a maximum length of 512 characters.

      model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
          be used to execute this run. If a value is provided here, it will override the
          model associated with the assistant. If not, the model associated with the
          assistant will be used.

      parallel_tool_calls: Whether to enable
          [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
          during tool use.

      response_format: Specifies the format that the model must output. Compatible with
          [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
          [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
          and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

          Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
          Outputs which ensures the model will match your supplied JSON schema. Learn more
          in the
          [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

          Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
          message the model generates is valid JSON.

          **Important:** when using JSON mode, you **must** also instruct the model to
          produce JSON yourself via a system or user message. Without this, the model may
          generate an unending stream of whitespace until the generation reaches the token
          limit, resulting in a long-running and seemingly "stuck" request. Also note that
          the message content may be partially cut off if `finish_reason="length"`, which
          indicates the generation exceeded `max_tokens` or the conversation exceeded the
          max context length.

      temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
          make the output more random, while lower values like 0.2 will make it more
          focused and deterministic.

      thread: Options to create a new thread. If no thread is provided when running a request,
          an empty thread will be created.

      tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
          not call any tools and instead generates a message. `auto` is the default value
          and means the model can pick between generating a message or calling one or more
          tools. `required` means the model must call one or more tools before responding
          to the user. Specifying a particular tool like `{"type": "file_search"}` or
          `{"type": "function", "function": {"name": "my_function"}}` forces the model to
          call that tool.

      tool_resources: A set of resources that are used by the assistant's tools. The resources are
          specific to the type of tool. For example, the `code_interpreter` tool requires
          a list of file IDs, while the `file_search` tool requires a list of vector store
          IDs.

      tools: Override the tools the assistant can use for this run. This is useful for
          modifying the behavior on a per-run basis.

      top_p: An alternative to sampling with temperature, called nucleus sampling, where the
          model considers the results of the tokens with top_p probability mass. So 0.1
          means only the tokens comprising the top 10% probability mass are considered.

          We generally recommend altering this or temperature but not both.

      truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
          control the initial context window of the run.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...


@overload
async def create_and_run(
    self,
    *,
    assistant_id: str,
    stream: bool,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run | AsyncStream[AssistantStreamEvent]:
    """
    Create a thread and run it in one request.

    Typing overload for a `stream` value only known to be a `bool` (required
    here); the result is typed as `Run | AsyncStream[AssistantStreamEvent]`.

    Args:
      assistant_id: The ID of the
          [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
          execute this run.

      stream: If `true`, returns a stream of events that happen during the Run as server-sent
          events, terminating when the Run enters a terminal state with a `data: [DONE]`
          message.

      instructions: Override the default system message of the assistant. This is useful for
          modifying the behavior on a per-run basis.

      max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
          run. The run will make a best effort to use only the number of completion tokens
          specified, across multiple turns of the run. If the run exceeds the number of
          completion tokens specified, the run will end with status `incomplete`. See
          `incomplete_details` for more info.

      max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
          The run will make a best effort to use only the number of prompt tokens
          specified, across multiple turns of the run. If the run exceeds the number of
          prompt tokens specified, the run will end with status `incomplete`. See
          `incomplete_details` for more info.

      metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
          for storing additional information about the object in a structured format, and
          querying for objects via API or the dashboard.

          Keys are strings with a maximum length of 64 characters. Values are strings with
          a maximum length of 512 characters.

      model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
          be used to execute this run. If a value is provided here, it will override the
          model associated with the assistant. If not, the model associated with the
          assistant will be used.

      parallel_tool_calls: Whether to enable
          [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
          during tool use.

      response_format: Specifies the format that the model must output. Compatible with
          [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
          [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
          and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

          Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
          Outputs which ensures the model will match your supplied JSON schema. Learn more
          in the
          [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

          Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
          message the model generates is valid JSON.

          **Important:** when using JSON mode, you **must** also instruct the model to
          produce JSON yourself via a system or user message. Without this, the model may
          generate an unending stream of whitespace until the generation reaches the token
          limit, resulting in a long-running and seemingly "stuck" request. Also note that
          the message content may be partially cut off if `finish_reason="length"`, which
          indicates the generation exceeded `max_tokens` or the conversation exceeded the
          max context length.

      temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
          make the output more random, while lower values like 0.2 will make it more
          focused and deterministic.

      thread: Options to create a new thread. If no thread is provided when running a request,
          an empty thread will be created.

      tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
          not call any tools and instead generates a message. `auto` is the default value
          and means the model can pick between generating a message or calling one or more
          tools. `required` means the model must call one or more tools before responding
          to the user. Specifying a particular tool like `{"type": "file_search"}` or
          `{"type": "function", "function": {"name": "my_function"}}` forces the model to
          call that tool.

      tool_resources: A set of resources that are used by the assistant's tools. The resources are
          specific to the type of tool. For example, the `code_interpreter` tool requires
          a list of file IDs, while the `file_search` tool requires a list of vector store
          IDs.

      tools: Override the tools the assistant can use for this run. This is useful for
          modifying the behavior on a per-run basis.

      top_p: An alternative to sampling with temperature, called nucleus sampling, where the
          model considers the results of the tokens with top_p probability mass. So 0.1
          means only the tokens comprising the top 10% probability mass are considered.

          We generally recommend altering this or temperature but not both.

      truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
          control the initial context window of the run.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...
+ + @required_args(["assistant_id"], ["assistant_id", "stream"]) + async def create_and_run( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/threads/runs", + body=await async_maybe_transform( + { + "assistant_id": assistant_id, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "parallel_tool_calls": parallel_tool_calls, + "response_format": response_format, + "stream": stream, + "temperature": temperature, + "thread": thread, + "tool_choice": tool_choice, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + "truncation_strategy": truncation_strategy, + }, + thread_create_and_run_params.ThreadCreateAndRunParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=stream or False, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + + async def create_and_run_poll( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: 
Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to create a thread, start a run and then poll for a terminal state. + More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = await self.create_and_run( + assistant_id=assistant_id, + instructions=instructions, + max_completion_tokens=max_completion_tokens, + max_prompt_tokens=max_prompt_tokens, + metadata=metadata, + model=model, + parallel_tool_calls=parallel_tool_calls, + response_format=response_format, + temperature=temperature, + stream=False, + thread=thread, + tool_resources=tool_resources, + tool_choice=tool_choice, + truncation_strategy=truncation_strategy, + top_p=top_p, + tools=tools, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return await self.runs.poll( + run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms + ) + + @overload + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = 
NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: + """Create a thread and stream the run back""" + ... 
+ + @overload + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + event_handler: AsyncAssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: + """Create a thread and stream the run back""" + ... 
+ + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + event_handler: AsyncAssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AsyncAssistantStreamManager[AsyncAssistantEventHandler] + | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] + ): + """Create a thread and stream the run back""" + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.create_and_run_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = self._post( + "/threads/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "parallel_tool_calls": parallel_tool_calls, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, + "thread": thread, + "tools": tools, + "tool_resources": tool_resources, + "truncation_strategy": truncation_strategy, + "top_p": top_p, + }, + thread_create_and_run_params.ThreadCreateAndRunParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=True, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) + + +class ThreadsWithRawResponse: + def __init__(self, threads: Threads) -> None: + self._threads = threads + + self.create = _legacy_response.to_raw_response_wrapper( + threads.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + threads.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + threads.update, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + threads.delete, + ) + self.create_and_run = 
_legacy_response.to_raw_response_wrapper( + threads.create_and_run, + ) + + @cached_property + def runs(self) -> RunsWithRawResponse: + return RunsWithRawResponse(self._threads.runs) + + @cached_property + def messages(self) -> MessagesWithRawResponse: + return MessagesWithRawResponse(self._threads.messages) + + +class AsyncThreadsWithRawResponse: + def __init__(self, threads: AsyncThreads) -> None: + self._threads = threads + + self.create = _legacy_response.async_to_raw_response_wrapper( + threads.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + threads.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + threads.update, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + threads.delete, + ) + self.create_and_run = _legacy_response.async_to_raw_response_wrapper( + threads.create_and_run, + ) + + @cached_property + def runs(self) -> AsyncRunsWithRawResponse: + return AsyncRunsWithRawResponse(self._threads.runs) + + @cached_property + def messages(self) -> AsyncMessagesWithRawResponse: + return AsyncMessagesWithRawResponse(self._threads.messages) + + +class ThreadsWithStreamingResponse: + def __init__(self, threads: Threads) -> None: + self._threads = threads + + self.create = to_streamed_response_wrapper( + threads.create, + ) + self.retrieve = to_streamed_response_wrapper( + threads.retrieve, + ) + self.update = to_streamed_response_wrapper( + threads.update, + ) + self.delete = to_streamed_response_wrapper( + threads.delete, + ) + self.create_and_run = to_streamed_response_wrapper( + threads.create_and_run, + ) + + @cached_property + def runs(self) -> RunsWithStreamingResponse: + return RunsWithStreamingResponse(self._threads.runs) + + @cached_property + def messages(self) -> MessagesWithStreamingResponse: + return MessagesWithStreamingResponse(self._threads.messages) + + +class AsyncThreadsWithStreamingResponse: + def __init__(self, threads: AsyncThreads) -> None: + self._threads = 
threads + + self.create = async_to_streamed_response_wrapper( + threads.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + threads.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + threads.update, + ) + self.delete = async_to_streamed_response_wrapper( + threads.delete, + ) + self.create_and_run = async_to_streamed_response_wrapper( + threads.create_and_run, + ) + + @cached_property + def runs(self) -> AsyncRunsWithStreamingResponse: + return AsyncRunsWithStreamingResponse(self._threads.runs) + + @cached_property + def messages(self) -> AsyncMessagesWithStreamingResponse: + return AsyncMessagesWithStreamingResponse(self._threads.messages) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/chat/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/chat/__init__.py new file mode 100644 index 00000000..52dfdcea --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/chat/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .chat import ( + Chat, + AsyncChat, + ChatWithRawResponse, + AsyncChatWithRawResponse, + ChatWithStreamingResponse, + AsyncChatWithStreamingResponse, +) +from .completions import ( + Completions, + AsyncCompletions, + CompletionsWithRawResponse, + AsyncCompletionsWithRawResponse, + CompletionsWithStreamingResponse, + AsyncCompletionsWithStreamingResponse, +) + +__all__ = [ + "Completions", + "AsyncCompletions", + "CompletionsWithRawResponse", + "AsyncCompletionsWithRawResponse", + "CompletionsWithStreamingResponse", + "AsyncCompletionsWithStreamingResponse", + "Chat", + "AsyncChat", + "ChatWithRawResponse", + "AsyncChatWithRawResponse", + "ChatWithStreamingResponse", + "AsyncChatWithStreamingResponse", +] diff --git a/.venv/lib/python3.12/site-packages/openai/resources/chat/chat.py b/.venv/lib/python3.12/site-packages/openai/resources/chat/chat.py new file mode 100644 index 00000000..14f9224b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/chat/chat.py @@ -0,0 +1,102 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from .completions.completions import ( + Completions, + AsyncCompletions, + CompletionsWithRawResponse, + AsyncCompletionsWithRawResponse, + CompletionsWithStreamingResponse, + AsyncCompletionsWithStreamingResponse, +) + +__all__ = ["Chat", "AsyncChat"] + + +class Chat(SyncAPIResource): + @cached_property + def completions(self) -> Completions: + return Completions(self._client) + + @cached_property + def with_raw_response(self) -> ChatWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ChatWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ChatWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ChatWithStreamingResponse(self) + + +class AsyncChat(AsyncAPIResource): + @cached_property + def completions(self) -> AsyncCompletions: + return AsyncCompletions(self._client) + + @cached_property + def with_raw_response(self) -> AsyncChatWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncChatWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncChatWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncChatWithStreamingResponse(self) + + +class ChatWithRawResponse: + def __init__(self, chat: Chat) -> None: + self._chat = chat + + @cached_property + def completions(self) -> CompletionsWithRawResponse: + return CompletionsWithRawResponse(self._chat.completions) + + +class AsyncChatWithRawResponse: + def __init__(self, chat: AsyncChat) -> None: + self._chat = chat + + @cached_property + def completions(self) -> AsyncCompletionsWithRawResponse: + return AsyncCompletionsWithRawResponse(self._chat.completions) + + +class ChatWithStreamingResponse: + def __init__(self, chat: Chat) -> None: + self._chat = chat + + @cached_property + def completions(self) -> CompletionsWithStreamingResponse: + return CompletionsWithStreamingResponse(self._chat.completions) + + +class AsyncChatWithStreamingResponse: + def __init__(self, chat: AsyncChat) -> None: + self._chat = chat + + @cached_property + def completions(self) -> AsyncCompletionsWithStreamingResponse: + return AsyncCompletionsWithStreamingResponse(self._chat.completions) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/__init__.py new file mode 100644 index 00000000..12d3b3aa --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .messages import ( + Messages, + AsyncMessages, + MessagesWithRawResponse, + AsyncMessagesWithRawResponse, + MessagesWithStreamingResponse, + AsyncMessagesWithStreamingResponse, +) +from .completions import ( + Completions, + AsyncCompletions, + CompletionsWithRawResponse, + AsyncCompletionsWithRawResponse, + CompletionsWithStreamingResponse, + AsyncCompletionsWithStreamingResponse, +) + +__all__ = [ + "Messages", + "AsyncMessages", + "MessagesWithRawResponse", + "AsyncMessagesWithRawResponse", + "MessagesWithStreamingResponse", + "AsyncMessagesWithStreamingResponse", + "Completions", + "AsyncCompletions", + "CompletionsWithRawResponse", + "AsyncCompletionsWithRawResponse", + "CompletionsWithStreamingResponse", + "AsyncCompletionsWithStreamingResponse", +] diff --git a/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/completions.py b/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/completions.py new file mode 100644 index 00000000..d28be012 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/completions.py @@ -0,0 +1,2331 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import inspect +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, overload + +import httpx +import pydantic + +from .... 
import _legacy_response +from .messages import ( + Messages, + AsyncMessages, + MessagesWithRawResponse, + AsyncMessagesWithRawResponse, + MessagesWithStreamingResponse, + AsyncMessagesWithStreamingResponse, +) +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + required_args, + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._streaming import Stream, AsyncStream +from ....pagination import SyncCursorPage, AsyncCursorPage +from ....types.chat import ( + ChatCompletionAudioParam, + completion_list_params, + completion_create_params, + completion_update_params, +) +from ...._base_client import AsyncPaginator, make_request_options +from ....types.shared.chat_model import ChatModel +from ....types.chat.chat_completion import ChatCompletion +from ....types.shared_params.metadata import Metadata +from ....types.shared.reasoning_effort import ReasoningEffort +from ....types.chat.chat_completion_chunk import ChatCompletionChunk +from ....types.chat.chat_completion_deleted import ChatCompletionDeleted +from ....types.chat.chat_completion_tool_param import ChatCompletionToolParam +from ....types.chat.chat_completion_audio_param import ChatCompletionAudioParam +from ....types.chat.chat_completion_message_param import ChatCompletionMessageParam +from ....types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam +from ....types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam +from ....types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam + +__all__ = ["Completions", "AsyncCompletions"] + + +class Completions(SyncAPIResource): + @cached_property + def messages(self) -> Messages: + return Messages(self._client) + + 
@cached_property + def with_raw_response(self) -> CompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return CompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return CompletionsWithStreamingResponse(self) + + @overload + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = 
NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. 
For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. 
+ + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). 
To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. 
Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. 
Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: Literal[True], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: 
Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[ChatCompletionChunk]: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. 
Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. 
+ + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. 
Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + seed: This feature is in Beta. 
If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. 
`auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
@overload
def create(
    self,
    *,
    messages: Iterable[ChatCompletionMessageParam],
    model: Union[str, ChatModel],
    stream: bool,
    audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
    frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
    functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
    logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
    logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
    modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
    n: Optional[int] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
    prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
    presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
    response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
    seed: Optional[int] | NotGiven = NOT_GIVEN,
    service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
    stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
    store: Optional[bool] | NotGiven = NOT_GIVEN,
    stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
    tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
    top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    user: str | NotGiven = NOT_GIVEN,
    web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ChatCompletion | Stream[ChatCompletionChunk]:
    """
    **Starting a new project?** We recommend trying
    [Responses](https://platform.openai.com/docs/api-reference/responses) to take
    advantage of the latest OpenAI platform features. Compare
    [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).

    ---

    Creates a model response for the given chat conversation. Learn more in the
    [text generation](https://platform.openai.com/docs/guides/text-generation),
    [vision](https://platform.openai.com/docs/guides/vision), and
    [audio](https://platform.openai.com/docs/guides/audio) guides.

    Parameter support can differ depending on the model used to generate the
    response, particularly for newer reasoning models. Parameters that are only
    supported for reasoning models are noted below. For the current state of
    unsupported parameters in reasoning models,
    [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).

    This is the overload selected when `stream` is a required, non-literal `bool`.

    Args:
      messages: A list of messages comprising the conversation so far. Depending on the
          [model](https://platform.openai.com/docs/models) you use, different message
          types (modalities) are supported, like
          [text](https://platform.openai.com/docs/guides/text-generation),
          [images](https://platform.openai.com/docs/guides/vision), and
          [audio](https://platform.openai.com/docs/guides/audio).

      model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
          wide range of models with different capabilities, performance characteristics,
          and price points. Refer to the
          [model guide](https://platform.openai.com/docs/models) to browse and compare
          available models.

      stream: If set to true, the model response data will be streamed to the client as it is
          generated using
          [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
          See the
          [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
          for more information, along with the
          [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
          guide for more information on how to handle the streaming events.

      audio: Parameters for audio output. Required when audio output is requested with
          `modalities: ["audio"]`.
          [Learn more](https://platform.openai.com/docs/guides/audio).

      frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
          existing frequency in the text so far, decreasing the model's likelihood to
          repeat the same line verbatim.

      function_call: Deprecated in favor of `tool_choice`.

          Controls which (if any) function is called by the model.

          `none` means the model will not call a function and instead generates a message.

          `auto` means the model can pick between generating a message or calling a
          function.

          Specifying a particular function via `{"name": "my_function"}` forces the model
          to call that function.

          `none` is the default when no functions are present. `auto` is the default if
          functions are present.

      functions: Deprecated in favor of `tools`.

          A list of functions the model may generate JSON inputs for.

      logit_bias: Modify the likelihood of specified tokens appearing in the completion.

          Accepts a JSON object that maps tokens (specified by their token ID in the
          tokenizer) to an associated bias value from -100 to 100. Mathematically, the
          bias is added to the logits generated by the model prior to sampling. The exact
          effect will vary per model, but values between -1 and 1 should decrease or
          increase likelihood of selection; values like -100 or 100 should result in a ban
          or exclusive selection of the relevant token.

      logprobs: Whether to return log probabilities of the output tokens or not. If true,
          returns the log probabilities of each output token returned in the `content` of
          `message`.

      max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
          including visible output tokens and
          [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).

      max_tokens: The maximum number of [tokens](https://platform.openai.com/tokenizer) that can
          be generated in the chat completion. This value can be used to control
          [costs](https://openai.com/api/pricing/) for text generated via API.

          This value is now deprecated in favor of `max_completion_tokens`, and is not
          compatible with
          [o1 series models](https://platform.openai.com/docs/guides/reasoning).

      metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
          for storing additional information about the object in a structured format, and
          querying for objects via API or the dashboard.

          Keys are strings with a maximum length of 64 characters. Values are strings with
          a maximum length of 512 characters.

      modalities: Output types that you would like the model to generate. Most models are capable
          of generating text, which is the default:

          `["text"]`

          The `gpt-4o-audio-preview` model can also be used to
          [generate audio](https://platform.openai.com/docs/guides/audio). To request that
          this model generate both text and audio responses, you can use:

          `["text", "audio"]`

      n: How many chat completion choices to generate for each input message. Note that
          you will be charged based on the number of generated tokens across all of the
          choices. Keep `n` as `1` to minimize costs.

      parallel_tool_calls: Whether to enable
          [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
          during tool use.

      prediction: Static predicted output content, such as the content of a text file that is
          being regenerated.

      presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
          whether they appear in the text so far, increasing the model's likelihood to
          talk about new topics.

      reasoning_effort: **o-series models only**

          Constrains effort on reasoning for
          [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
          supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
          result in faster responses and fewer tokens used on reasoning in a response.

      response_format: An object specifying the format that the model must output.

          Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
          Outputs which ensures the model will match your supplied JSON schema. Learn more
          in the
          [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

          Setting to `{ "type": "json_object" }` enables the older JSON mode, which
          ensures the message the model generates is valid JSON. Using `json_schema` is
          preferred for models that support it.

      seed: This feature is in Beta. If specified, our system will make a best effort to
          sample deterministically, such that repeated requests with the same `seed` and
          parameters should return the same result. Determinism is not guaranteed, and you
          should refer to the `system_fingerprint` response parameter to monitor changes
          in the backend.

      service_tier: Specifies the latency tier to use for processing the request. This parameter is
          relevant for customers subscribed to the scale tier service:

          - If set to 'auto', and the Project is Scale tier enabled, the system will
            utilize scale tier credits until they are exhausted.
          - If set to 'auto', and the Project is not Scale tier enabled, the request will
            be processed using the default service tier with a lower uptime SLA and no
            latency guarantee.
          - If set to 'default', the request will be processed using the default service
            tier with a lower uptime SLA and no latency guarantee.
          - When not set, the default behavior is 'auto'.

          When this parameter is set, the response body will include the `service_tier`
          utilized.

      stop: Up to 4 sequences where the API will stop generating further tokens. The
          returned text will not contain the stop sequence.

      store: Whether or not to store the output of this chat completion request for use in
          our [model distillation](https://platform.openai.com/docs/guides/distillation)
          or [evals](https://platform.openai.com/docs/guides/evals) products.

      stream_options: Options for streaming response. Only set this when you set `stream: true`.

      temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
          make the output more random, while lower values like 0.2 will make it more
          focused and deterministic. We generally recommend altering this or `top_p` but
          not both.

      tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
          not call any tool and instead generates a message. `auto` means the model can
          pick between generating a message or calling one or more tools. `required` means
          the model must call one or more tools. Specifying a particular tool via
          `{"type": "function", "function": {"name": "my_function"}}` forces the model to
          call that tool.

          `none` is the default when no tools are present. `auto` is the default if tools
          are present.

      tools: A list of tools the model may call. Currently, only functions are supported as a
          tool. Use this to provide a list of functions the model may generate JSON inputs
          for. A max of 128 functions are supported.

      top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
          return at each token position, each with an associated log probability.
          `logprobs` must be set to `true` if this parameter is used.

      top_p: An alternative to sampling with temperature, called nucleus sampling, where the
          model considers the results of the tokens with top_p probability mass. So 0.1
          means only the tokens comprising the top 10% probability mass are considered.

          We generally recommend altering this or `temperature` but not both.

      user: A unique identifier representing your end-user, which can help OpenAI to monitor
          and detect abuse.
          [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).

      web_search_options: This tool searches the web for relevant results to use in a response. Learn more
          about the
          [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Returns:
      Either a `ChatCompletion` or a `Stream[ChatCompletionChunk]`, as declared by
      this overload's return annotation; `stream` is typed as a plain `bool` here,
      so the annotation does not narrow to one of the two.
    """
    ...
+ + @required_args(["messages", "model"], ["messages", "model", "stream"]) + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: 
completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: + validate_response_format(response_format) + return self._post( + "/chat/completions", + body=maybe_transform( + { + "messages": messages, + "model": model, + "audio": audio, + "frequency_penalty": frequency_penalty, + "function_call": function_call, + "functions": functions, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, + "max_tokens": max_tokens, + "metadata": metadata, + "modalities": modalities, + "n": n, + "parallel_tool_calls": parallel_tool_calls, + "prediction": prediction, + "presence_penalty": presence_penalty, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "seed": seed, + "service_tier": service_tier, + "stop": stop, + "store": store, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "user": user, + "web_search_options": web_search_options, + }, + completion_create_params.CompletionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + stream=stream or False, + stream_cls=Stream[ChatCompletionChunk], + ) + + def retrieve( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """Get a stored chat completion. + + Only Chat Completions that have been created with + the `store` parameter set to `true` will be returned. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._get( + f"/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + ) + + def update( + self, + completion_id: str, + *, + metadata: Optional[Metadata], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """Modify a stored chat completion. + + Only Chat Completions that have been created + with the `store` parameter set to `true` can be modified. Currently, the only + supported modification is to update the `metadata` field. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. 
This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._post( + f"/chat/completions/{completion_id}", + body=maybe_transform({"metadata": metadata}, completion_update_params.CompletionUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[ChatCompletion]: + """List stored Chat Completions. + + Only Chat Completions that have been stored with + the `store` parameter set to `true` will be returned. + + Args: + after: Identifier for the last chat completion from the previous pagination request. + + limit: Number of Chat Completions to retrieve. 
+ + metadata: + A list of metadata keys to filter the Chat Completions by. Example: + + `metadata[key1]=value1&metadata[key2]=value2` + + model: The model used to generate the Chat Completions. + + order: Sort order for Chat Completions by timestamp. Use `asc` for ascending order or + `desc` for descending order. Defaults to `asc`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/chat/completions", + page=SyncCursorPage[ChatCompletion], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "metadata": metadata, + "model": model, + "order": order, + }, + completion_list_params.CompletionListParams, + ), + ), + model=ChatCompletion, + ) + + def delete( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletionDeleted: + """Delete a stored chat completion. + + Only Chat Completions that have been created + with the `store` parameter set to `true` can be deleted. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._delete( + f"/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletionDeleted, + ) + + +class AsyncCompletions(AsyncAPIResource): + @cached_property + def messages(self) -> AsyncMessages: + return AsyncMessages(self._client) + + @cached_property + def with_raw_response(self) -> AsyncCompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncCompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncCompletionsWithStreamingResponse(self) + + @overload + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | 
NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). 
+ + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. 
If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. 
+ + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. 
+ - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarantee. + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. 
+ + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
+ + @overload + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: Literal[True], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to 
pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[ChatCompletionChunk]: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. 
Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. 
The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. 
+ + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. 
This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarantee. + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. 
Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
+ + @overload + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: bool, + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass 
additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. 
Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. 
The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. 
+ + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. 
This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarantee. + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. 
Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
+ + @required_args(["messages", "model"], ["messages", "model", "stream"]) + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: 
completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: + validate_response_format(response_format) + return await self._post( + "/chat/completions", + body=await async_maybe_transform( + { + "messages": messages, + "model": model, + "audio": audio, + "frequency_penalty": frequency_penalty, + "function_call": function_call, + "functions": functions, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, + "max_tokens": max_tokens, + "metadata": metadata, + "modalities": modalities, + "n": n, + "parallel_tool_calls": parallel_tool_calls, + "prediction": prediction, + "presence_penalty": presence_penalty, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "seed": seed, + "service_tier": service_tier, + "stop": stop, + "store": store, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "user": user, + "web_search_options": web_search_options, + }, + completion_create_params.CompletionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + stream=stream or False, + stream_cls=AsyncStream[ChatCompletionChunk], + ) + + async def retrieve( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via 
kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """Get a stored chat completion. + + Only Chat Completions that have been created with + the `store` parameter set to `true` will be returned. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return await self._get( + f"/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + ) + + async def update( + self, + completion_id: str, + *, + metadata: Optional[Metadata], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """Modify a stored chat completion. + + Only Chat Completions that have been created + with the `store` parameter set to `true` can be modified. Currently, the only + supported modification is to update the `metadata` field. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. 
This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return await self._post( + f"/chat/completions/{completion_id}", + body=await async_maybe_transform({"metadata": metadata}, completion_update_params.CompletionUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[ChatCompletion, AsyncCursorPage[ChatCompletion]]: + """List stored Chat Completions. + + Only Chat Completions that have been stored with + the `store` parameter set to `true` will be returned. + + Args: + after: Identifier for the last chat completion from the previous pagination request. 
+ + limit: Number of Chat Completions to retrieve. + + metadata: + A list of metadata keys to filter the Chat Completions by. Example: + + `metadata[key1]=value1&metadata[key2]=value2` + + model: The model used to generate the Chat Completions. + + order: Sort order for Chat Completions by timestamp. Use `asc` for ascending order or + `desc` for descending order. Defaults to `asc`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/chat/completions", + page=AsyncCursorPage[ChatCompletion], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "metadata": metadata, + "model": model, + "order": order, + }, + completion_list_params.CompletionListParams, + ), + ), + model=ChatCompletion, + ) + + async def delete( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletionDeleted: + """Delete a stored chat completion. + + Only Chat Completions that have been created + with the `store` parameter set to `true` can be deleted. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return await self._delete( + f"/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletionDeleted, + ) + + +class CompletionsWithRawResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.create = _legacy_response.to_raw_response_wrapper( + completions.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + completions.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + completions.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + completions.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + completions.delete, + ) + + @cached_property + def messages(self) -> MessagesWithRawResponse: + return MessagesWithRawResponse(self._completions.messages) + + +class AsyncCompletionsWithRawResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.create = _legacy_response.async_to_raw_response_wrapper( + completions.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + completions.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + completions.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + completions.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + completions.delete, + ) + + @cached_property + def messages(self) -> AsyncMessagesWithRawResponse: + return 
AsyncMessagesWithRawResponse(self._completions.messages) + + +class CompletionsWithStreamingResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.create = to_streamed_response_wrapper( + completions.create, + ) + self.retrieve = to_streamed_response_wrapper( + completions.retrieve, + ) + self.update = to_streamed_response_wrapper( + completions.update, + ) + self.list = to_streamed_response_wrapper( + completions.list, + ) + self.delete = to_streamed_response_wrapper( + completions.delete, + ) + + @cached_property + def messages(self) -> MessagesWithStreamingResponse: + return MessagesWithStreamingResponse(self._completions.messages) + + +class AsyncCompletionsWithStreamingResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.create = async_to_streamed_response_wrapper( + completions.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + completions.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + completions.update, + ) + self.list = async_to_streamed_response_wrapper( + completions.list, + ) + self.delete = async_to_streamed_response_wrapper( + completions.delete, + ) + + @cached_property + def messages(self) -> AsyncMessagesWithStreamingResponse: + return AsyncMessagesWithStreamingResponse(self._completions.messages) + + +def validate_response_format(response_format: object) -> None: + if inspect.isclass(response_format) and issubclass(response_format, pydantic.BaseModel): + raise TypeError( + "You tried to pass a `BaseModel` class to `chat.completions.create()`; You must use `beta.chat.completions.parse()` instead" + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/messages.py b/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/messages.py new file mode 100644 index 00000000..fac15fba --- /dev/null +++ 
# b/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/messages.py @@ -0,0 +1,212 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing_extensions import Literal

import httpx

from .... import _legacy_response
from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from ...._utils import maybe_transform
from ...._compat import cached_property
from ...._resource import SyncAPIResource, AsyncAPIResource
from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ....pagination import SyncCursorPage, AsyncCursorPage
from ...._base_client import AsyncPaginator, make_request_options
from ....types.chat.completions import message_list_params
from ....types.chat.chat_completion_store_message import ChatCompletionStoreMessage

__all__ = ["Messages", "AsyncMessages"]


class Messages(SyncAPIResource):
    """Synchronous resource for the messages of a stored chat completion."""

    @cached_property
    def with_raw_response(self) -> MessagesWithRawResponse:
        """
        Prefix any HTTP method call with this property to get back the raw
        response object rather than the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return MessagesWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> MessagesWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return MessagesWithStreamingResponse(self)

    def list(
        self,
        completion_id: str,
        *,
        after: str | NotGiven = NOT_GIVEN,
        limit: int | NotGiven = NOT_GIVEN,
        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
        # The arguments below let you send additional parameters to the API that aren't exposed as kwargs.
        # Extra values supplied here win over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> SyncCursorPage[ChatCompletionStoreMessage]:
        """List the messages in a stored chat completion.

        Only Chat Completions that were created with the `store` parameter set
        to `true` will be returned.

        Args:
          completion_id: Identifier interpolated into the request path; must be non-empty.

          after: Identifier for the last message from the previous pagination request.

          limit: Number of messages to retrieve.

          order: Sort order for messages by timestamp. Use `asc` for ascending order or `desc`
              for descending order. Defaults to `asc`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `completion_id` is empty.
        """
        if not completion_id:
            raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")

        # Pagination/sorting arguments travel as query-string parameters.
        query_params = maybe_transform(
            {"after": after, "limit": limit, "order": order},
            message_list_params.MessageListParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
            query=query_params,
        )
        return self._get_api_list(
            f"/chat/completions/{completion_id}/messages",
            page=SyncCursorPage[ChatCompletionStoreMessage],
            options=request_options,
            model=ChatCompletionStoreMessage,
        )


class AsyncMessages(AsyncAPIResource):
    """Asynchronous resource for the messages of a stored chat completion."""

    @cached_property
    def with_raw_response(self) -> AsyncMessagesWithRawResponse:
        """
        Prefix any HTTP method call with this property to get back the raw
        response object rather than the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncMessagesWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncMessagesWithStreamingResponse(self)

    def list(
        self,
        completion_id: str,
        *,
        after: str | NotGiven = NOT_GIVEN,
        limit: int | NotGiven = NOT_GIVEN,
        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
        # The arguments below let you send additional parameters to the API that aren't exposed as kwargs.
        # Extra values supplied here win over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> AsyncPaginator[ChatCompletionStoreMessage, AsyncCursorPage[ChatCompletionStoreMessage]]:
        """List the messages in a stored chat completion.

        Only Chat Completions that were created with the `store` parameter set
        to `true` will be returned.

        This method is a plain `def`: it returns the paginator object directly.

        Args:
          completion_id: Identifier interpolated into the request path; must be non-empty.

          after: Identifier for the last message from the previous pagination request.

          limit: Number of messages to retrieve.

          order: Sort order for messages by timestamp. Use `asc` for ascending order or `desc`
              for descending order. Defaults to `asc`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `completion_id` is empty.
        """
        if not completion_id:
            raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")

        # Pagination/sorting arguments travel as query-string parameters.
        query_params = maybe_transform(
            {"after": after, "limit": limit, "order": order},
            message_list_params.MessageListParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
            query=query_params,
        )
        return self._get_api_list(
            f"/chat/completions/{completion_id}/messages",
            page=AsyncCursorPage[ChatCompletionStoreMessage],
            options=request_options,
            model=ChatCompletionStoreMessage,
        )


class MessagesWithRawResponse:
    """View over `Messages` whose `list` is wrapped by `to_raw_response_wrapper`."""

    def __init__(self, messages: Messages) -> None:
        self._messages = messages

        self.list = _legacy_response.to_raw_response_wrapper(messages.list)


class AsyncMessagesWithRawResponse:
    """View over `AsyncMessages` whose `list` is wrapped by `async_to_raw_response_wrapper`."""

    def __init__(self, messages: AsyncMessages) -> None:
        self._messages = messages

        self.list = _legacy_response.async_to_raw_response_wrapper(messages.list)


class MessagesWithStreamingResponse:
    """View over `Messages` whose `list` is wrapped by `to_streamed_response_wrapper`."""

    def __init__(self, messages: Messages) -> None:
        self._messages = messages

        self.list = to_streamed_response_wrapper(messages.list)


class AsyncMessagesWithStreamingResponse:
    """View over `AsyncMessages` whose `list` is wrapped by `async_to_streamed_response_wrapper`."""

    def __init__(self, messages: AsyncMessages) -> None:
        self._messages = messages

        self.list = async_to_streamed_response_wrapper(messages.list)


# diff --git a/.venv/lib/python3.12/site-packages/openai/resources/completions.py b/.venv/lib/python3.12/site-packages/openai/resources/completions.py new file mode 100644 index 00000000..171f5093 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/completions.py @@ -0,0 +1,1148 @@
# File generated from our OpenAPI spec by Stainless.
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, overload + +import httpx + +from .. import _legacy_response +from ..types import completion_create_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import ( + required_args, + maybe_transform, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .._streaming import Stream, AsyncStream +from .._base_client import ( + make_request_options, +) +from ..types.completion import Completion +from ..types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam + +__all__ = ["Completions", "AsyncCompletions"] + + +class Completions(SyncAPIResource): + @cached_property + def with_raw_response(self) -> CompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return CompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return CompletionsWithStreamingResponse(self) + + @overload + def create( + self, + *, + model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Completion: + """ + Creates a completion for the provided prompt and parameters. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. 
+ + prompt: The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. + + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. + + best_of: Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + echo: Echo back the prompt in addition to the completion + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. 
+ + logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + + seed: If specified, our system will make a best effort to sample deterministically, + such that repeated requests with the same `seed` and parameters should return + the same result. + + Determinism is not guaranteed, and you should refer to the `system_fingerprint` + response parameter to monitor changes in the backend. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream: Whether to stream back partial progress. 
If set, tokens will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + suffix: The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
+ + @overload + def create( + self, + *, + model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + stream: Literal[True], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[Completion]: + """ + Creates a completion for the provided prompt and parameters. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + prompt: The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. 
+ + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. + + stream: Whether to stream back partial progress. If set, tokens will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + best_of: Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + echo: Echo back the prompt in addition to the completion + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. 
The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. + + logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + + seed: If specified, our system will make a best effort to sample deterministically, + such that repeated requests with the same `seed` and parameters should return + the same result. + + Determinism is not guaranteed, and you should refer to the `system_fingerprint` + response parameter to monitor changes in the backend. 
+ + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + suffix: The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
+ + @overload + def create( + self, + *, + model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + stream: bool, + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Completion | Stream[Completion]: + """ + Creates a completion for the provided prompt and parameters. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + prompt: The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. 
+ + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. + + stream: Whether to stream back partial progress. If set, tokens will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + best_of: Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + echo: Echo back the prompt in addition to the completion + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. 
The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. + + logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + + seed: If specified, our system will make a best effort to sample deterministically, + such that repeated requests with the same `seed` and parameters should return + the same result. + + Determinism is not guaranteed, and you should refer to the `system_fingerprint` + response parameter to monitor changes in the backend. 
+ + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + suffix: The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
+ + @required_args(["model", "prompt"], ["model", "prompt", "stream"]) + def create( + self, + *, + model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Completion | Stream[Completion]: + return self._post( + "/completions", + body=maybe_transform( + { + "model": model, + "prompt": prompt, + "best_of": best_of, + "echo": echo, + "frequency_penalty": frequency_penalty, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_tokens": max_tokens, + "n": n, + "presence_penalty": presence_penalty, + "seed": seed, + "stop": stop, + "stream": stream, + "stream_options": stream_options, + "suffix": suffix, + "temperature": temperature, + "top_p": top_p, + "user": user, + }, + completion_create_params.CompletionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Completion, + stream=stream or False, + stream_cls=Stream[Completion], + ) + + +class AsyncCompletions(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncCompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncCompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncCompletionsWithStreamingResponse(self) + + @overload + async def create( + self, + *, + model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Completion: + """ + Creates a completion for the provided prompt and parameters. + + Args: + model: ID of the model to use. 
You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + prompt: The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. + + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. + + best_of: Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + echo: Echo back the prompt in addition to the completion + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. 
The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. + + logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + + seed: If specified, our system will make a best effort to sample deterministically, + such that repeated requests with the same `seed` and parameters should return + the same result. + + Determinism is not guaranteed, and you should refer to the `system_fingerprint` + response parameter to monitor changes in the backend. 
+ + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream: Whether to stream back partial progress. If set, tokens will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + suffix: The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
+ + @overload + async def create( + self, + *, + model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + stream: Literal[True], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[Completion]: + """ + Creates a completion for the provided prompt and parameters. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + prompt: The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. 
+ + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. + + stream: Whether to stream back partial progress. If set, tokens will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + best_of: Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + echo: Echo back the prompt in addition to the completion + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. 
The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. + + logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + + seed: If specified, our system will make a best effort to sample deterministically, + such that repeated requests with the same `seed` and parameters should return + the same result. + + Determinism is not guaranteed, and you should refer to the `system_fingerprint` + response parameter to monitor changes in the backend. 
+ + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + suffix: The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
+ + @overload + async def create( + self, + *, + model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + stream: bool, + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Completion | AsyncStream[Completion]: + """ + Creates a completion for the provided prompt and parameters. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + prompt: The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. 
+ + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. + + stream: Whether to stream back partial progress. If set, tokens will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + best_of: Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + echo: Echo back the prompt in addition to the completion + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. 
The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. + + logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + + seed: If specified, our system will make a best effort to sample deterministically, + such that repeated requests with the same `seed` and parameters should return + the same result. + + Determinism is not guaranteed, and you should refer to the `system_fingerprint` + response parameter to monitor changes in the backend. 
+ + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + suffix: The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
+ + @required_args(["model", "prompt"], ["model", "prompt", "stream"]) + async def create( + self, + *, + model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Completion | AsyncStream[Completion]: + return await self._post( + "/completions", + body=await async_maybe_transform( + { + "model": model, + "prompt": prompt, + "best_of": best_of, + "echo": echo, + "frequency_penalty": frequency_penalty, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_tokens": max_tokens, + "n": n, + "presence_penalty": presence_penalty, + "seed": seed, + "stop": stop, + "stream": stream, + "stream_options": stream_options, + "suffix": suffix, + "temperature": temperature, + "top_p": top_p, + "user": user, + }, + completion_create_params.CompletionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Completion, + stream=stream or False, + stream_cls=AsyncStream[Completion], + ) + + +class CompletionsWithRawResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.create = _legacy_response.to_raw_response_wrapper( + completions.create, + ) + + +class AsyncCompletionsWithRawResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.create = _legacy_response.async_to_raw_response_wrapper( + completions.create, + ) + + +class CompletionsWithStreamingResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.create = to_streamed_response_wrapper( + completions.create, + ) + + +class AsyncCompletionsWithStreamingResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.create = async_to_streamed_response_wrapper( + completions.create, + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/embeddings.py 
b/.venv/lib/python3.12/site-packages/openai/resources/embeddings.py new file mode 100644 index 00000000..a392d5eb --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/embeddings.py @@ -0,0 +1,290 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import array +import base64 +from typing import List, Union, Iterable, cast +from typing_extensions import Literal + +import httpx + +from .. import _legacy_response +from ..types import embedding_create_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import is_given, maybe_transform +from .._compat import cached_property +from .._extras import numpy as np, has_numpy +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .._base_client import make_request_options +from ..types.embedding_model import EmbeddingModel +from ..types.create_embedding_response import CreateEmbeddingResponse + +__all__ = ["Embeddings", "AsyncEmbeddings"] + + +class Embeddings(SyncAPIResource): + @cached_property + def with_raw_response(self) -> EmbeddingsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return EmbeddingsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> EmbeddingsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return EmbeddingsWithStreamingResponse(self) + + def create( + self, + *, + input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + model: Union[str, EmbeddingModel], + dimensions: int | NotGiven = NOT_GIVEN, + encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CreateEmbeddingResponse: + """ + Creates an embedding vector representing the input text. + + Args: + input: Input text to embed, encoded as a string or array of tokens. To embed multiple + inputs in a single request, pass an array of strings or array of token arrays. + The input must not exceed the max input tokens for the model (8192 tokens for + `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 + dimensions or less. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. Some models may also impose a limit on total number of + tokens summed across inputs. + + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + dimensions: The number of dimensions the resulting output embeddings should have. Only + supported in `text-embedding-3` and later models. + + encoding_format: The format to return the embeddings in. 
Can be either `float` or + [`base64`](https://pypi.org/project/pybase64/). + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + params = { + "input": input, + "model": model, + "user": user, + "dimensions": dimensions, + "encoding_format": encoding_format, + } + if not is_given(encoding_format): + params["encoding_format"] = "base64" + + def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: + if is_given(encoding_format): + # don't modify the response object if a user explicitly asked for a format + return obj + + for embedding in obj.data: + data = cast(object, embedding.embedding) + if not isinstance(data, str): + continue + if not has_numpy(): + # use array for base64 optimisation + embedding.embedding = array.array("f", base64.b64decode(data)).tolist() + else: + embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] + base64.b64decode(data), dtype="float32" + ).tolist() + + return obj + + return self._post( + "/embeddings", + body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=parser, + ), + cast_to=CreateEmbeddingResponse, + ) + + +class AsyncEmbeddings(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncEmbeddingsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncEmbeddingsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncEmbeddingsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncEmbeddingsWithStreamingResponse(self) + + async def create( + self, + *, + input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + model: Union[str, EmbeddingModel], + dimensions: int | NotGiven = NOT_GIVEN, + encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CreateEmbeddingResponse: + """ + Creates an embedding vector representing the input text. + + Args: + input: Input text to embed, encoded as a string or array of tokens. To embed multiple + inputs in a single request, pass an array of strings or array of token arrays. + The input must not exceed the max input tokens for the model (8192 tokens for + `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 + dimensions or less. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. Some models may also impose a limit on total number of + tokens summed across inputs. + + model: ID of the model to use. 
You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + dimensions: The number of dimensions the resulting output embeddings should have. Only + supported in `text-embedding-3` and later models. + + encoding_format: The format to return the embeddings in. Can be either `float` or + [`base64`](https://pypi.org/project/pybase64/). + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + params = { + "input": input, + "model": model, + "user": user, + "dimensions": dimensions, + "encoding_format": encoding_format, + } + if not is_given(encoding_format): + params["encoding_format"] = "base64" + + def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: + if is_given(encoding_format): + # don't modify the response object if a user explicitly asked for a format + return obj + + for embedding in obj.data: + data = cast(object, embedding.embedding) + if not isinstance(data, str): + continue + if not has_numpy(): + # use array for base64 optimisation + embedding.embedding = array.array("f", base64.b64decode(data)).tolist() + else: + embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] + base64.b64decode(data), dtype="float32" + ).tolist() + + return obj + + return await self._post( + "/embeddings", + body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + 
extra_body=extra_body, + timeout=timeout, + post_parser=parser, + ), + cast_to=CreateEmbeddingResponse, + ) + + +class EmbeddingsWithRawResponse: + def __init__(self, embeddings: Embeddings) -> None: + self._embeddings = embeddings + + self.create = _legacy_response.to_raw_response_wrapper( + embeddings.create, + ) + + +class AsyncEmbeddingsWithRawResponse: + def __init__(self, embeddings: AsyncEmbeddings) -> None: + self._embeddings = embeddings + + self.create = _legacy_response.async_to_raw_response_wrapper( + embeddings.create, + ) + + +class EmbeddingsWithStreamingResponse: + def __init__(self, embeddings: Embeddings) -> None: + self._embeddings = embeddings + + self.create = to_streamed_response_wrapper( + embeddings.create, + ) + + +class AsyncEmbeddingsWithStreamingResponse: + def __init__(self, embeddings: AsyncEmbeddings) -> None: + self._embeddings = embeddings + + self.create = async_to_streamed_response_wrapper( + embeddings.create, + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/files.py b/.venv/lib/python3.12/site-packages/openai/resources/files.py new file mode 100644 index 00000000..2eaa4a64 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/files.py @@ -0,0 +1,767 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import time +import typing_extensions +from typing import Mapping, cast +from typing_extensions import Literal + +import httpx + +from .. 
import _legacy_response +from ..types import FilePurpose, file_list_params, file_create_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from .._utils import ( + extract_files, + maybe_transform, + deepcopy_minimal, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + StreamedBinaryAPIResponse, + AsyncStreamedBinaryAPIResponse, + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, + to_custom_streamed_response_wrapper, + async_to_custom_streamed_response_wrapper, +) +from ..pagination import SyncCursorPage, AsyncCursorPage +from .._base_client import AsyncPaginator, make_request_options +from ..types.file_object import FileObject +from ..types.file_deleted import FileDeleted +from ..types.file_purpose import FilePurpose + +__all__ = ["Files", "AsyncFiles"] + + +class Files(SyncAPIResource): + @cached_property + def with_raw_response(self) -> FilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return FilesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> FilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return FilesWithStreamingResponse(self) + + def create( + self, + *, + file: FileTypes, + purpose: FilePurpose, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FileObject: + """Upload a file that can be used across various endpoints. + + Individual files can be + up to 512 MB, and the size of all files uploaded by one organization can be up + to 100 GB. + + The Assistants API supports files up to 2 million tokens and of specific file + types. See the + [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) for + details. + + The Fine-tuning API only supports `.jsonl` files. The input also has certain + required formats for fine-tuning + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + models. + + The Batch API only supports `.jsonl` files up to 200 MB in size. The input also + has a specific required + [format](https://platform.openai.com/docs/api-reference/batch/request-input). + + Please [contact us](https://help.openai.com/) if you need to increase these + storage limits. + + Args: + file: The File object (not file name) to be uploaded. + + purpose: The intended purpose of the uploaded file. 
One of: - `assistants`: Used in the + Assistants API - `batch`: Used in the Batch API - `fine-tune`: Used for + fine-tuning - `vision`: Images used for vision fine-tuning - `user_data`: + Flexible file type for any purpose - `evals`: Used for eval data sets + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "file": file, + "purpose": purpose, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return self._post( + "/files", + body=maybe_transform(body, file_create_params.FileCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FileObject, + ) + + def retrieve( + self, + file_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FileObject: + """ + Returns information about a specific file. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + return self._get( + f"/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FileObject, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + purpose: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[FileObject]: + """Returns a list of files. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 10,000, and the default is 10,000. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + purpose: Only return files with the given purpose. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/files", + page=SyncCursorPage[FileObject], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "purpose": purpose, + }, + file_list_params.FileListParams, + ), + ), + model=FileObject, + ) + + def delete( + self, + file_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FileDeleted: + """ + Delete a file. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + return self._delete( + f"/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FileDeleted, + ) + + def content( + self, + file_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> _legacy_response.HttpxBinaryResponseContent: + """ + Returns the contents of the specified file. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"Accept": "application/binary", **(extra_headers or {})} + return self._get( + f"/files/{file_id}/content", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=_legacy_response.HttpxBinaryResponseContent, + ) + + @typing_extensions.deprecated("The `.content()` method should be used instead") + def retrieve_content( + self, + file_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> str: + """ + Returns the contents of the specified file. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + return self._get( + f"/files/{file_id}/content", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=str, + ) + + def wait_for_processing( + self, + id: str, + *, + poll_interval: float = 5.0, + max_wait_seconds: float = 30 * 60, + ) -> FileObject: + """Waits for the given file to be processed, default timeout is 30 mins.""" + TERMINAL_STATES = {"processed", "error", "deleted"} + + start = time.time() + file = self.retrieve(id) + while file.status not in TERMINAL_STATES: + self._sleep(poll_interval) + + file = self.retrieve(id) + if time.time() - start > max_wait_seconds: + raise RuntimeError( + f"Giving up on waiting for file {id} to finish processing after {max_wait_seconds} seconds." + ) + + return file + + +class AsyncFiles(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncFilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncFilesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncFilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncFilesWithStreamingResponse(self) + + async def create( + self, + *, + file: FileTypes, + purpose: FilePurpose, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FileObject: + """Upload a file that can be used across various endpoints. + + Individual files can be + up to 512 MB, and the size of all files uploaded by one organization can be up + to 100 GB. + + The Assistants API supports files up to 2 million tokens and of specific file + types. See the + [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) for + details. + + The Fine-tuning API only supports `.jsonl` files. The input also has certain + required formats for fine-tuning + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + models. + + The Batch API only supports `.jsonl` files up to 200 MB in size. The input also + has a specific required + [format](https://platform.openai.com/docs/api-reference/batch/request-input). + + Please [contact us](https://help.openai.com/) if you need to increase these + storage limits. + + Args: + file: The File object (not file name) to be uploaded. + + purpose: The intended purpose of the uploaded file. 
One of: - `assistants`: Used in the + Assistants API - `batch`: Used in the Batch API - `fine-tune`: Used for + fine-tuning - `vision`: Images used for vision fine-tuning - `user_data`: + Flexible file type for any purpose - `evals`: Used for eval data sets + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "file": file, + "purpose": purpose, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return await self._post( + "/files", + body=await async_maybe_transform(body, file_create_params.FileCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FileObject, + ) + + async def retrieve( + self, + file_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FileObject: + """ + Returns information about a specific file. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + return await self._get( + f"/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FileObject, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + purpose: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[FileObject, AsyncCursorPage[FileObject]]: + """Returns a list of files. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 10,000, and the default is 10,000. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + purpose: Only return files with the given purpose. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/files", + page=AsyncCursorPage[FileObject], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "purpose": purpose, + }, + file_list_params.FileListParams, + ), + ), + model=FileObject, + ) + + async def delete( + self, + file_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FileDeleted: + """ + Delete a file. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + return await self._delete( + f"/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FileDeleted, + ) + + async def content( + self, + file_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> _legacy_response.HttpxBinaryResponseContent: + """ + Returns the contents of the specified file. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"Accept": "application/binary", **(extra_headers or {})} + return await self._get( + f"/files/{file_id}/content", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=_legacy_response.HttpxBinaryResponseContent, + ) + + @typing_extensions.deprecated("The `.content()` method should be used instead") + async def retrieve_content( + self, + file_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> str: + """ + Returns the contents of the specified file. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + return await self._get( + f"/files/{file_id}/content", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=str, + ) + + async def wait_for_processing( + self, + id: str, + *, + poll_interval: float = 5.0, + max_wait_seconds: float = 30 * 60, + ) -> FileObject: + """Waits for the given file to be processed, default timeout is 30 mins.""" + TERMINAL_STATES = {"processed", "error", "deleted"} + + start = time.time() + file = await self.retrieve(id) + while file.status not in TERMINAL_STATES: + await self._sleep(poll_interval) + + file = await self.retrieve(id) + if time.time() - start > max_wait_seconds: + raise RuntimeError( + f"Giving up on waiting for file {id} to finish processing after {max_wait_seconds} seconds." 
+ ) + + return file + + +class FilesWithRawResponse: + def __init__(self, files: Files) -> None: + self._files = files + + self.create = _legacy_response.to_raw_response_wrapper( + files.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + files.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + files.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + files.delete, + ) + self.content = _legacy_response.to_raw_response_wrapper( + files.content, + ) + self.retrieve_content = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + files.retrieve_content # pyright: ignore[reportDeprecated], + ) + ) + + +class AsyncFilesWithRawResponse: + def __init__(self, files: AsyncFiles) -> None: + self._files = files + + self.create = _legacy_response.async_to_raw_response_wrapper( + files.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + files.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + files.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + files.delete, + ) + self.content = _legacy_response.async_to_raw_response_wrapper( + files.content, + ) + self.retrieve_content = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + files.retrieve_content # pyright: ignore[reportDeprecated], + ) + ) + + +class FilesWithStreamingResponse: + def __init__(self, files: Files) -> None: + self._files = files + + self.create = to_streamed_response_wrapper( + files.create, + ) + self.retrieve = to_streamed_response_wrapper( + files.retrieve, + ) + self.list = to_streamed_response_wrapper( + files.list, + ) + self.delete = to_streamed_response_wrapper( + files.delete, + ) + self.content = to_custom_streamed_response_wrapper( + files.content, + StreamedBinaryAPIResponse, + ) + self.retrieve_content = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + 
files.retrieve_content # pyright: ignore[reportDeprecated], + ) + ) + + +class AsyncFilesWithStreamingResponse: + def __init__(self, files: AsyncFiles) -> None: + self._files = files + + self.create = async_to_streamed_response_wrapper( + files.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + files.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + files.list, + ) + self.delete = async_to_streamed_response_wrapper( + files.delete, + ) + self.content = async_to_custom_streamed_response_wrapper( + files.content, + AsyncStreamedBinaryAPIResponse, + ) + self.retrieve_content = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + files.retrieve_content # pyright: ignore[reportDeprecated], + ) + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/__init__.py new file mode 100644 index 00000000..7765231f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .jobs import ( + Jobs, + AsyncJobs, + JobsWithRawResponse, + AsyncJobsWithRawResponse, + JobsWithStreamingResponse, + AsyncJobsWithStreamingResponse, +) +from .fine_tuning import ( + FineTuning, + AsyncFineTuning, + FineTuningWithRawResponse, + AsyncFineTuningWithRawResponse, + FineTuningWithStreamingResponse, + AsyncFineTuningWithStreamingResponse, +) + +__all__ = [ + "Jobs", + "AsyncJobs", + "JobsWithRawResponse", + "AsyncJobsWithRawResponse", + "JobsWithStreamingResponse", + "AsyncJobsWithStreamingResponse", + "FineTuning", + "AsyncFineTuning", + "FineTuningWithRawResponse", + "AsyncFineTuningWithRawResponse", + "FineTuningWithStreamingResponse", + "AsyncFineTuningWithStreamingResponse", +] diff --git a/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/fine_tuning.py b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/fine_tuning.py new file mode 100644 index 00000000..eebde07d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/fine_tuning.py @@ -0,0 +1,102 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from ..._compat import cached_property +from .jobs.jobs import ( + Jobs, + AsyncJobs, + JobsWithRawResponse, + AsyncJobsWithRawResponse, + JobsWithStreamingResponse, + AsyncJobsWithStreamingResponse, +) +from ..._resource import SyncAPIResource, AsyncAPIResource + +__all__ = ["FineTuning", "AsyncFineTuning"] + + +class FineTuning(SyncAPIResource): + @cached_property + def jobs(self) -> Jobs: + return Jobs(self._client) + + @cached_property + def with_raw_response(self) -> FineTuningWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return FineTuningWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> FineTuningWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return FineTuningWithStreamingResponse(self) + + +class AsyncFineTuning(AsyncAPIResource): + @cached_property + def jobs(self) -> AsyncJobs: + return AsyncJobs(self._client) + + @cached_property + def with_raw_response(self) -> AsyncFineTuningWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncFineTuningWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncFineTuningWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncFineTuningWithStreamingResponse(self) + + +class FineTuningWithRawResponse: + def __init__(self, fine_tuning: FineTuning) -> None: + self._fine_tuning = fine_tuning + + @cached_property + def jobs(self) -> JobsWithRawResponse: + return JobsWithRawResponse(self._fine_tuning.jobs) + + +class AsyncFineTuningWithRawResponse: + def __init__(self, fine_tuning: AsyncFineTuning) -> None: + self._fine_tuning = fine_tuning + + @cached_property + def jobs(self) -> AsyncJobsWithRawResponse: + return AsyncJobsWithRawResponse(self._fine_tuning.jobs) + + +class FineTuningWithStreamingResponse: + def __init__(self, fine_tuning: FineTuning) -> None: + self._fine_tuning = fine_tuning + + @cached_property + def jobs(self) -> JobsWithStreamingResponse: + return JobsWithStreamingResponse(self._fine_tuning.jobs) + + +class AsyncFineTuningWithStreamingResponse: + def __init__(self, fine_tuning: AsyncFineTuning) -> None: + self._fine_tuning = fine_tuning + + @cached_property + def jobs(self) -> AsyncJobsWithStreamingResponse: + return AsyncJobsWithStreamingResponse(self._fine_tuning.jobs) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/__init__.py new file mode 100644 index 00000000..94cd1fb7 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .jobs import ( + Jobs, + AsyncJobs, + JobsWithRawResponse, + AsyncJobsWithRawResponse, + JobsWithStreamingResponse, + AsyncJobsWithStreamingResponse, +) +from .checkpoints import ( + Checkpoints, + AsyncCheckpoints, + CheckpointsWithRawResponse, + AsyncCheckpointsWithRawResponse, + CheckpointsWithStreamingResponse, + AsyncCheckpointsWithStreamingResponse, +) + +__all__ = [ + "Checkpoints", + "AsyncCheckpoints", + "CheckpointsWithRawResponse", + "AsyncCheckpointsWithRawResponse", + "CheckpointsWithStreamingResponse", + "AsyncCheckpointsWithStreamingResponse", + "Jobs", + "AsyncJobs", + "JobsWithRawResponse", + "AsyncJobsWithRawResponse", + "JobsWithStreamingResponse", + "AsyncJobsWithStreamingResponse", +] diff --git a/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/checkpoints.py b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/checkpoints.py new file mode 100644 index 00000000..f86462e5 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/checkpoints.py @@ -0,0 +1,199 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from .... 
import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import ( + AsyncPaginator, + make_request_options, +) +from ....types.fine_tuning.jobs import checkpoint_list_params +from ....types.fine_tuning.jobs.fine_tuning_job_checkpoint import FineTuningJobCheckpoint + +__all__ = ["Checkpoints", "AsyncCheckpoints"] + + +class Checkpoints(SyncAPIResource): + @cached_property + def with_raw_response(self) -> CheckpointsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return CheckpointsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CheckpointsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return CheckpointsWithStreamingResponse(self) + + def list( + self, + fine_tuning_job_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[FineTuningJobCheckpoint]: + """ + List checkpoints for a fine-tuning job. + + Args: + after: Identifier for the last checkpoint ID from the previous pagination request. + + limit: Number of checkpoints to retrieve. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._get_api_list( + f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints", + page=SyncCursorPage[FineTuningJobCheckpoint], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + checkpoint_list_params.CheckpointListParams, + ), + ), + model=FineTuningJobCheckpoint, + ) + + +class AsyncCheckpoints(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncCheckpointsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncCheckpointsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCheckpointsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncCheckpointsWithStreamingResponse(self) + + def list( + self, + fine_tuning_job_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[FineTuningJobCheckpoint, AsyncCursorPage[FineTuningJobCheckpoint]]: + """ + List checkpoints for a fine-tuning job. + + Args: + after: Identifier for the last checkpoint ID from the previous pagination request. + + limit: Number of checkpoints to retrieve. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._get_api_list( + f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints", + page=AsyncCursorPage[FineTuningJobCheckpoint], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + checkpoint_list_params.CheckpointListParams, + ), + ), + model=FineTuningJobCheckpoint, + ) + + +class CheckpointsWithRawResponse: + def __init__(self, checkpoints: Checkpoints) -> None: + self._checkpoints = checkpoints + + self.list = _legacy_response.to_raw_response_wrapper( + checkpoints.list, 
+ ) + + +class AsyncCheckpointsWithRawResponse: + def __init__(self, checkpoints: AsyncCheckpoints) -> None: + self._checkpoints = checkpoints + + self.list = _legacy_response.async_to_raw_response_wrapper( + checkpoints.list, + ) + + +class CheckpointsWithStreamingResponse: + def __init__(self, checkpoints: Checkpoints) -> None: + self._checkpoints = checkpoints + + self.list = to_streamed_response_wrapper( + checkpoints.list, + ) + + +class AsyncCheckpointsWithStreamingResponse: + def __init__(self, checkpoints: AsyncCheckpoints) -> None: + self._checkpoints = checkpoints + + self.list = async_to_streamed_response_wrapper( + checkpoints.list, + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/jobs.py b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/jobs.py new file mode 100644 index 00000000..bbeff60b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/fine_tuning/jobs/jobs.py @@ -0,0 +1,761 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Iterable, Optional +from typing_extensions import Literal + +import httpx + +from .... 
import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from .checkpoints import ( + Checkpoints, + AsyncCheckpoints, + CheckpointsWithRawResponse, + AsyncCheckpointsWithRawResponse, + CheckpointsWithStreamingResponse, + AsyncCheckpointsWithStreamingResponse, +) +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import ( + AsyncPaginator, + make_request_options, +) +from ....types.fine_tuning import job_list_params, job_create_params, job_list_events_params +from ....types.shared_params.metadata import Metadata +from ....types.fine_tuning.fine_tuning_job import FineTuningJob +from ....types.fine_tuning.fine_tuning_job_event import FineTuningJobEvent + +__all__ = ["Jobs", "AsyncJobs"] + + +class Jobs(SyncAPIResource): + @cached_property + def checkpoints(self) -> Checkpoints: + return Checkpoints(self._client) + + @cached_property + def with_raw_response(self) -> JobsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return JobsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> JobsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return JobsWithStreamingResponse(self) + + def create( + self, + *, + model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo", "gpt-4o-mini"]], + training_file: str, + hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN, + integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + method: job_create_params.Method | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + validation_file: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + """ + Creates a fine-tuning job which begins the process of creating a new model from + a given dataset. + + Response includes details of the enqueued job including job status and the name + of the fine-tuned models once complete. + + [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning) + + Args: + model: The name of the model to fine-tune. You can select one of the + [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned). + + training_file: The ID of an uploaded file that contains training data. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your dataset must be formatted as a JSONL file. Additionally, you must upload + your file with the purpose `fine-tune`. 
+ + The contents of the file should differ depending on if the model uses the + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) + format. + + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + for more details. + + hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. + + integrations: A list of integrations to enable for your fine-tuning job. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + method: The method used for fine-tuning. + + seed: The seed controls the reproducibility of the job. Passing in the same seed and + job parameters should produce the same results, but may differ in rare cases. If + a seed is not specified, one will be generated for you. + + suffix: A string of up to 64 characters that will be added to your fine-tuned model + name. + + For example, a `suffix` of "custom-model-name" would produce a model name like + `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`. + + validation_file: The ID of an uploaded file that contains validation data. + + If you provide this file, the data is used to generate validation metrics + periodically during fine-tuning. These metrics can be viewed in the fine-tuning + results file. The same data should not be present in both train and validation + files. + + Your dataset must be formatted as a JSONL file. 
You must upload your file with + the purpose `fine-tune`. + + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + for more details. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/fine_tuning/jobs", + body=maybe_transform( + { + "model": model, + "training_file": training_file, + "hyperparameters": hyperparameters, + "integrations": integrations, + "metadata": metadata, + "method": method, + "seed": seed, + "suffix": suffix, + "validation_file": validation_file, + }, + job_create_params.JobCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FineTuningJob, + ) + + def retrieve( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + """ + Get info about a fine-tuning job. 
+ + [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning) + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._get( + f"/fine_tuning/jobs/{fine_tuning_job_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FineTuningJob, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[FineTuningJob]: + """ + List your organization's fine-tuning jobs + + Args: + after: Identifier for the last job from the previous pagination request. + + limit: Number of fine-tuning jobs to retrieve. + + metadata: Optional metadata filter. To filter, use the syntax `metadata[k]=v`. + Alternatively, set `metadata=null` to indicate no metadata. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/fine_tuning/jobs", + page=SyncCursorPage[FineTuningJob], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "metadata": metadata, + }, + job_list_params.JobListParams, + ), + ), + model=FineTuningJob, + ) + + def cancel( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + """ + Immediately cancel a fine-tune job. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._post( + f"/fine_tuning/jobs/{fine_tuning_job_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FineTuningJob, + ) + + def list_events( + self, + fine_tuning_job_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[FineTuningJobEvent]: + """ + Get status updates for a fine-tuning job. + + Args: + after: Identifier for the last event from the previous pagination request. + + limit: Number of events to retrieve. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._get_api_list( + f"/fine_tuning/jobs/{fine_tuning_job_id}/events", + page=SyncCursorPage[FineTuningJobEvent], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + job_list_events_params.JobListEventsParams, + ), + ), + model=FineTuningJobEvent, + ) + + +class AsyncJobs(AsyncAPIResource): + @cached_property + def checkpoints(self) -> AsyncCheckpoints: + return AsyncCheckpoints(self._client) + + @cached_property + def with_raw_response(self) -> AsyncJobsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncJobsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncJobsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncJobsWithStreamingResponse(self) + + async def create( + self, + *, + model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo", "gpt-4o-mini"]], + training_file: str, + hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN, + integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + method: job_create_params.Method | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + validation_file: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + """ + Creates a fine-tuning job which begins the process of creating a new model from + a given dataset. + + Response includes details of the enqueued job including job status and the name + of the fine-tuned models once complete. + + [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning) + + Args: + model: The name of the model to fine-tune. You can select one of the + [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned). + + training_file: The ID of an uploaded file that contains training data. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your dataset must be formatted as a JSONL file. Additionally, you must upload + your file with the purpose `fine-tune`. 
+ + The contents of the file should differ depending on if the model uses the + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) + format. + + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + for more details. + + hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. + + integrations: A list of integrations to enable for your fine-tuning job. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + method: The method used for fine-tuning. + + seed: The seed controls the reproducibility of the job. Passing in the same seed and + job parameters should produce the same results, but may differ in rare cases. If + a seed is not specified, one will be generated for you. + + suffix: A string of up to 64 characters that will be added to your fine-tuned model + name. + + For example, a `suffix` of "custom-model-name" would produce a model name like + `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`. + + validation_file: The ID of an uploaded file that contains validation data. + + If you provide this file, the data is used to generate validation metrics + periodically during fine-tuning. These metrics can be viewed in the fine-tuning + results file. The same data should not be present in both train and validation + files. + + Your dataset must be formatted as a JSONL file. 
You must upload your file with + the purpose `fine-tune`. + + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + for more details. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/fine_tuning/jobs", + body=await async_maybe_transform( + { + "model": model, + "training_file": training_file, + "hyperparameters": hyperparameters, + "integrations": integrations, + "metadata": metadata, + "method": method, + "seed": seed, + "suffix": suffix, + "validation_file": validation_file, + }, + job_create_params.JobCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FineTuningJob, + ) + + async def retrieve( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + """ + Get info about a fine-tuning job. 
+ + [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning) + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return await self._get( + f"/fine_tuning/jobs/{fine_tuning_job_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FineTuningJob, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[FineTuningJob, AsyncCursorPage[FineTuningJob]]: + """ + List your organization's fine-tuning jobs + + Args: + after: Identifier for the last job from the previous pagination request. + + limit: Number of fine-tuning jobs to retrieve. + + metadata: Optional metadata filter. To filter, use the syntax `metadata[k]=v`. + Alternatively, set `metadata=null` to indicate no metadata. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/fine_tuning/jobs", + page=AsyncCursorPage[FineTuningJob], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "metadata": metadata, + }, + job_list_params.JobListParams, + ), + ), + model=FineTuningJob, + ) + + async def cancel( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + """ + Immediately cancel a fine-tune job. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return await self._post( + f"/fine_tuning/jobs/{fine_tuning_job_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FineTuningJob, + ) + + def list_events( + self, + fine_tuning_job_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[FineTuningJobEvent, AsyncCursorPage[FineTuningJobEvent]]: + """ + Get status updates for a fine-tuning job. + + Args: + after: Identifier for the last event from the previous pagination request. + + limit: Number of events to retrieve. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._get_api_list( + f"/fine_tuning/jobs/{fine_tuning_job_id}/events", + page=AsyncCursorPage[FineTuningJobEvent], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + job_list_events_params.JobListEventsParams, + ), + ), + model=FineTuningJobEvent, + ) + + +class JobsWithRawResponse: + def __init__(self, jobs: Jobs) -> None: + self._jobs = jobs + + self.create = _legacy_response.to_raw_response_wrapper( + jobs.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + jobs.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + jobs.list, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + jobs.cancel, + ) + self.list_events = _legacy_response.to_raw_response_wrapper( + jobs.list_events, + ) + + @cached_property + def checkpoints(self) -> CheckpointsWithRawResponse: + return CheckpointsWithRawResponse(self._jobs.checkpoints) + + +class AsyncJobsWithRawResponse: + def __init__(self, jobs: AsyncJobs) -> None: + self._jobs = jobs + + self.create = _legacy_response.async_to_raw_response_wrapper( + jobs.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + jobs.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + jobs.list, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + jobs.cancel, + ) + self.list_events = _legacy_response.async_to_raw_response_wrapper( + jobs.list_events, + ) + + @cached_property + def 
checkpoints(self) -> AsyncCheckpointsWithRawResponse: + return AsyncCheckpointsWithRawResponse(self._jobs.checkpoints) + + +class JobsWithStreamingResponse: + def __init__(self, jobs: Jobs) -> None: + self._jobs = jobs + + self.create = to_streamed_response_wrapper( + jobs.create, + ) + self.retrieve = to_streamed_response_wrapper( + jobs.retrieve, + ) + self.list = to_streamed_response_wrapper( + jobs.list, + ) + self.cancel = to_streamed_response_wrapper( + jobs.cancel, + ) + self.list_events = to_streamed_response_wrapper( + jobs.list_events, + ) + + @cached_property + def checkpoints(self) -> CheckpointsWithStreamingResponse: + return CheckpointsWithStreamingResponse(self._jobs.checkpoints) + + +class AsyncJobsWithStreamingResponse: + def __init__(self, jobs: AsyncJobs) -> None: + self._jobs = jobs + + self.create = async_to_streamed_response_wrapper( + jobs.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + jobs.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + jobs.list, + ) + self.cancel = async_to_streamed_response_wrapper( + jobs.cancel, + ) + self.list_events = async_to_streamed_response_wrapper( + jobs.list_events, + ) + + @cached_property + def checkpoints(self) -> AsyncCheckpointsWithStreamingResponse: + return AsyncCheckpointsWithStreamingResponse(self._jobs.checkpoints) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/images.py b/.venv/lib/python3.12/site-packages/openai/resources/images.py new file mode 100644 index 00000000..30473c14 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/images.py @@ -0,0 +1,600 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Mapping, Optional, cast +from typing_extensions import Literal + +import httpx + +from .. 
import _legacy_response +from ..types import image_edit_params, image_generate_params, image_create_variation_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from .._utils import ( + extract_files, + maybe_transform, + deepcopy_minimal, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .._base_client import make_request_options +from ..types.image_model import ImageModel +from ..types.images_response import ImagesResponse + +__all__ = ["Images", "AsyncImages"] + + +class Images(SyncAPIResource): + @cached_property + def with_raw_response(self) -> ImagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ImagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ImagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ImagesWithStreamingResponse(self) + + def create_variation( + self, + *, + image: FileTypes, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, + size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ImagesResponse: + """ + Creates a variation of a given image. + + Args: + image: The image to use as the basis for the variation(s). Must be a valid PNG file, + less than 4MB, and square. + + model: The model to use for image generation. Only `dall-e-2` is supported at this + time. + + n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only + `n=1` is supported. + + response_format: The format in which the generated images are returned. Must be one of `url` or + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. + + size: The size of the generated images. Must be one of `256x256`, `512x512`, or + `1024x1024`. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "image": image, + "model": model, + "n": n, + "response_format": response_format, + "size": size, + "user": user, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["image"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return self._post( + "/images/variations", + body=maybe_transform(body, image_create_variation_params.ImageCreateVariationParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ImagesResponse, + ) + + def edit( + self, + *, + image: FileTypes, + prompt: str, + mask: FileTypes | NotGiven = NOT_GIVEN, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, + size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ImagesResponse: + """ + Creates an edited or extended image given an original image and a prompt. + + Args: + image: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask + is not provided, image must have transparency, which will be used as the mask. + + prompt: A text description of the desired image(s). The maximum length is 1000 + characters. + + mask: An additional image whose fully transparent areas (e.g. where alpha is zero) + indicate where `image` should be edited. Must be a valid PNG file, less than + 4MB, and have the same dimensions as `image`. + + model: The model to use for image generation. Only `dall-e-2` is supported at this + time. + + n: The number of images to generate. 
Must be between 1 and 10. + + response_format: The format in which the generated images are returned. Must be one of `url` or + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. + + size: The size of the generated images. Must be one of `256x256`, `512x512`, or + `1024x1024`. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "image": image, + "prompt": prompt, + "mask": mask, + "model": model, + "n": n, + "response_format": response_format, + "size": size, + "user": user, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["mask"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return self._post( + "/images/edits", + body=maybe_transform(body, image_edit_params.ImageEditParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ImagesResponse, + ) + + def generate( + self, + *, + prompt: str, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN, + response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, + size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] | NotGiven = NOT_GIVEN, + style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ImagesResponse: + """ + Creates an image given a prompt. + + Args: + prompt: A text description of the desired image(s). The maximum length is 1000 + characters for `dall-e-2` and 4000 characters for `dall-e-3`. + + model: The model to use for image generation. + + n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only + `n=1` is supported. + + quality: The quality of the image that will be generated. `hd` creates images with finer + details and greater consistency across the image. This param is only supported + for `dall-e-3`. + + response_format: The format in which the generated images are returned. 
Must be one of `url` or + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. + + size: The size of the generated images. Must be one of `256x256`, `512x512`, or + `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or + `1024x1792` for `dall-e-3` models. + + style: The style of the generated images. Must be one of `vivid` or `natural`. Vivid + causes the model to lean towards generating hyper-real and dramatic images. + Natural causes the model to produce more natural, less hyper-real looking + images. This param is only supported for `dall-e-3`. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/images/generations", + body=maybe_transform( + { + "prompt": prompt, + "model": model, + "n": n, + "quality": quality, + "response_format": response_format, + "size": size, + "style": style, + "user": user, + }, + image_generate_params.ImageGenerateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ImagesResponse, + ) + + +class AsyncImages(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncImagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncImagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncImagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncImagesWithStreamingResponse(self) + + async def create_variation( + self, + *, + image: FileTypes, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, + size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ImagesResponse: + """ + Creates a variation of a given image. + + Args: + image: The image to use as the basis for the variation(s). Must be a valid PNG file, + less than 4MB, and square. + + model: The model to use for image generation. Only `dall-e-2` is supported at this + time. + + n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only + `n=1` is supported. + + response_format: The format in which the generated images are returned. Must be one of `url` or + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. + + size: The size of the generated images. Must be one of `256x256`, `512x512`, or + `1024x1024`. 
+ + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "image": image, + "model": model, + "n": n, + "response_format": response_format, + "size": size, + "user": user, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["image"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return await self._post( + "/images/variations", + body=await async_maybe_transform(body, image_create_variation_params.ImageCreateVariationParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ImagesResponse, + ) + + async def edit( + self, + *, + image: FileTypes, + prompt: str, + mask: FileTypes | NotGiven = NOT_GIVEN, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, + size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ImagesResponse: + """ + Creates an edited or extended image given an original image and a prompt. + + Args: + image: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask + is not provided, image must have transparency, which will be used as the mask. + + prompt: A text description of the desired image(s). The maximum length is 1000 + characters. + + mask: An additional image whose fully transparent areas (e.g. where alpha is zero) + indicate where `image` should be edited. Must be a valid PNG file, less than + 4MB, and have the same dimensions as `image`. + + model: The model to use for image generation. Only `dall-e-2` is supported at this + time. + + n: The number of images to generate. Must be between 1 and 10. + + response_format: The format in which the generated images are returned. Must be one of `url` or + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. + + size: The size of the generated images. Must be one of `256x256`, `512x512`, or + `1024x1024`. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "image": image, + "prompt": prompt, + "mask": mask, + "model": model, + "n": n, + "response_format": response_format, + "size": size, + "user": user, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["mask"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return await self._post( + "/images/edits", + body=await async_maybe_transform(body, image_edit_params.ImageEditParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ImagesResponse, + ) + + async def generate( + self, + *, + prompt: str, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN, + response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, + size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] | NotGiven = NOT_GIVEN, + style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ImagesResponse: + """ + Creates an image given a prompt. + + Args: + prompt: A text description of the desired image(s). The maximum length is 1000 + characters for `dall-e-2` and 4000 characters for `dall-e-3`. + + model: The model to use for image generation. + + n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only + `n=1` is supported. + + quality: The quality of the image that will be generated. `hd` creates images with finer + details and greater consistency across the image. This param is only supported + for `dall-e-3`. + + response_format: The format in which the generated images are returned. Must be one of `url` or + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. + + size: The size of the generated images. Must be one of `256x256`, `512x512`, or + `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or + `1024x1792` for `dall-e-3` models. + + style: The style of the generated images. Must be one of `vivid` or `natural`. Vivid + causes the model to lean towards generating hyper-real and dramatic images. + Natural causes the model to produce more natural, less hyper-real looking + images. This param is only supported for `dall-e-3`. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/images/generations", + body=await async_maybe_transform( + { + "prompt": prompt, + "model": model, + "n": n, + "quality": quality, + "response_format": response_format, + "size": size, + "style": style, + "user": user, + }, + image_generate_params.ImageGenerateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ImagesResponse, + ) + + +class ImagesWithRawResponse: + def __init__(self, images: Images) -> None: + self._images = images + + self.create_variation = _legacy_response.to_raw_response_wrapper( + images.create_variation, + ) + self.edit = _legacy_response.to_raw_response_wrapper( + images.edit, + ) + self.generate = _legacy_response.to_raw_response_wrapper( + images.generate, + ) + + +class AsyncImagesWithRawResponse: + def __init__(self, images: AsyncImages) -> None: + self._images = images + + self.create_variation = _legacy_response.async_to_raw_response_wrapper( + images.create_variation, + ) + self.edit = _legacy_response.async_to_raw_response_wrapper( + images.edit, + ) + self.generate = _legacy_response.async_to_raw_response_wrapper( + images.generate, + ) + + +class ImagesWithStreamingResponse: + def __init__(self, images: Images) -> None: + self._images = images + + self.create_variation = to_streamed_response_wrapper( + images.create_variation, + ) + self.edit = to_streamed_response_wrapper( + images.edit, + ) + self.generate = to_streamed_response_wrapper( + images.generate, + ) + + +class AsyncImagesWithStreamingResponse: + def __init__(self, images: AsyncImages) -> None: + self._images = images + + self.create_variation = async_to_streamed_response_wrapper( + 
images.create_variation, + ) + self.edit = async_to_streamed_response_wrapper( + images.edit, + ) + self.generate = async_to_streamed_response_wrapper( + images.generate, + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/models.py b/.venv/lib/python3.12/site-packages/openai/resources/models.py new file mode 100644 index 00000000..a9693a6b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/models.py @@ -0,0 +1,306 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from .. import _legacy_response +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..pagination import SyncPage, AsyncPage +from ..types.model import Model +from .._base_client import ( + AsyncPaginator, + make_request_options, +) +from ..types.model_deleted import ModelDeleted + +__all__ = ["Models", "AsyncModels"] + + +class Models(SyncAPIResource): + @cached_property + def with_raw_response(self) -> ModelsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ModelsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ModelsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ModelsWithStreamingResponse(self) + + def retrieve( + self, + model: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Model: + """ + Retrieves a model instance, providing basic information about the model such as + the owner and permissioning. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not model: + raise ValueError(f"Expected a non-empty value for `model` but received {model!r}") + return self._get( + f"/models/{model}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Model, + ) + + def list( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncPage[Model]: + """ + Lists the currently available models, and provides basic information about each + one such as the owner and availability. 
+ """ + return self._get_api_list( + "/models", + page=SyncPage[Model], + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=Model, + ) + + def delete( + self, + model: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ModelDeleted: + """Delete a fine-tuned model. + + You must have the Owner role in your organization to + delete a model. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not model: + raise ValueError(f"Expected a non-empty value for `model` but received {model!r}") + return self._delete( + f"/models/{model}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ModelDeleted, + ) + + +class AsyncModels(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncModelsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncModelsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncModelsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncModelsWithStreamingResponse(self) + + async def retrieve( + self, + model: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Model: + """ + Retrieves a model instance, providing basic information about the model such as + the owner and permissioning. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not model: + raise ValueError(f"Expected a non-empty value for `model` but received {model!r}") + return await self._get( + f"/models/{model}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Model, + ) + + def list( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[Model, AsyncPage[Model]]: + """ + Lists the currently available models, and provides basic information about each + one such as the owner and availability. 
+ """ + return self._get_api_list( + "/models", + page=AsyncPage[Model], + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=Model, + ) + + async def delete( + self, + model: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ModelDeleted: + """Delete a fine-tuned model. + + You must have the Owner role in your organization to + delete a model. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not model: + raise ValueError(f"Expected a non-empty value for `model` but received {model!r}") + return await self._delete( + f"/models/{model}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ModelDeleted, + ) + + +class ModelsWithRawResponse: + def __init__(self, models: Models) -> None: + self._models = models + + self.retrieve = _legacy_response.to_raw_response_wrapper( + models.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + models.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + models.delete, + ) + + +class AsyncModelsWithRawResponse: + def __init__(self, models: AsyncModels) -> None: + self._models = models + + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + models.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + 
models.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + models.delete, + ) + + +class ModelsWithStreamingResponse: + def __init__(self, models: Models) -> None: + self._models = models + + self.retrieve = to_streamed_response_wrapper( + models.retrieve, + ) + self.list = to_streamed_response_wrapper( + models.list, + ) + self.delete = to_streamed_response_wrapper( + models.delete, + ) + + +class AsyncModelsWithStreamingResponse: + def __init__(self, models: AsyncModels) -> None: + self._models = models + + self.retrieve = async_to_streamed_response_wrapper( + models.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + models.list, + ) + self.delete = async_to_streamed_response_wrapper( + models.delete, + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/moderations.py b/.venv/lib/python3.12/site-packages/openai/resources/moderations.py new file mode 100644 index 00000000..a8f03142 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/moderations.py @@ -0,0 +1,200 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable + +import httpx + +from .. 
import _legacy_response +from ..types import moderation_create_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import ( + maybe_transform, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .._base_client import make_request_options +from ..types.moderation_model import ModerationModel +from ..types.moderation_create_response import ModerationCreateResponse +from ..types.moderation_multi_modal_input_param import ModerationMultiModalInputParam + +__all__ = ["Moderations", "AsyncModerations"] + + +class Moderations(SyncAPIResource): + @cached_property + def with_raw_response(self) -> ModerationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ModerationsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ModerationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ModerationsWithStreamingResponse(self) + + def create( + self, + *, + input: Union[str, List[str], Iterable[ModerationMultiModalInputParam]], + model: Union[str, ModerationModel] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ModerationCreateResponse: + """Classifies if text and/or image inputs are potentially harmful. + + Learn more in + the [moderation guide](https://platform.openai.com/docs/guides/moderation). + + Args: + input: Input (or inputs) to classify. Can be a single string, an array of strings, or + an array of multi-modal input objects similar to other models. + + model: The content moderation model you would like to use. Learn more in + [the moderation guide](https://platform.openai.com/docs/guides/moderation), and + learn about available models + [here](https://platform.openai.com/docs/models#moderation). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/moderations", + body=maybe_transform( + { + "input": input, + "model": model, + }, + moderation_create_params.ModerationCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ModerationCreateResponse, + ) + + +class AsyncModerations(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncModerationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncModerationsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncModerationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncModerationsWithStreamingResponse(self) + + async def create( + self, + *, + input: Union[str, List[str], Iterable[ModerationMultiModalInputParam]], + model: Union[str, ModerationModel] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ModerationCreateResponse: + """Classifies if text and/or image inputs are potentially harmful. + + Learn more in + the [moderation guide](https://platform.openai.com/docs/guides/moderation). + + Args: + input: Input (or inputs) to classify. Can be a single string, an array of strings, or + an array of multi-modal input objects similar to other models. + + model: The content moderation model you would like to use. Learn more in + [the moderation guide](https://platform.openai.com/docs/guides/moderation), and + learn about available models + [here](https://platform.openai.com/docs/models#moderation). 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/moderations", + body=await async_maybe_transform( + { + "input": input, + "model": model, + }, + moderation_create_params.ModerationCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ModerationCreateResponse, + ) + + +class ModerationsWithRawResponse: + def __init__(self, moderations: Moderations) -> None: + self._moderations = moderations + + self.create = _legacy_response.to_raw_response_wrapper( + moderations.create, + ) + + +class AsyncModerationsWithRawResponse: + def __init__(self, moderations: AsyncModerations) -> None: + self._moderations = moderations + + self.create = _legacy_response.async_to_raw_response_wrapper( + moderations.create, + ) + + +class ModerationsWithStreamingResponse: + def __init__(self, moderations: Moderations) -> None: + self._moderations = moderations + + self.create = to_streamed_response_wrapper( + moderations.create, + ) + + +class AsyncModerationsWithStreamingResponse: + def __init__(self, moderations: AsyncModerations) -> None: + self._moderations = moderations + + self.create = async_to_streamed_response_wrapper( + moderations.create, + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/responses/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/responses/__init__.py new file mode 100644 index 00000000..ad19218b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/responses/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .responses import ( + Responses, + AsyncResponses, + ResponsesWithRawResponse, + AsyncResponsesWithRawResponse, + ResponsesWithStreamingResponse, + AsyncResponsesWithStreamingResponse, +) +from .input_items import ( + InputItems, + AsyncInputItems, + InputItemsWithRawResponse, + AsyncInputItemsWithRawResponse, + InputItemsWithStreamingResponse, + AsyncInputItemsWithStreamingResponse, +) + +__all__ = [ + "InputItems", + "AsyncInputItems", + "InputItemsWithRawResponse", + "AsyncInputItemsWithRawResponse", + "InputItemsWithStreamingResponse", + "AsyncInputItemsWithStreamingResponse", + "Responses", + "AsyncResponses", + "ResponsesWithRawResponse", + "AsyncResponsesWithRawResponse", + "ResponsesWithStreamingResponse", + "AsyncResponsesWithStreamingResponse", +] diff --git a/.venv/lib/python3.12/site-packages/openai/resources/responses/input_items.py b/.venv/lib/python3.12/site-packages/openai/resources/responses/input_items.py new file mode 100644 index 00000000..e341393c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/responses/input_items.py @@ -0,0 +1,223 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Any, cast +from typing_extensions import Literal + +import httpx + +from ... 
import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncCursorPage, AsyncCursorPage +from ..._base_client import AsyncPaginator, make_request_options +from ...types.responses import input_item_list_params +from ...types.responses.response_item import ResponseItem + +__all__ = ["InputItems", "AsyncInputItems"] + + +class InputItems(SyncAPIResource): + @cached_property + def with_raw_response(self) -> InputItemsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return InputItemsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> InputItemsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return InputItemsWithStreamingResponse(self) + + def list( + self, + response_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[ResponseItem]: + """ + Returns a list of input items for a given response. + + Args: + after: An item ID to list items after, used in pagination. + + before: An item ID to list items before, used in pagination. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: The order to return the input items in. Default is `asc`. + + - `asc`: Return the input items in ascending order. + - `desc`: Return the input items in descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return self._get_api_list( + f"/responses/{response_id}/input_items", + page=SyncCursorPage[ResponseItem], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + input_item_list_params.InputItemListParams, + ), + ), + model=cast(Any, ResponseItem), # Union types cannot be passed in as arguments in the type system + ) + + +class AsyncInputItems(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncInputItemsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncInputItemsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncInputItemsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncInputItemsWithStreamingResponse(self) + + def list( + self, + response_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[ResponseItem, AsyncCursorPage[ResponseItem]]: + """ + Returns a list of input items for a given response. + + Args: + after: An item ID to list items after, used in pagination. + + before: An item ID to list items before, used in pagination. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: The order to return the input items in. Default is `asc`. + + - `asc`: Return the input items in ascending order. + - `desc`: Return the input items in descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return self._get_api_list( + f"/responses/{response_id}/input_items", + page=AsyncCursorPage[ResponseItem], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + input_item_list_params.InputItemListParams, + ), + ), + model=cast(Any, ResponseItem), # Union types cannot be passed in as arguments in the type system + ) + + +class InputItemsWithRawResponse: + def __init__(self, input_items: InputItems) -> None: + self._input_items = input_items + + self.list = _legacy_response.to_raw_response_wrapper( + input_items.list, + ) + + +class AsyncInputItemsWithRawResponse: + def __init__(self, input_items: AsyncInputItems) -> None: + self._input_items = input_items + + self.list = _legacy_response.async_to_raw_response_wrapper( + input_items.list, + ) + + +class InputItemsWithStreamingResponse: + def __init__(self, input_items: InputItems) -> None: + self._input_items = input_items + + self.list = to_streamed_response_wrapper( + input_items.list, + ) + + +class AsyncInputItemsWithStreamingResponse: + def __init__(self, input_items: AsyncInputItems) -> None: + self._input_items = input_items + + self.list = async_to_streamed_response_wrapper( + input_items.list, + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/responses/responses.py b/.venv/lib/python3.12/site-packages/openai/resources/responses/responses.py new file mode 100644 index 00000000..668f4db8 --- /dev/null +++ 
b/.venv/lib/python3.12/site-packages/openai/resources/responses/responses.py @@ -0,0 +1,1791 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Any, List, Type, Union, Iterable, Optional, cast +from functools import partial +from typing_extensions import Literal, overload + +import httpx + +from ... import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven +from ..._utils import ( + is_given, + required_args, + maybe_transform, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .input_items import ( + InputItems, + AsyncInputItems, + InputItemsWithRawResponse, + AsyncInputItemsWithRawResponse, + InputItemsWithStreamingResponse, + AsyncInputItemsWithStreamingResponse, +) +from ..._streaming import Stream, AsyncStream +from ...lib._tools import PydanticFunctionTool, ResponsesPydanticFunctionTool +from ..._base_client import make_request_options +from ...types.responses import response_create_params, response_retrieve_params +from ...lib._parsing._responses import ( + TextFormatT, + parse_response, + type_to_text_format_param as _type_to_text_format_param, +) +from ...types.shared.chat_model import ChatModel +from ...types.responses.response import Response +from ...types.responses.tool_param import ToolParam, ParseableToolParam +from ...types.shared_params.metadata import Metadata +from ...types.shared_params.reasoning import Reasoning +from ...types.responses.parsed_response import ParsedResponse +from ...lib.streaming.responses._responses import ResponseStreamManager, AsyncResponseStreamManager +from ...types.responses.response_includable import ResponseIncludable +from ...types.shared_params.responses_model import ResponsesModel +from 
...types.responses.response_input_param import ResponseInputParam +from ...types.responses.response_stream_event import ResponseStreamEvent +from ...types.responses.response_text_config_param import ResponseTextConfigParam + +__all__ = ["Responses", "AsyncResponses"] + + +class Responses(SyncAPIResource): + @cached_property + def input_items(self) -> InputItems: + return InputItems(self._client) + + @cached_property + def with_raw_response(self) -> ResponsesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ResponsesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ResponsesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ResponsesWithStreamingResponse(self) + + @overload + def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: 
Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. 
Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When used along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. 
+ + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). 
+ + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
+ + @overload + def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + stream: Literal[True], + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[ResponseStreamEvent]: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. 
Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. 
+ + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When used along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. 
Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. 
+ + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + stream: bool, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response | Stream[ResponseStreamEvent]: + """Creates a model response. 
+ + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Specify additional output data to include in the model response. 
Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both.
+ + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. 
+ - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["input", "model"], ["input", "model", "stream"]) + def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response | Stream[ResponseStreamEvent]: + return self._post( + "/responses", + body=maybe_transform( + { + "input": input, + "model": model, + "include": include, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "metadata": metadata, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "reasoning": reasoning, + "store": store, + "stream": stream, + "temperature": temperature, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation": truncation, + "user": user, + }, + response_create_params.ResponseCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Response, + stream=stream or False, + stream_cls=Stream[ResponseStreamEvent], + ) + + def stream( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + text_format: type[TextFormatT] | NotGiven = NOT_GIVEN, + tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", 
"disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ResponseStreamManager[TextFormatT]: + if is_given(text_format): + if not text: + text = {} + + if "format" in text: + raise TypeError("Cannot mix and match text.format with text_format") + + text["format"] = _type_to_text_format_param(text_format) + + tools = _make_tools(tools) + + api_request: partial[Stream[ResponseStreamEvent]] = partial( + self.create, + input=input, + model=model, + tools=tools, + include=include, + instructions=instructions, + max_output_tokens=max_output_tokens, + metadata=metadata, + parallel_tool_calls=parallel_tool_calls, + previous_response_id=previous_response_id, + store=store, + stream=True, + temperature=temperature, + text=text, + tool_choice=tool_choice, + reasoning=reasoning, + top_p=top_p, + truncation=truncation, + user=user, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + + return ResponseStreamManager( + api_request, + text_format=text_format, + input_tools=tools, + ) + + def parse( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + text_format: type[TextFormatT] | NotGiven = NOT_GIVEN, + tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + 
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ParsedResponse[TextFormatT]: + if is_given(text_format): + if not text: + text = {} + + if "format" in text: + raise TypeError("Cannot mix and match text.format with text_format") + + text["format"] = _type_to_text_format_param(text_format) + + tools = _make_tools(tools) + + def parser(raw_response: Response) -> ParsedResponse[TextFormatT]: + return parse_response( + input_tools=tools, + text_format=text_format, + response=raw_response, + ) + + return self._post( + "/responses", + body=maybe_transform( + { + "input": input, + "model": model, + "include": include, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "metadata": metadata, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "reasoning": reasoning, + "store": store, + "stream": stream, + "temperature": temperature, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation": truncation, + "user": user, + }, + 
response_create_params.ResponseCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=parser, + ), + # we turn the `Response` instance into a `ParsedResponse` + # in the `parser` function above + cast_to=cast(Type[ParsedResponse[TextFormatT]], Response), + ) + + def retrieve( + self, + response_id: str, + *, + include: List[ResponseIncludable] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response: + """ + Retrieves a model response with the given ID. + + Args: + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return self._get( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"include": include}, response_retrieve_params.ResponseRetrieveParams), + ), + cast_to=Response, + ) + + def delete( + self, + response_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + Deletes a model response with the given ID. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._delete( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class AsyncResponses(AsyncAPIResource): + @cached_property + def input_items(self) -> AsyncInputItems: + return AsyncInputItems(self._client) + + @cached_property + def with_raw_response(self) -> AsyncResponsesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncResponsesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncResponsesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncResponsesWithStreamingResponse(self) + + @overload + async def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. 
Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses.
+ + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. 
Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. 
+ + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + stream: Literal[True], + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[ResponseStreamEvent]: + """Creates a model response. 
+ + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Specify additional output data to include in the model response. 
Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will be not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. 
+ + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. 
+ - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + stream: bool, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response | AsyncStream[ResponseStreamEvent]: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). 
+ See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will be not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. 
+ + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. 
+ + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["input", "model"], ["input", "model", "stream"]) + async def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass 
additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response | AsyncStream[ResponseStreamEvent]: + return await self._post( + "/responses", + body=await async_maybe_transform( + { + "input": input, + "model": model, + "include": include, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "metadata": metadata, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "reasoning": reasoning, + "store": store, + "stream": stream, + "temperature": temperature, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation": truncation, + "user": user, + }, + response_create_params.ResponseCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Response, + stream=stream or False, + stream_cls=AsyncStream[ResponseStreamEvent], + ) + + def stream( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + text_format: type[TextFormatT] | NotGiven = NOT_GIVEN, + tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: 
ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncResponseStreamManager[TextFormatT]: + if is_given(text_format): + if not text: + text = {} + + if "format" in text: + raise TypeError("Cannot mix and match text.format with text_format") + + text["format"] = _type_to_text_format_param(text_format) + + tools = _make_tools(tools) + + api_request = self.create( + input=input, + model=model, + tools=tools, + include=include, + instructions=instructions, + max_output_tokens=max_output_tokens, + metadata=metadata, + parallel_tool_calls=parallel_tool_calls, + previous_response_id=previous_response_id, + store=store, + stream=True, + temperature=temperature, + text=text, + tool_choice=tool_choice, + reasoning=reasoning, + top_p=top_p, + truncation=truncation, + user=user, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + + return AsyncResponseStreamManager( + api_request, + text_format=text_format, + input_tools=tools, + ) + + async def parse( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + text_format: type[TextFormatT] | NotGiven = NOT_GIVEN, + tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + 
max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ParsedResponse[TextFormatT]: + if is_given(text_format): + if not text: + text = {} + + if "format" in text: + raise TypeError("Cannot mix and match text.format with text_format") + + text["format"] = _type_to_text_format_param(text_format) + + tools = _make_tools(tools) + + def parser(raw_response: Response) -> ParsedResponse[TextFormatT]: + return parse_response( + input_tools=tools, + text_format=text_format, + response=raw_response, + ) + + return await self._post( + "/responses", + body=maybe_transform( + { + "input": input, + "model": model, + "include": include, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "metadata": metadata, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "reasoning": reasoning, + "store": store, + "stream": 
stream, + "temperature": temperature, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation": truncation, + "user": user, + }, + response_create_params.ResponseCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=parser, + ), + # we turn the `Response` instance into a `ParsedResponse` + # in the `parser` function above + cast_to=cast(Type[ParsedResponse[TextFormatT]], Response), + ) + + async def retrieve( + self, + response_id: str, + *, + include: List[ResponseIncludable] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response: + """ + Retrieves a model response with the given ID. + + Args: + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return await self._get( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + {"include": include}, response_retrieve_params.ResponseRetrieveParams + ), + ), + cast_to=Response, + ) + + async def delete( + self, + response_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + Deletes a model response with the given ID. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._delete( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class ResponsesWithRawResponse: + def __init__(self, responses: Responses) -> None: + self._responses = responses + + self.create = _legacy_response.to_raw_response_wrapper( + responses.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + responses.retrieve, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + responses.delete, + ) + + @cached_property + def input_items(self) -> InputItemsWithRawResponse: + return InputItemsWithRawResponse(self._responses.input_items) + + +class AsyncResponsesWithRawResponse: + def __init__(self, responses: AsyncResponses) -> None: + self._responses = responses + + self.create = _legacy_response.async_to_raw_response_wrapper( + responses.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + responses.retrieve, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + responses.delete, + ) + + @cached_property + def input_items(self) -> AsyncInputItemsWithRawResponse: + return AsyncInputItemsWithRawResponse(self._responses.input_items) + + +class ResponsesWithStreamingResponse: + def __init__(self, responses: Responses) -> None: + self._responses = responses + + self.create = to_streamed_response_wrapper( + responses.create, + ) + self.retrieve = to_streamed_response_wrapper( + responses.retrieve, + ) + 
self.delete = to_streamed_response_wrapper( + responses.delete, + ) + + @cached_property + def input_items(self) -> InputItemsWithStreamingResponse: + return InputItemsWithStreamingResponse(self._responses.input_items) + + +class AsyncResponsesWithStreamingResponse: + def __init__(self, responses: AsyncResponses) -> None: + self._responses = responses + + self.create = async_to_streamed_response_wrapper( + responses.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + responses.retrieve, + ) + self.delete = async_to_streamed_response_wrapper( + responses.delete, + ) + + @cached_property + def input_items(self) -> AsyncInputItemsWithStreamingResponse: + return AsyncInputItemsWithStreamingResponse(self._responses.input_items) + + +def _make_tools(tools: Iterable[ParseableToolParam] | NotGiven) -> List[ToolParam] | NotGiven: + if not is_given(tools): + return NOT_GIVEN + + converted_tools: List[ToolParam] = [] + for tool in tools: + if tool["type"] != "function": + converted_tools.append(tool) + continue + + if "function" not in tool: + # standard Responses API case + converted_tools.append(tool) + continue + + function = cast(Any, tool)["function"] # pyright: ignore[reportUnnecessaryCast] + if not isinstance(function, PydanticFunctionTool): + raise Exception( + "Expected Chat Completions function tool shape to be created using `openai.pydantic_function_tool()`" + ) + + assert "parameters" in function + new_tool = ResponsesPydanticFunctionTool( + { + "type": "function", + "name": function["name"], + "description": function.get("description"), + "parameters": function["parameters"], + "strict": function.get("strict") or False, + }, + function.model, + ) + + converted_tools.append(new_tool.cast()) + + return converted_tools diff --git a/.venv/lib/python3.12/site-packages/openai/resources/responses/responses.py.orig b/.venv/lib/python3.12/site-packages/openai/resources/responses/responses.py.orig new file mode 100644 index 00000000..dec4c193 --- /dev/null 
+++ b/.venv/lib/python3.12/site-packages/openai/resources/responses/responses.py.orig @@ -0,0 +1,1796 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Any, List, Type, Union, Iterable, Optional, cast +from functools import partial +from typing_extensions import Literal, overload + +import httpx + +from ... import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven +from ..._utils import ( + is_given, + required_args, + maybe_transform, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .input_items import ( + InputItems, + AsyncInputItems, + InputItemsWithRawResponse, + AsyncInputItemsWithRawResponse, + InputItemsWithStreamingResponse, + AsyncInputItemsWithStreamingResponse, +) +from ..._streaming import Stream, AsyncStream +from ...lib._tools import PydanticFunctionTool, ResponsesPydanticFunctionTool +from ..._base_client import make_request_options +from ...types.responses import response_create_params, response_retrieve_params +<<<<<<< HEAD +from ...lib._parsing._responses import ( + TextFormatT, + parse_response, + type_to_text_format_param as _type_to_text_format_param, +) +from ...types.shared.chat_model import ChatModel +||||||| parent of 001707b8 (feat(api): o1-pro now available through the API (#2228)) +from ...types.shared.chat_model import ChatModel +======= +>>>>>>> 001707b8 (feat(api): o1-pro now available through the API (#2228)) +from ...types.responses.response import Response +from ...types.responses.tool_param import ToolParam, ParseableToolParam +from ...types.shared_params.metadata import Metadata +from ...types.shared_params.reasoning import Reasoning +from ...types.responses.parsed_response import ParsedResponse +from 
...lib.streaming.responses._responses import ResponseStreamManager, AsyncResponseStreamManager +from ...types.responses.response_includable import ResponseIncludable +from ...types.shared_params.responses_model import ResponsesModel +from ...types.responses.response_input_param import ResponseInputParam +from ...types.responses.response_stream_event import ResponseStreamEvent +from ...types.responses.response_text_config_param import ResponseTextConfigParam + +__all__ = ["Responses", "AsyncResponses"] + + +class Responses(SyncAPIResource): + @cached_property + def input_items(self) -> InputItems: + return InputItems(self._client) + + @cached_property + def with_raw_response(self) -> ResponsesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ResponsesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ResponsesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ResponsesWithStreamingResponse(self) + + @overload + def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. 
Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will be not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. 
+ + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. 
Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. 
+ + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + stream: Literal[True], + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[ResponseStreamEvent]: + """Creates a model response. 
+ + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Specify additional output data to include in the model response. 
Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both.
+ + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. 
+ - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + stream: bool, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response | Stream[ResponseStreamEvent]: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). 
+ See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API.
+ + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. 
+ + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["input", "model"], ["input", "model", "stream"]) + def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional 
parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response | Stream[ResponseStreamEvent]: + return self._post( + "/responses", + body=maybe_transform( + { + "input": input, + "model": model, + "include": include, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "metadata": metadata, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "reasoning": reasoning, + "store": store, + "stream": stream, + "temperature": temperature, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation": truncation, + "user": user, + }, + response_create_params.ResponseCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Response, + stream=stream or False, + stream_cls=Stream[ResponseStreamEvent], + ) + + def stream( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + text_format: type[TextFormatT] | NotGiven = NOT_GIVEN, + tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, 
+ tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ResponseStreamManager[TextFormatT]: + if is_given(text_format): + if not text: + text = {} + + if "format" in text: + raise TypeError("Cannot mix and match text.format with text_format") + + text["format"] = _type_to_text_format_param(text_format) + + tools = _make_tools(tools) + + api_request: partial[Stream[ResponseStreamEvent]] = partial( + self.create, + input=input, + model=model, + tools=tools, + include=include, + instructions=instructions, + max_output_tokens=max_output_tokens, + metadata=metadata, + parallel_tool_calls=parallel_tool_calls, + previous_response_id=previous_response_id, + store=store, + stream=True, + temperature=temperature, + text=text, + tool_choice=tool_choice, + reasoning=reasoning, + top_p=top_p, + truncation=truncation, + user=user, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + + return ResponseStreamManager( + api_request, + text_format=text_format, + input_tools=tools, + ) + + def parse( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + text_format: type[TextFormatT] | NotGiven = NOT_GIVEN, + tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: 
Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ParsedResponse[TextFormatT]: + if is_given(text_format): + if not text: + text = {} + + if "format" in text: + raise TypeError("Cannot mix and match text.format with text_format") + + text["format"] = _type_to_text_format_param(text_format) + + tools = _make_tools(tools) + + def parser(raw_response: Response) -> ParsedResponse[TextFormatT]: + return parse_response( + input_tools=tools, + text_format=text_format, + response=raw_response, + ) + + return self._post( + "/responses", + body=maybe_transform( + { + "input": input, + "model": model, + "include": include, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "metadata": metadata, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "reasoning": reasoning, + "store": store, + "stream": stream, + "temperature": 
temperature, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation": truncation, + "user": user, + }, + response_create_params.ResponseCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=parser, + ), + # we turn the `Response` instance into a `ParsedResponse` + # in the `parser` function above + cast_to=cast(Type[ParsedResponse[TextFormatT]], Response), + ) + + def retrieve( + self, + response_id: str, + *, + include: List[ResponseIncludable] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response: + """ + Retrieves a model response with the given ID. + + Args: + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return self._get( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"include": include}, response_retrieve_params.ResponseRetrieveParams), + ), + cast_to=Response, + ) + + def delete( + self, + response_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + Deletes a model response with the given ID. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._delete( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class AsyncResponses(AsyncAPIResource): + @cached_property + def input_items(self) -> AsyncInputItems: + return AsyncInputItems(self._client) + + @cached_property + def with_raw_response(self) -> AsyncResponsesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncResponsesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncResponsesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncResponsesWithStreamingResponse(self) + + @overload + async def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. 
Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will be not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. 
+ + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. 
Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. 
+ + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + stream: Literal[True], + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[ResponseStreamEvent]: + """Creates a model response. 
+ + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Specify additional output data to include in the model response. 
Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will be not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. 
+ + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. 
+ - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + stream: bool, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response | AsyncStream[ResponseStreamEvent]: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). 
+ See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will be not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. 
+ + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. 
+ + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["input", "model"], ["input", "model", "stream"]) + async def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass 
additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response | AsyncStream[ResponseStreamEvent]: + return await self._post( + "/responses", + body=await async_maybe_transform( + { + "input": input, + "model": model, + "include": include, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "metadata": metadata, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "reasoning": reasoning, + "store": store, + "stream": stream, + "temperature": temperature, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation": truncation, + "user": user, + }, + response_create_params.ResponseCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Response, + stream=stream or False, + stream_cls=AsyncStream[ResponseStreamEvent], + ) + + def stream( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + text_format: type[TextFormatT] | NotGiven = NOT_GIVEN, + tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: 
ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncResponseStreamManager[TextFormatT]: + if is_given(text_format): + if not text: + text = {} + + if "format" in text: + raise TypeError("Cannot mix and match text.format with text_format") + + text["format"] = _type_to_text_format_param(text_format) + + tools = _make_tools(tools) + + api_request = self.create( + input=input, + model=model, + tools=tools, + include=include, + instructions=instructions, + max_output_tokens=max_output_tokens, + metadata=metadata, + parallel_tool_calls=parallel_tool_calls, + previous_response_id=previous_response_id, + store=store, + stream=True, + temperature=temperature, + text=text, + tool_choice=tool_choice, + reasoning=reasoning, + top_p=top_p, + truncation=truncation, + user=user, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + + return AsyncResponseStreamManager( + api_request, + text_format=text_format, + input_tools=tools, + ) + + async def parse( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + text_format: type[TextFormatT] | NotGiven = NOT_GIVEN, + tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + 
max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ParsedResponse[TextFormatT]: + if is_given(text_format): + if not text: + text = {} + + if "format" in text: + raise TypeError("Cannot mix and match text.format with text_format") + + text["format"] = _type_to_text_format_param(text_format) + + tools = _make_tools(tools) + + def parser(raw_response: Response) -> ParsedResponse[TextFormatT]: + return parse_response( + input_tools=tools, + text_format=text_format, + response=raw_response, + ) + + return await self._post( + "/responses", + body=maybe_transform( + { + "input": input, + "model": model, + "include": include, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "metadata": metadata, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "reasoning": reasoning, + "store": store, + "stream": 
stream, + "temperature": temperature, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation": truncation, + "user": user, + }, + response_create_params.ResponseCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=parser, + ), + # we turn the `Response` instance into a `ParsedResponse` + # in the `parser` function above + cast_to=cast(Type[ParsedResponse[TextFormatT]], Response), + ) + + async def retrieve( + self, + response_id: str, + *, + include: List[ResponseIncludable] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response: + """ + Retrieves a model response with the given ID. + + Args: + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return await self._get( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + {"include": include}, response_retrieve_params.ResponseRetrieveParams + ), + ), + cast_to=Response, + ) + + async def delete( + self, + response_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + Deletes a model response with the given ID. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._delete( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class ResponsesWithRawResponse: + def __init__(self, responses: Responses) -> None: + self._responses = responses + + self.create = _legacy_response.to_raw_response_wrapper( + responses.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + responses.retrieve, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + responses.delete, + ) + + @cached_property + def input_items(self) -> InputItemsWithRawResponse: + return InputItemsWithRawResponse(self._responses.input_items) + + +class AsyncResponsesWithRawResponse: + def __init__(self, responses: AsyncResponses) -> None: + self._responses = responses + + self.create = _legacy_response.async_to_raw_response_wrapper( + responses.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + responses.retrieve, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + responses.delete, + ) + + @cached_property + def input_items(self) -> AsyncInputItemsWithRawResponse: + return AsyncInputItemsWithRawResponse(self._responses.input_items) + + +class ResponsesWithStreamingResponse: + def __init__(self, responses: Responses) -> None: + self._responses = responses + + self.create = to_streamed_response_wrapper( + responses.create, + ) + self.retrieve = to_streamed_response_wrapper( + responses.retrieve, + ) + 
def _make_tools(tools: Iterable[ParseableToolParam] | NotGiven) -> List[ToolParam] | NotGiven:
    """Convert parseable tool params into the tool params sent with a request.

    Tools whose ``type`` is not ``"function"``, and function tools that have no
    nested ``"function"`` key (the standard Responses API shape), are passed
    through unchanged. A function tool that does carry a nested ``"function"``
    entry must be a ``PydanticFunctionTool``; it is flattened into a
    ``ResponsesPydanticFunctionTool`` that keeps a reference to its model.

    Returns:
      ``NOT_GIVEN`` when `tools` was not given, otherwise the converted list.

    Raises:
      Exception: If a nested ``"function"`` entry is not a ``PydanticFunctionTool``.
    """
    if not is_given(tools):
        return NOT_GIVEN

    result: List[ToolParam] = []
    for entry in tools:
        # Pass-through cases: non-function tools and the standard Responses API
        # function shape (no nested "function" key).
        if entry["type"] != "function" or "function" not in entry:
            result.append(entry)
            continue

        fn = cast(Any, entry)["function"]  # pyright: ignore[reportUnnecessaryCast]
        if not isinstance(fn, PydanticFunctionTool):
            raise Exception(
                "Expected Chat Completions function tool shape to be created using `openai.pydantic_function_tool()`"
            )

        assert "parameters" in fn
        flattened_tool = ResponsesPydanticFunctionTool(
            {
                "type": "function",
                "name": fn["name"],
                "description": fn.get("description"),
                "parameters": fn["parameters"],
                "strict": fn.get("strict") or False,
            },
            fn.model,
        )
        result.append(flattened_tool.cast())

    return result
b/.venv/lib/python3.12/site-packages/openai/resources/uploads/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .parts import ( + Parts, + AsyncParts, + PartsWithRawResponse, + AsyncPartsWithRawResponse, + PartsWithStreamingResponse, + AsyncPartsWithStreamingResponse, +) +from .uploads import ( + Uploads, + AsyncUploads, + UploadsWithRawResponse, + AsyncUploadsWithRawResponse, + UploadsWithStreamingResponse, + AsyncUploadsWithStreamingResponse, +) + +__all__ = [ + "Parts", + "AsyncParts", + "PartsWithRawResponse", + "AsyncPartsWithRawResponse", + "PartsWithStreamingResponse", + "AsyncPartsWithStreamingResponse", + "Uploads", + "AsyncUploads", + "UploadsWithRawResponse", + "AsyncUploadsWithRawResponse", + "UploadsWithStreamingResponse", + "AsyncUploadsWithStreamingResponse", +] diff --git a/.venv/lib/python3.12/site-packages/openai/resources/uploads/parts.py b/.venv/lib/python3.12/site-packages/openai/resources/uploads/parts.py new file mode 100644 index 00000000..777469ac --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/uploads/parts.py @@ -0,0 +1,210 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Mapping, cast + +import httpx + +from ... 
class Parts(SyncAPIResource):
    # Synchronous resource for adding Parts to an Upload.

    @cached_property
    def with_raw_response(self) -> PartsWithRawResponse:
        """
        Prefix for any HTTP method call that should return the raw response
        object rather than the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return PartsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> PartsWithStreamingResponse:
        """
        Like `.with_raw_response`, but the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return PartsWithStreamingResponse(self)

    def create(
        self,
        upload_id: str,
        *,
        data: FileTypes,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> UploadPart:
        """
        Add a
        [Part](https://platform.openai.com/docs/api-reference/uploads/part-object) to an
        [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object.
        A Part is one chunk of bytes of the file being uploaded.

        A single Part may be at most 64 MB, and Parts can be added until the Upload
        maximum of 8 GB is reached.

        Parts may be added in parallel; their intended order is decided when you
        [complete the Upload](https://platform.openai.com/docs/api-reference/uploads/complete).

        Args:
          upload_id: ID of the Upload the Part belongs to. Must not be empty.

          data: The chunk of bytes for this Part.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `upload_id` is empty (falsy).
        """
        if not upload_id:
            raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")

        payload = deepcopy_minimal({"data": data})
        # File extraction runs before the payload is transformed, as in the generated code.
        multipart_files = extract_files(cast(Mapping[str, object], payload), paths=[["data"]])
        # It should be noted that the actual Content-Type header that will be
        # sent to the server will contain a `boundary` parameter, e.g.
        # multipart/form-data; boundary=---abc--
        merged_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
        return self._post(
            f"/uploads/{upload_id}/parts",
            body=maybe_transform(payload, part_create_params.PartCreateParams),
            files=multipart_files,
            options=make_request_options(
                extra_headers=merged_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
            ),
            cast_to=UploadPart,
        )
async def create(
    self,
    upload_id: str,
    *,
    data: FileTypes,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> UploadPart:
    """
    Add a
    [Part](https://platform.openai.com/docs/api-reference/uploads/part-object) to an
    [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object.
    A Part is one chunk of bytes of the file being uploaded.

    A single Part may be at most 64 MB, and Parts can be added until the Upload
    maximum of 8 GB is reached.

    Parts may be added in parallel; their intended order is decided when you
    [complete the Upload](https://platform.openai.com/docs/api-reference/uploads/complete).

    Args:
      upload_id: ID of the Upload the Part belongs to. Must not be empty.

      data: The chunk of bytes for this Part.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `upload_id` is empty (falsy).
    """
    if not upload_id:
        raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")

    payload = deepcopy_minimal({"data": data})
    # File extraction runs before the payload is transformed, as in the generated code.
    multipart_files = extract_files(cast(Mapping[str, object], payload), paths=[["data"]])
    # It should be noted that the actual Content-Type header that will be
    # sent to the server will contain a `boundary` parameter, e.g.
    # multipart/form-data; boundary=---abc--
    merged_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
    return await self._post(
        f"/uploads/{upload_id}/parts",
        body=await async_maybe_transform(payload, part_create_params.PartCreateParams),
        files=multipart_files,
        options=make_request_options(
            extra_headers=merged_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        ),
        cast_to=UploadPart,
    )
--- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/uploads/uploads.py @@ -0,0 +1,714 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import io +import os +import logging +import builtins +from typing import List, overload +from pathlib import Path + +import anyio +import httpx + +from ... import _legacy_response +from .parts import ( + Parts, + AsyncParts, + PartsWithRawResponse, + AsyncPartsWithRawResponse, + PartsWithStreamingResponse, + AsyncPartsWithStreamingResponse, +) +from ...types import FilePurpose, upload_create_params, upload_complete_params +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import ( + maybe_transform, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..._base_client import make_request_options +from ...types.upload import Upload +from ...types.file_purpose import FilePurpose + +__all__ = ["Uploads", "AsyncUploads"] + + +# 64MB +DEFAULT_PART_SIZE = 64 * 1024 * 1024 + +log: logging.Logger = logging.getLogger(__name__) + + +class Uploads(SyncAPIResource): + @cached_property + def parts(self) -> Parts: + return Parts(self._client) + + @cached_property + def with_raw_response(self) -> UploadsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return UploadsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> UploadsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
@overload
def upload_file_chunked(
    self,
    *,
    file: os.PathLike[str],
    mime_type: str,
    purpose: FilePurpose,
    bytes: int | None = None,
    part_size: int | None = None,
    md5: str | NotGiven = NOT_GIVEN,
) -> Upload:
    """Splits a file on disk into parts (64MB each unless `part_size` is given) and uploads them sequentially."""

@overload
def upload_file_chunked(
    self,
    *,
    file: bytes,
    filename: str,
    bytes: int,
    mime_type: str,
    purpose: FilePurpose,
    part_size: int | None = None,
    md5: str | NotGiven = NOT_GIVEN,
) -> Upload:
    """Splits an in-memory file into parts (64MB each unless `part_size` is given) and uploads them sequentially."""

def upload_file_chunked(
    self,
    *,
    file: os.PathLike[str] | bytes,
    mime_type: str,
    purpose: FilePurpose,
    filename: str | None = None,
    bytes: int | None = None,
    part_size: int | None = None,
    md5: str | NotGiven = NOT_GIVEN,
) -> Upload:
    """Splits the given file into multiple parts and uploads them sequentially.

    An Upload is created first, each chunk is then added as a Part in read
    order, and finally the Upload is completed with the collected Part IDs.

    ```py
    from pathlib import Path

    client.uploads.upload_file_chunked(
        file=Path("my-paper.pdf"),
        mime_type="application/pdf",
        purpose="assistants",
    )
    ```

    Args:
      file: A path to a file on disk, or the file contents as `bytes`.

      mime_type: The MIME type of the file; passed to `create()`.

      purpose: The intended purpose of the uploaded file; passed to `create()`.

      filename: Name to give the upload. Required for in-memory files; for
          paths it defaults to the final path component.

      bytes: Total size of the file in bytes. Required for in-memory files; for
          paths it defaults to the size reported by `stat()`.

      part_size: Number of bytes read per Part. Defaults to `DEFAULT_PART_SIZE` (64MB).

      md5: Optional md5 checksum; passed to `complete()`.

    Returns:
      The `Upload` returned by `complete()`.

    Raises:
      TypeError: If `file` is `bytes` and `filename` or `bytes` is not given.
    """
    if isinstance(file, builtins.bytes):
        if filename is None:
            raise TypeError("The `filename` argument must be given for in-memory files")

        if bytes is None:
            raise TypeError("The `bytes` argument must be given for in-memory files")
    else:
        if not isinstance(file, Path):
            file = Path(file)

        if not filename:
            filename = file.name

        if bytes is None:
            bytes = file.stat().st_size

    upload = self.create(
        bytes=bytes,
        filename=filename,
        mime_type=mime_type,
        purpose=purpose,
    )

    part_ids: list[str] = []

    if part_size is None:
        part_size = DEFAULT_PART_SIZE

    if isinstance(file, builtins.bytes):
        buf: io.FileIO | io.BytesIO = io.BytesIO(file)
    else:
        buf = io.FileIO(file)

    # `with` closes the buffer on every exit path. Previously it was closed only
    # when an `Exception` propagated, which leaked the file descriptor after
    # each successful upload (and on `BaseException` such as KeyboardInterrupt).
    with buf:
        while True:
            data = buf.read(part_size)
            if not data:
                # EOF
                break

            part = self.parts.create(upload_id=upload.id, data=data)
            log.info("Uploaded part %s for upload %s", part.id, upload.id)
            part_ids.append(part.id)

    return self.complete(upload_id=upload.id, part_ids=part_ids, md5=md5)
+ Currently, an Upload can accept at most 8 GB in total and expires after an hour + after you create it. + + Once you complete the Upload, we will create a + [File](https://platform.openai.com/docs/api-reference/files/object) object that + contains all the parts you uploaded. This File is usable in the rest of our + platform as a regular File object. + + For certain `purpose` values, the correct `mime_type` must be specified. Please + refer to documentation for the + [supported MIME types for your use case](https://platform.openai.com/docs/assistants/tools/file-search#supported-files). + + For guidance on the proper filename extensions for each purpose, please follow + the documentation on + [creating a File](https://platform.openai.com/docs/api-reference/files/create). + + Args: + bytes: The number of bytes in the file you are uploading. + + filename: The name of the file to upload. + + mime_type: The MIME type of the file. + + This must fall within the supported MIME types for your file purpose. See the + supported MIME types for assistants and vision. + + purpose: The intended purpose of the uploaded file. + + See the + [documentation on File purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose). 
def cancel(
    self,
    upload_id: str,
    *,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Upload:
    """Cancel the Upload.

    Once an Upload has been cancelled, no further Parts may be added to it.

    Args:
      upload_id: ID of the Upload to cancel. Must not be empty.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `upload_id` is empty (falsy).
    """
    if not upload_id:
        raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")

    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return self._post(f"/uploads/{upload_id}/cancel", options=request_options, cast_to=Upload)
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Upload: + """ + Completes the + [Upload](https://platform.openai.com/docs/api-reference/uploads/object). + + Within the returned Upload object, there is a nested + [File](https://platform.openai.com/docs/api-reference/files/object) object that + is ready to use in the rest of the platform. + + You can specify the order of the Parts by passing in an ordered list of the Part + IDs. + + The number of bytes uploaded upon completion must match the number of bytes + initially specified when creating the Upload object. No Parts may be added after + an Upload is completed. + + Args: + part_ids: The ordered list of Part IDs. + + md5: The optional md5 checksum for the file contents to verify if the bytes uploaded + matches what you expect. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return self._post( + f"/uploads/{upload_id}/complete", + body=maybe_transform( + { + "part_ids": part_ids, + "md5": md5, + }, + upload_complete_params.UploadCompleteParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + +class AsyncUploads(AsyncAPIResource): + @cached_property + def parts(self) -> AsyncParts: + return AsyncParts(self._client) + + @cached_property + def with_raw_response(self) -> AsyncUploadsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncUploadsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncUploadsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
@overload
async def upload_file_chunked(
    self,
    *,
    file: os.PathLike[str],
    mime_type: str,
    purpose: FilePurpose,
    bytes: int | None = None,
    part_size: int | None = None,
    md5: str | NotGiven = NOT_GIVEN,
) -> Upload:
    """Splits a file on disk into parts (64MB each unless `part_size` is given) and uploads them sequentially."""

@overload
async def upload_file_chunked(
    self,
    *,
    file: bytes,
    filename: str,
    bytes: int,
    mime_type: str,
    purpose: FilePurpose,
    part_size: int | None = None,
    md5: str | NotGiven = NOT_GIVEN,
) -> Upload:
    """Splits an in-memory file into parts (64MB each unless `part_size` is given) and uploads them sequentially."""

async def upload_file_chunked(
    self,
    *,
    file: os.PathLike[str] | bytes,
    mime_type: str,
    purpose: FilePurpose,
    filename: str | None = None,
    bytes: int | None = None,
    part_size: int | None = None,
    md5: str | NotGiven = NOT_GIVEN,
) -> Upload:
    """Splits the given file into multiple parts and uploads them sequentially.

    An Upload is created first, each chunk is then added as a Part in read
    order, and finally the Upload is completed with the collected Part IDs.

    ```py
    from pathlib import Path

    await client.uploads.upload_file_chunked(
        file=Path("my-paper.pdf"),
        mime_type="application/pdf",
        purpose="assistants",
    )
    ```

    Args:
      file: A path to a file on disk, or the file contents as `bytes`.

      mime_type: The MIME type of the file; passed to `create()`.

      purpose: The intended purpose of the uploaded file; passed to `create()`.

      filename: Name to give the upload. Required for in-memory files; for
          paths it defaults to the final path component.

      bytes: Total size of the file in bytes. Required for in-memory files; for
          paths it defaults to the size reported by `stat()`.

      part_size: Number of bytes read per Part. Defaults to `DEFAULT_PART_SIZE` (64MB).

      md5: Optional md5 checksum; passed to `complete()`.

    Returns:
      The `Upload` returned by `complete()`.

    Raises:
      TypeError: If `file` is `bytes` and `filename` or `bytes` is not given.
    """
    if isinstance(file, builtins.bytes):
        if filename is None:
            raise TypeError("The `filename` argument must be given for in-memory files")

        if bytes is None:
            raise TypeError("The `bytes` argument must be given for in-memory files")
    else:
        if not isinstance(file, anyio.Path):
            file = anyio.Path(file)

        if not filename:
            filename = file.name

        if bytes is None:
            stat = await file.stat()
            bytes = stat.st_size

    upload = await self.create(
        bytes=bytes,
        filename=filename,
        mime_type=mime_type,
        purpose=purpose,
    )

    part_ids: list[str] = []

    if part_size is None:
        part_size = DEFAULT_PART_SIZE

    if isinstance(file, anyio.Path):
        fd = await file.open("rb")
        async with fd:
            while True:
                data = await fd.read(part_size)
                if not data:
                    # EOF
                    break

                part = await self.parts.create(upload_id=upload.id, data=data)
                log.info("Uploaded part %s for upload %s", part.id, upload.id)
                part_ids.append(part.id)
    else:
        # Mirror the on-disk branch: release the buffer on every exit path,
        # not only when an `Exception` propagates.
        with io.BytesIO(file) as buf:
            while True:
                data = buf.read(part_size)
                if not data:
                    # EOF
                    break

                part = await self.parts.create(upload_id=upload.id, data=data)
                log.info("Uploaded part %s for upload %s", part.id, upload.id)
                part_ids.append(part.id)

    return await self.complete(upload_id=upload.id, part_ids=part_ids, md5=md5)
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Upload: + """ + Creates an intermediate + [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object + that you can add + [Parts](https://platform.openai.com/docs/api-reference/uploads/part-object) to. + Currently, an Upload can accept at most 8 GB in total and expires after an hour + after you create it. + + Once you complete the Upload, we will create a + [File](https://platform.openai.com/docs/api-reference/files/object) object that + contains all the parts you uploaded. This File is usable in the rest of our + platform as a regular File object. + + For certain `purpose` values, the correct `mime_type` must be specified. Please + refer to documentation for the + [supported MIME types for your use case](https://platform.openai.com/docs/assistants/tools/file-search#supported-files). + + For guidance on the proper filename extensions for each purpose, please follow + the documentation on + [creating a File](https://platform.openai.com/docs/api-reference/files/create). + + Args: + bytes: The number of bytes in the file you are uploading. + + filename: The name of the file to upload. + + mime_type: The MIME type of the file. + + This must fall within the supported MIME types for your file purpose. See the + supported MIME types for assistants and vision. + + purpose: The intended purpose of the uploaded file. + + See the + [documentation on File purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose). 
async def cancel(
    self,
    upload_id: str,
    *,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Upload:
    """Cancel the Upload.

    Once an Upload has been cancelled, no further Parts may be added to it.

    Args:
      upload_id: ID of the Upload to cancel. Must not be empty.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `upload_id` is empty (falsy).
    """
    if not upload_id:
        raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")

    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._post(f"/uploads/{upload_id}/cancel", options=request_options, cast_to=Upload)
class UploadsWithRawResponse:
    # Exposes `create`, `cancel` and `complete` of an `Uploads` resource, each
    # passed through `_legacy_response.to_raw_response_wrapper`.

    def __init__(self, uploads: Uploads) -> None:
        self._uploads = uploads

        as_raw = _legacy_response.to_raw_response_wrapper
        self.create = as_raw(uploads.create)
        self.cancel = as_raw(uploads.cancel)
        self.complete = as_raw(uploads.complete)

    @cached_property
    def parts(self) -> PartsWithRawResponse:
        # Raw-response view over the wrapped resource's `parts` sub-resource.
        return PartsWithRawResponse(self._uploads.parts)
class AsyncUploadsWithStreamingResponse:
    # Exposes `create`, `cancel` and `complete` of an `AsyncUploads` resource,
    # each passed through `async_to_streamed_response_wrapper`.

    def __init__(self, uploads: AsyncUploads) -> None:
        self._uploads = uploads

        as_streamed = async_to_streamed_response_wrapper
        self.create = as_streamed(uploads.create)
        self.cancel = as_streamed(uploads.cancel)
        self.complete = as_streamed(uploads.complete)

    @cached_property
    def parts(self) -> AsyncPartsWithStreamingResponse:
        # Streaming-response view over the wrapped resource's `parts` sub-resource.
        return AsyncPartsWithStreamingResponse(self._uploads.parts)
+ +from .files import ( + Files, + AsyncFiles, + FilesWithRawResponse, + AsyncFilesWithRawResponse, + FilesWithStreamingResponse, + AsyncFilesWithStreamingResponse, +) +from .file_batches import ( + FileBatches, + AsyncFileBatches, + FileBatchesWithRawResponse, + AsyncFileBatchesWithRawResponse, + FileBatchesWithStreamingResponse, + AsyncFileBatchesWithStreamingResponse, +) +from .vector_stores import ( + VectorStores, + AsyncVectorStores, + VectorStoresWithRawResponse, + AsyncVectorStoresWithRawResponse, + VectorStoresWithStreamingResponse, + AsyncVectorStoresWithStreamingResponse, +) + +__all__ = [ + "Files", + "AsyncFiles", + "FilesWithRawResponse", + "AsyncFilesWithRawResponse", + "FilesWithStreamingResponse", + "AsyncFilesWithStreamingResponse", + "FileBatches", + "AsyncFileBatches", + "FileBatchesWithRawResponse", + "AsyncFileBatchesWithRawResponse", + "FileBatchesWithStreamingResponse", + "AsyncFileBatchesWithStreamingResponse", + "VectorStores", + "AsyncVectorStores", + "VectorStoresWithRawResponse", + "AsyncVectorStoresWithRawResponse", + "VectorStoresWithStreamingResponse", + "AsyncVectorStoresWithStreamingResponse", +] diff --git a/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/file_batches.py b/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/file_batches.py new file mode 100644 index 00000000..9b4b64d3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/file_batches.py @@ -0,0 +1,801 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import asyncio +from typing import Dict, List, Iterable, Optional +from typing_extensions import Union, Literal +from concurrent.futures import Future, ThreadPoolExecutor, as_completed + +import httpx +import sniffio + +from ... 
import _legacy_response +from ...types import FileChunkingStrategyParam +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from ..._utils import ( + is_given, + maybe_transform, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncCursorPage, AsyncCursorPage +from ..._base_client import AsyncPaginator, make_request_options +from ...types.file_object import FileObject +from ...types.vector_stores import file_batch_create_params, file_batch_list_files_params +from ...types.file_chunking_strategy_param import FileChunkingStrategyParam +from ...types.vector_stores.vector_store_file import VectorStoreFile +from ...types.vector_stores.vector_store_file_batch import VectorStoreFileBatch + +__all__ = ["FileBatches", "AsyncFileBatches"] + + +class FileBatches(SyncAPIResource): + @cached_property + def with_raw_response(self) -> FileBatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return FileBatchesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> FileBatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return FileBatchesWithStreamingResponse(self) + + def create( + self, + vector_store_id: str, + *, + file_ids: List[str], + attributes: Optional[Dict[str, Union[str, float, bool]]] | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """ + Create a vector store file batch. + + Args: + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}/file_batches", + body=maybe_transform( + { + "file_ids": file_ids, + "attributes": attributes, + "chunking_strategy": chunking_strategy, + }, + file_batch_create_params.FileBatchCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + def retrieve( + self, + batch_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """ + Retrieves a vector store file batch. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + def cancel( + self, + batch_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Cancel a vector store file batch. + + This attempts to cancel the processing of + files in this batch as soon as possible. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + def create_and_poll( + self, + vector_store_id: str, + *, + file_ids: List[str], + poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Create a vector store batch and poll until all files have been processed.""" + batch = self.create( + vector_store_id=vector_store_id, + file_ids=file_ids, + chunking_strategy=chunking_strategy, + ) + # TODO: don't poll unless necessary?? + return self.poll( + batch.id, + vector_store_id=vector_store_id, + poll_interval_ms=poll_interval_ms, + ) + + def list_files( + self, + batch_id: str, + *, + vector_store_id: str, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[VectorStoreFile]: + """ + Returns a list of vector store files in a batch. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/files", + page=SyncCursorPage[VectorStoreFile], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "filter": filter, + "limit": limit, + "order": order, + }, + file_batch_list_files_params.FileBatchListFilesParams, + ), + ), + model=VectorStoreFile, + ) + + def poll( + self, + batch_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Wait for the given file batch to be processed. + + Note: this will return even if one of the files failed to process, you need to + check batch.file_counts.failed_count to handle this case. 
+ """ + headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} + if is_given(poll_interval_ms): + headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + while True: + response = self.with_raw_response.retrieve( + batch_id, + vector_store_id=vector_store_id, + extra_headers=headers, + ) + + batch = response.parse() + if batch.file_counts.in_progress > 0: + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + self._sleep(poll_interval_ms / 1000) + continue + + return batch + + def upload_and_poll( + self, + vector_store_id: str, + *, + files: Iterable[FileTypes], + max_concurrency: int = 5, + file_ids: List[str] = [], + poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Uploads the given files concurrently and then creates a vector store file batch. + + If you've already uploaded certain files that you want to include in this batch + then you can pass their IDs through the `file_ids` argument. + + By default, if any file upload fails then an exception will be eagerly raised. + + The number of concurrency uploads is configurable using the `max_concurrency` + parameter. + + Note: this method only supports `asyncio` or `trio` as the backing async + runtime. 
+ """ + results: list[FileObject] = [] + + with ThreadPoolExecutor(max_workers=max_concurrency) as executor: + futures: list[Future[FileObject]] = [ + executor.submit( + self._client.files.create, + file=file, + purpose="assistants", + ) + for file in files + ] + + for future in as_completed(futures): + exc = future.exception() + if exc: + raise exc + + results.append(future.result()) + + batch = self.create_and_poll( + vector_store_id=vector_store_id, + file_ids=[*file_ids, *(f.id for f in results)], + poll_interval_ms=poll_interval_ms, + chunking_strategy=chunking_strategy, + ) + return batch + + +class AsyncFileBatches(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncFileBatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncFileBatchesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncFileBatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncFileBatchesWithStreamingResponse(self) + + async def create( + self, + vector_store_id: str, + *, + file_ids: List[str], + attributes: Optional[Dict[str, Union[str, float, bool]]] | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """ + Create a vector store file batch. + + Args: + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}/file_batches", + body=await async_maybe_transform( + { + "file_ids": file_ids, + "attributes": attributes, + "chunking_strategy": chunking_strategy, + }, + file_batch_create_params.FileBatchCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + async def retrieve( + self, + batch_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to 
pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """ + Retrieves a vector store file batch. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + async def cancel( + self, + batch_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Cancel a vector store file batch. + + This attempts to cancel the processing of + files in this batch as soon as possible. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + async def create_and_poll( + self, + vector_store_id: str, + *, + file_ids: List[str], + poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Create a vector store batch and poll until all files have been processed.""" + batch = await self.create( + vector_store_id=vector_store_id, + file_ids=file_ids, + chunking_strategy=chunking_strategy, + ) + # TODO: don't poll unless necessary?? + return await self.poll( + batch.id, + vector_store_id=vector_store_id, + poll_interval_ms=poll_interval_ms, + ) + + def list_files( + self, + batch_id: str, + *, + vector_store_id: str, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[VectorStoreFile, AsyncCursorPage[VectorStoreFile]]: + """ + Returns a list of vector store files in a batch. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/files", + page=AsyncCursorPage[VectorStoreFile], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "filter": filter, + "limit": limit, + "order": order, + }, + file_batch_list_files_params.FileBatchListFilesParams, + ), + ), + model=VectorStoreFile, + ) + + async def poll( + self, + batch_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Wait for the given file batch to be processed. + + Note: this will return even if one of the files failed to process, you need to + check batch.file_counts.failed_count to handle this case. 
+ """ + headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} + if is_given(poll_interval_ms): + headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + while True: + response = await self.with_raw_response.retrieve( + batch_id, + vector_store_id=vector_store_id, + extra_headers=headers, + ) + + batch = response.parse() + if batch.file_counts.in_progress > 0: + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + await self._sleep(poll_interval_ms / 1000) + continue + + return batch + + async def upload_and_poll( + self, + vector_store_id: str, + *, + files: Iterable[FileTypes], + max_concurrency: int = 5, + file_ids: List[str] = [], + poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Uploads the given files concurrently and then creates a vector store file batch. + + If you've already uploaded certain files that you want to include in this batch + then you can pass their IDs through the `file_ids` argument. + + By default, if any file upload fails then an exception will be eagerly raised. + + The number of concurrency uploads is configurable using the `max_concurrency` + parameter. + + Note: this method only supports `asyncio` or `trio` as the backing async + runtime. 
+ """ + uploaded_files: list[FileObject] = [] + + async_library = sniffio.current_async_library() + + if async_library == "asyncio": + + async def asyncio_upload_file(semaphore: asyncio.Semaphore, file: FileTypes) -> None: + async with semaphore: + file_obj = await self._client.files.create( + file=file, + purpose="assistants", + ) + uploaded_files.append(file_obj) + + semaphore = asyncio.Semaphore(max_concurrency) + + tasks = [asyncio_upload_file(semaphore, file) for file in files] + + await asyncio.gather(*tasks) + elif async_library == "trio": + # We only import if the library is being used. + # We support Python 3.7 so are using an older version of trio that does not have type information + import trio # type: ignore # pyright: ignore[reportMissingTypeStubs] + + async def trio_upload_file(limiter: trio.CapacityLimiter, file: FileTypes) -> None: + async with limiter: + file_obj = await self._client.files.create( + file=file, + purpose="assistants", + ) + uploaded_files.append(file_obj) + + limiter = trio.CapacityLimiter(max_concurrency) + + async with trio.open_nursery() as nursery: + for file in files: + nursery.start_soon(trio_upload_file, limiter, file) # pyright: ignore [reportUnknownMemberType] + else: + raise RuntimeError( + f"Async runtime {async_library} is not supported yet. 
Only asyncio or trio is supported", + ) + + batch = await self.create_and_poll( + vector_store_id=vector_store_id, + file_ids=[*file_ids, *(f.id for f in uploaded_files)], + poll_interval_ms=poll_interval_ms, + chunking_strategy=chunking_strategy, + ) + return batch + + +class FileBatchesWithRawResponse: + def __init__(self, file_batches: FileBatches) -> None: + self._file_batches = file_batches + + self.create = _legacy_response.to_raw_response_wrapper( + file_batches.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + file_batches.retrieve, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + file_batches.cancel, + ) + self.list_files = _legacy_response.to_raw_response_wrapper( + file_batches.list_files, + ) + + +class AsyncFileBatchesWithRawResponse: + def __init__(self, file_batches: AsyncFileBatches) -> None: + self._file_batches = file_batches + + self.create = _legacy_response.async_to_raw_response_wrapper( + file_batches.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + file_batches.retrieve, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + file_batches.cancel, + ) + self.list_files = _legacy_response.async_to_raw_response_wrapper( + file_batches.list_files, + ) + + +class FileBatchesWithStreamingResponse: + def __init__(self, file_batches: FileBatches) -> None: + self._file_batches = file_batches + + self.create = to_streamed_response_wrapper( + file_batches.create, + ) + self.retrieve = to_streamed_response_wrapper( + file_batches.retrieve, + ) + self.cancel = to_streamed_response_wrapper( + file_batches.cancel, + ) + self.list_files = to_streamed_response_wrapper( + file_batches.list_files, + ) + + +class AsyncFileBatchesWithStreamingResponse: + def __init__(self, file_batches: AsyncFileBatches) -> None: + self._file_batches = file_batches + + self.create = async_to_streamed_response_wrapper( + file_batches.create, + ) + self.retrieve = async_to_streamed_response_wrapper( 
+ file_batches.retrieve, + ) + self.cancel = async_to_streamed_response_wrapper( + file_batches.cancel, + ) + self.list_files = async_to_streamed_response_wrapper( + file_batches.list_files, + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/files.py b/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/files.py new file mode 100644 index 00000000..7d93798a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/files.py @@ -0,0 +1,933 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import TYPE_CHECKING, Dict, Union, Optional +from typing_extensions import Literal, assert_never + +import httpx + +from ... import _legacy_response +from ...types import FileChunkingStrategyParam +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from ..._utils import ( + is_given, + maybe_transform, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncPage, AsyncPage, SyncCursorPage, AsyncCursorPage +from ..._base_client import AsyncPaginator, make_request_options +from ...types.vector_stores import file_list_params, file_create_params, file_update_params +from ...types.file_chunking_strategy_param import FileChunkingStrategyParam +from ...types.vector_stores.vector_store_file import VectorStoreFile +from ...types.vector_stores.file_content_response import FileContentResponse +from ...types.vector_stores.vector_store_file_deleted import VectorStoreFileDeleted + +__all__ = ["Files", "AsyncFiles"] + + +class Files(SyncAPIResource): + @cached_property + def with_raw_response(self) -> FilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw 
response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return FilesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> FilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return FilesWithStreamingResponse(self) + + def create( + self, + vector_store_id: str, + *, + file_id: str, + attributes: Optional[Dict[str, Union[str, float, bool]]] | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Create a vector store file by attaching a + [File](https://platform.openai.com/docs/api-reference/files) to a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object). + + Args: + file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID that the + vector store should use. Useful for tools like `file_search` that can access + files. + + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. 
+ + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}/files", + body=maybe_transform( + { + "file_id": file_id, + "attributes": attributes, + "chunking_strategy": chunking_strategy, + }, + file_create_params.FileCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + + def retrieve( + self, + file_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Retrieves a vector store file. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/vector_stores/{vector_store_id}/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + + def update( + self, + file_id: str, + *, + vector_store_id: str, + attributes: Optional[Dict[str, Union[str, float, bool]]], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Update attributes on a vector store file. + + Args: + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}/files/{file_id}", + body=maybe_transform({"attributes": attributes}, file_update_params.FileUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + + def list( + self, + vector_store_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[VectorStoreFile]: + """ + Returns a list of vector store files. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. 
+ + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/files", + page=SyncCursorPage[VectorStoreFile], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "filter": filter, + "limit": limit, + "order": order, + }, + file_list_params.FileListParams, + ), + ), + model=VectorStoreFile, + ) + + def delete( + self, + file_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileDeleted: + """Delete a vector store file. + + This will remove the file from the vector store but + the file itself will not be deleted. To delete the file, use the + [delete file](https://platform.openai.com/docs/api-reference/files/delete) + endpoint. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._delete( + f"/vector_stores/{vector_store_id}/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileDeleted, + ) + + def create_and_poll( + self, + file_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Attach a file to the given vector store and wait for it to be processed.""" + self.create(vector_store_id=vector_store_id, file_id=file_id, chunking_strategy=chunking_strategy) + + return self.poll( + file_id, + vector_store_id=vector_store_id, + poll_interval_ms=poll_interval_ms, + ) + + def poll( + self, + file_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Wait for the vector store file to finish processing. 
+ + Note: this will return even if the file failed to process, you need to check + file.last_error and file.status to handle these cases + """ + headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} + if is_given(poll_interval_ms): + headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + while True: + response = self.with_raw_response.retrieve( + file_id, + vector_store_id=vector_store_id, + extra_headers=headers, + ) + + file = response.parse() + if file.status == "in_progress": + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + self._sleep(poll_interval_ms / 1000) + elif file.status == "cancelled" or file.status == "completed" or file.status == "failed": + return file + else: + if TYPE_CHECKING: # type: ignore[unreachable] + assert_never(file.status) + else: + return file + + def upload( + self, + *, + vector_store_id: str, + file: FileTypes, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Upload a file to the `files` API and then attach it to the given vector store. + + Note the file will be asynchronously processed (you can use the alternative + polling helper method to wait for processing to complete). 
+ """ + file_obj = self._client.files.create(file=file, purpose="assistants") + return self.create(vector_store_id=vector_store_id, file_id=file_obj.id, chunking_strategy=chunking_strategy) + + def upload_and_poll( + self, + *, + vector_store_id: str, + file: FileTypes, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Add a file to a vector store and poll until processing is complete.""" + file_obj = self._client.files.create(file=file, purpose="assistants") + return self.create_and_poll( + vector_store_id=vector_store_id, + file_id=file_obj.id, + chunking_strategy=chunking_strategy, + poll_interval_ms=poll_interval_ms, + ) + + def content( + self, + file_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncPage[FileContentResponse]: + """ + Retrieve the parsed contents of a vector store file. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/files/{file_id}/content", + page=SyncPage[FileContentResponse], + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=FileContentResponse, + ) + + +class AsyncFiles(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncFilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncFilesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncFilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncFilesWithStreamingResponse(self) + + async def create( + self, + vector_store_id: str, + *, + file_id: str, + attributes: Optional[Dict[str, Union[str, float, bool]]] | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Create a vector store file by attaching a + [File](https://platform.openai.com/docs/api-reference/files) to a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object). + + Args: + file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID that the + vector store should use. Useful for tools like `file_search` that can access + files. + + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}/files", + body=await async_maybe_transform( + { + "file_id": file_id, + "attributes": attributes, + "chunking_strategy": chunking_strategy, + }, + file_create_params.FileCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + + async def retrieve( + self, + file_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Retrieves a vector store file. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/vector_stores/{vector_store_id}/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + + async def update( + self, + file_id: str, + *, + vector_store_id: str, + attributes: Optional[Dict[str, Union[str, float, bool]]], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Update attributes on a vector store file. + + Args: + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}/files/{file_id}", + body=await async_maybe_transform({"attributes": attributes}, file_update_params.FileUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + + def list( + self, + vector_store_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[VectorStoreFile, AsyncCursorPage[VectorStoreFile]]: + """ + Returns a list of vector store files. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. 
For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/files", + page=AsyncCursorPage[VectorStoreFile], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "filter": filter, + "limit": limit, + "order": order, + }, + file_list_params.FileListParams, + ), + ), + model=VectorStoreFile, + ) + + async def delete( + self, + file_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileDeleted: + """Delete a vector store file. + + This will remove the file from the vector store but + the file itself will not be deleted. To delete the file, use the + [delete file](https://platform.openai.com/docs/api-reference/files/delete) + endpoint. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._delete( + f"/vector_stores/{vector_store_id}/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileDeleted, + ) + + async def create_and_poll( + self, + file_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Attach a file to the given vector store and wait for it to be processed.""" + await self.create(vector_store_id=vector_store_id, file_id=file_id, chunking_strategy=chunking_strategy) + + return await self.poll( + file_id, + vector_store_id=vector_store_id, + poll_interval_ms=poll_interval_ms, + ) + + async def poll( + self, + file_id: str, + *, + vector_store_id: str, + 
poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Wait for the vector store file to finish processing. + + Note: this will return even if the file failed to process, you need to check + file.last_error and file.status to handle these cases + """ + headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} + if is_given(poll_interval_ms): + headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + while True: + response = await self.with_raw_response.retrieve( + file_id, + vector_store_id=vector_store_id, + extra_headers=headers, + ) + + file = response.parse() + if file.status == "in_progress": + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + await self._sleep(poll_interval_ms / 1000) + elif file.status == "cancelled" or file.status == "completed" or file.status == "failed": + return file + else: + if TYPE_CHECKING: # type: ignore[unreachable] + assert_never(file.status) + else: + return file + + async def upload( + self, + *, + vector_store_id: str, + file: FileTypes, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Upload a file to the `files` API and then attach it to the given vector store. + + Note the file will be asynchronously processed (you can use the alternative + polling helper method to wait for processing to complete). 
+ """ + file_obj = await self._client.files.create(file=file, purpose="assistants") + return await self.create( + vector_store_id=vector_store_id, file_id=file_obj.id, chunking_strategy=chunking_strategy + ) + + async def upload_and_poll( + self, + *, + vector_store_id: str, + file: FileTypes, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Add a file to a vector store and poll until processing is complete.""" + file_obj = await self._client.files.create(file=file, purpose="assistants") + return await self.create_and_poll( + vector_store_id=vector_store_id, + file_id=file_obj.id, + poll_interval_ms=poll_interval_ms, + chunking_strategy=chunking_strategy, + ) + + def content( + self, + file_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[FileContentResponse, AsyncPage[FileContentResponse]]: + """ + Retrieve the parsed contents of a vector store file. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/files/{file_id}/content", + page=AsyncPage[FileContentResponse], + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=FileContentResponse, + ) + + +class FilesWithRawResponse: + def __init__(self, files: Files) -> None: + self._files = files + + self.create = _legacy_response.to_raw_response_wrapper( + files.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + files.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + files.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + files.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + files.delete, + ) + self.content = _legacy_response.to_raw_response_wrapper( + files.content, + ) + + +class AsyncFilesWithRawResponse: + def __init__(self, files: AsyncFiles) -> None: + self._files = files + + self.create = _legacy_response.async_to_raw_response_wrapper( + files.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + files.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + files.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + files.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + files.delete, + ) + self.content = 
_legacy_response.async_to_raw_response_wrapper( + files.content, + ) + + +class FilesWithStreamingResponse: + def __init__(self, files: Files) -> None: + self._files = files + + self.create = to_streamed_response_wrapper( + files.create, + ) + self.retrieve = to_streamed_response_wrapper( + files.retrieve, + ) + self.update = to_streamed_response_wrapper( + files.update, + ) + self.list = to_streamed_response_wrapper( + files.list, + ) + self.delete = to_streamed_response_wrapper( + files.delete, + ) + self.content = to_streamed_response_wrapper( + files.content, + ) + + +class AsyncFilesWithStreamingResponse: + def __init__(self, files: AsyncFiles) -> None: + self._files = files + + self.create = async_to_streamed_response_wrapper( + files.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + files.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + files.update, + ) + self.list = async_to_streamed_response_wrapper( + files.list, + ) + self.delete = async_to_streamed_response_wrapper( + files.delete, + ) + self.content = async_to_streamed_response_wrapper( + files.content, + ) diff --git a/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/vector_stores.py b/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/vector_stores.py new file mode 100644 index 00000000..aaa6ed27 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/vector_stores/vector_stores.py @@ -0,0 +1,868 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Optional +from typing_extensions import Literal + +import httpx + +from ... 
import _legacy_response +from .files import ( + Files, + AsyncFiles, + FilesWithRawResponse, + AsyncFilesWithRawResponse, + FilesWithStreamingResponse, + AsyncFilesWithStreamingResponse, +) +from ...types import ( + FileChunkingStrategyParam, + vector_store_list_params, + vector_store_create_params, + vector_store_search_params, + vector_store_update_params, +) +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import ( + maybe_transform, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncPage, AsyncPage, SyncCursorPage, AsyncCursorPage +from .file_batches import ( + FileBatches, + AsyncFileBatches, + FileBatchesWithRawResponse, + AsyncFileBatchesWithRawResponse, + FileBatchesWithStreamingResponse, + AsyncFileBatchesWithStreamingResponse, +) +from ..._base_client import AsyncPaginator, make_request_options +from ...types.vector_store import VectorStore +from ...types.vector_store_deleted import VectorStoreDeleted +from ...types.shared_params.metadata import Metadata +from ...types.file_chunking_strategy_param import FileChunkingStrategyParam +from ...types.vector_store_search_response import VectorStoreSearchResponse + +__all__ = ["VectorStores", "AsyncVectorStores"] + + +class VectorStores(SyncAPIResource): + @cached_property + def files(self) -> Files: + return Files(self._client) + + @cached_property + def file_batches(self) -> FileBatches: + return FileBatches(self._client) + + @cached_property + def with_raw_response(self) -> VectorStoresWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return VectorStoresWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> VectorStoresWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return VectorStoresWithStreamingResponse(self) + + def create( + self, + *, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN, + file_ids: List[str] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Create a vector store. + + Args: + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + + expires_after: The expiration policy for a vector store. + + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. 
Values are strings with + a maximum length of 512 characters. + + name: The name of the vector store. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + "/vector_stores", + body=maybe_transform( + { + "chunking_strategy": chunking_strategy, + "expires_after": expires_after, + "file_ids": file_ids, + "metadata": metadata, + "name": name, + }, + vector_store_create_params.VectorStoreCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + def retrieve( + self, + vector_store_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Retrieves a vector store. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/vector_stores/{vector_store_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + def update( + self, + vector_store_id: str, + *, + expires_after: Optional[vector_store_update_params.ExpiresAfter] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Modifies a vector store. + + Args: + expires_after: The expiration policy for a vector store. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the vector store. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}", + body=maybe_transform( + { + "expires_after": expires_after, + "metadata": metadata, + "name": name, + }, + vector_store_update_params.VectorStoreUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[VectorStore]: + """Returns a list of vector stores. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. 
For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + "/vector_stores", + page=SyncCursorPage[VectorStore], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + vector_store_list_params.VectorStoreListParams, + ), + ), + model=VectorStore, + ) + + def delete( + self, + vector_store_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreDeleted: + """ + Delete a vector store. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._delete( + f"/vector_stores/{vector_store_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreDeleted, + ) + + def search( + self, + vector_store_id: str, + *, + query: Union[str, List[str]], + filters: vector_store_search_params.Filters | NotGiven = NOT_GIVEN, + max_num_results: int | NotGiven = NOT_GIVEN, + ranking_options: vector_store_search_params.RankingOptions | NotGiven = NOT_GIVEN, + rewrite_query: bool | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncPage[VectorStoreSearchResponse]: + """ + Search a vector store for relevant chunks based on a query and file attributes + filter. + + Args: + query: A query string for a search + + filters: A filter to apply based on file attributes. + + max_num_results: The maximum number of results to return. This number should be between 1 and 50 + inclusive. + + ranking_options: Ranking options for search. + + rewrite_query: Whether to rewrite the natural language query for vector search. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/search", + page=SyncPage[VectorStoreSearchResponse], + body=maybe_transform( + { + "query": query, + "filters": filters, + "max_num_results": max_num_results, + "ranking_options": ranking_options, + "rewrite_query": rewrite_query, + }, + vector_store_search_params.VectorStoreSearchParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=VectorStoreSearchResponse, + method="post", + ) + + +class AsyncVectorStores(AsyncAPIResource): + @cached_property + def files(self) -> AsyncFiles: + return AsyncFiles(self._client) + + @cached_property + def file_batches(self) -> AsyncFileBatches: + return AsyncFileBatches(self._client) + + @cached_property + def with_raw_response(self) -> AsyncVectorStoresWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncVectorStoresWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncVectorStoresWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncVectorStoresWithStreamingResponse(self) + + async def create( + self, + *, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN, + file_ids: List[str] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Create a vector store. + + Args: + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + + expires_after: The expiration policy for a vector store. + + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the vector store. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/vector_stores", + body=await async_maybe_transform( + { + "chunking_strategy": chunking_strategy, + "expires_after": expires_after, + "file_ids": file_ids, + "metadata": metadata, + "name": name, + }, + vector_store_create_params.VectorStoreCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + async def retrieve( + self, + vector_store_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Retrieves a vector store. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/vector_stores/{vector_store_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + async def update( + self, + vector_store_id: str, + *, + expires_after: Optional[vector_store_update_params.ExpiresAfter] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Modifies a vector store. + + Args: + expires_after: The expiration policy for a vector store. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the vector store. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}", + body=await async_maybe_transform( + { + "expires_after": expires_after, + "metadata": metadata, + "name": name, + }, + vector_store_update_params.VectorStoreUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[VectorStore, AsyncCursorPage[VectorStore]]: + """Returns a list of vector stores. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. 
For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + "/vector_stores", + page=AsyncCursorPage[VectorStore], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + vector_store_list_params.VectorStoreListParams, + ), + ), + model=VectorStore, + ) + + async def delete( + self, + vector_store_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreDeleted: + """ + Delete a vector store. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._delete( + f"/vector_stores/{vector_store_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreDeleted, + ) + + def search( + self, + vector_store_id: str, + *, + query: Union[str, List[str]], + filters: vector_store_search_params.Filters | NotGiven = NOT_GIVEN, + max_num_results: int | NotGiven = NOT_GIVEN, + ranking_options: vector_store_search_params.RankingOptions | NotGiven = NOT_GIVEN, + rewrite_query: bool | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[VectorStoreSearchResponse, AsyncPage[VectorStoreSearchResponse]]: + """ + Search a vector store for relevant chunks based on a query and file attributes + filter. + + Args: + query: A query string for a search + + filters: A filter to apply based on file attributes. + + max_num_results: The maximum number of results to return. This number should be between 1 and 50 + inclusive. + + ranking_options: Ranking options for search. + + rewrite_query: Whether to rewrite the natural language query for vector search. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/search", + page=AsyncPage[VectorStoreSearchResponse], + body=maybe_transform( + { + "query": query, + "filters": filters, + "max_num_results": max_num_results, + "ranking_options": ranking_options, + "rewrite_query": rewrite_query, + }, + vector_store_search_params.VectorStoreSearchParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=VectorStoreSearchResponse, + method="post", + ) + + +class VectorStoresWithRawResponse: + def __init__(self, vector_stores: VectorStores) -> None: + self._vector_stores = vector_stores + + self.create = _legacy_response.to_raw_response_wrapper( + vector_stores.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + vector_stores.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + vector_stores.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + vector_stores.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + vector_stores.delete, + ) + self.search = _legacy_response.to_raw_response_wrapper( + vector_stores.search, + ) + + @cached_property + def files(self) -> FilesWithRawResponse: + return FilesWithRawResponse(self._vector_stores.files) + + @cached_property + def file_batches(self) -> FileBatchesWithRawResponse: + return FileBatchesWithRawResponse(self._vector_stores.file_batches) + + +class AsyncVectorStoresWithRawResponse: + def __init__(self, 
vector_stores: AsyncVectorStores) -> None: + self._vector_stores = vector_stores + + self.create = _legacy_response.async_to_raw_response_wrapper( + vector_stores.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + vector_stores.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + vector_stores.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + vector_stores.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + vector_stores.delete, + ) + self.search = _legacy_response.async_to_raw_response_wrapper( + vector_stores.search, + ) + + @cached_property + def files(self) -> AsyncFilesWithRawResponse: + return AsyncFilesWithRawResponse(self._vector_stores.files) + + @cached_property + def file_batches(self) -> AsyncFileBatchesWithRawResponse: + return AsyncFileBatchesWithRawResponse(self._vector_stores.file_batches) + + +class VectorStoresWithStreamingResponse: + def __init__(self, vector_stores: VectorStores) -> None: + self._vector_stores = vector_stores + + self.create = to_streamed_response_wrapper( + vector_stores.create, + ) + self.retrieve = to_streamed_response_wrapper( + vector_stores.retrieve, + ) + self.update = to_streamed_response_wrapper( + vector_stores.update, + ) + self.list = to_streamed_response_wrapper( + vector_stores.list, + ) + self.delete = to_streamed_response_wrapper( + vector_stores.delete, + ) + self.search = to_streamed_response_wrapper( + vector_stores.search, + ) + + @cached_property + def files(self) -> FilesWithStreamingResponse: + return FilesWithStreamingResponse(self._vector_stores.files) + + @cached_property + def file_batches(self) -> FileBatchesWithStreamingResponse: + return FileBatchesWithStreamingResponse(self._vector_stores.file_batches) + + +class AsyncVectorStoresWithStreamingResponse: + def __init__(self, vector_stores: AsyncVectorStores) -> None: + self._vector_stores = vector_stores + + self.create = 
async_to_streamed_response_wrapper( + vector_stores.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + vector_stores.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + vector_stores.update, + ) + self.list = async_to_streamed_response_wrapper( + vector_stores.list, + ) + self.delete = async_to_streamed_response_wrapper( + vector_stores.delete, + ) + self.search = async_to_streamed_response_wrapper( + vector_stores.search, + ) + + @cached_property + def files(self) -> AsyncFilesWithStreamingResponse: + return AsyncFilesWithStreamingResponse(self._vector_stores.files) + + @cached_property + def file_batches(self) -> AsyncFileBatchesWithStreamingResponse: + return AsyncFileBatchesWithStreamingResponse(self._vector_stores.file_batches) |