Diffstat (limited to '.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py | 383 |
1 file changed, 383 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py
new file mode 100644
index 00000000..5884e54d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py
@@ -0,0 +1,383 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._base_client import make_request_options
+from ....types.beta.realtime import session_create_params
+from ....types.beta.realtime.session_create_response import SessionCreateResponse
+
+__all__ = ["Sessions", "AsyncSessions"]
+
+
+class Sessions(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> SessionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return SessionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> SessionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return SessionsWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+        input_audio_noise_reduction: session_create_params.InputAudioNoiseReduction | NotGiven = NOT_GIVEN,
+        input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+        instructions: str | NotGiven = NOT_GIVEN,
+        max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
+        modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+        model: Literal[
+            "gpt-4o-realtime-preview",
+            "gpt-4o-realtime-preview-2024-10-01",
+            "gpt-4o-realtime-preview-2024-12-17",
+            "gpt-4o-mini-realtime-preview",
+            "gpt-4o-mini-realtime-preview-2024-12-17",
+        ]
+        | NotGiven = NOT_GIVEN,
+        output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        tool_choice: str | NotGiven = NOT_GIVEN,
+        tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN,
+        turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+        voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SessionCreateResponse:
+        """
+        Create an ephemeral API token for use in client-side applications with the
+        Realtime API. Can be configured with the same session parameters as the
+        `session.update` client event.
+
+        It responds with a session object, plus a `client_secret` key which contains a
+        usable ephemeral API token that can be used to authenticate browser clients for
+        the Realtime API.
+
+        Args:
+          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+              (mono), and little-endian byte order.
+
+          input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
+              off. Noise reduction filters audio added to the input audio buffer before it is
+              sent to VAD and the model. Filtering the audio can improve VAD and turn
+              detection accuracy (reducing false positives) and model performance by improving
+              perception of the input audio.
+
+          input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
+              `null` to turn off once on. Input audio transcription is not native to the
+              model, since the model consumes audio directly. Transcription runs
+              asynchronously through
+              [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+              and should be treated as guidance of input audio content rather than precisely
+              what the model heard. The client can optionally set the language and prompt for
+              transcription; these offer additional guidance to the transcription service.
+
+          instructions: The default system instructions (i.e. system message) prepended to model calls.
+              This field allows the client to guide the model on desired responses. The model
+              can be instructed on response content and format (e.g. "be extremely succinct",
+              "act friendly", "here are examples of good responses") and on audio behavior
+              (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
+              instructions are not guaranteed to be followed by the model, but they provide
+              guidance to the model on the desired behavior.
+
+              Note that the server sets default instructions which will be used if this field
+              is not set and are visible in the `session.created` event at the start of the
+              session.
+
+          max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
+              tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+              `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+
+          modalities: The set of modalities the model can respond with. To disable audio, set this to
+              ["text"].
+
+          model: The Realtime model used for this session.
+
+          output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+              For `pcm16`, output audio is sampled at a rate of 24kHz.
+
+          temperature: Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
+              temperature of 0.8 is highly recommended for best performance.
+
+          tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
+              a function.
+
+          tools: Tools (functions) available to the model.
+
+          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+              set to `null` to turn off, in which case the client must manually trigger model
+              response. Server VAD means that the model will detect the start and end of
+              speech based on audio volume and respond at the end of user speech. Semantic VAD
+              is more advanced and uses a turn detection model (in conjunction with VAD) to
+              semantically estimate whether the user has finished speaking, then dynamically
+              sets a timeout based on this probability. For example, if user audio trails off
+              with "uhhm", the model will score a low probability of turn end and wait longer
+              for the user to continue speaking. This can be useful for more natural
+              conversations, but may have a higher latency.
+
+          voice: The voice the model uses to respond. Voice cannot be changed during the session
+              once the model has responded with audio at least once. Current voice options are
+              `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            "/realtime/sessions",
+            body=maybe_transform(
+                {
+                    "input_audio_format": input_audio_format,
+                    "input_audio_noise_reduction": input_audio_noise_reduction,
+                    "input_audio_transcription": input_audio_transcription,
+                    "instructions": instructions,
+                    "max_response_output_tokens": max_response_output_tokens,
+                    "modalities": modalities,
+                    "model": model,
+                    "output_audio_format": output_audio_format,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "turn_detection": turn_detection,
+                    "voice": voice,
+                },
+                session_create_params.SessionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=SessionCreateResponse,
+        )
+
+
+class AsyncSessions(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncSessionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncSessionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncSessionsWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+        input_audio_noise_reduction: session_create_params.InputAudioNoiseReduction | NotGiven = NOT_GIVEN,
+        input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+        instructions: str | NotGiven = NOT_GIVEN,
+        max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
+        modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+        model: Literal[
+            "gpt-4o-realtime-preview",
+            "gpt-4o-realtime-preview-2024-10-01",
+            "gpt-4o-realtime-preview-2024-12-17",
+            "gpt-4o-mini-realtime-preview",
+            "gpt-4o-mini-realtime-preview-2024-12-17",
+        ]
+        | NotGiven = NOT_GIVEN,
+        output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        tool_choice: str | NotGiven = NOT_GIVEN,
+        tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN,
+        turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+        voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SessionCreateResponse:
+        """
+        Create an ephemeral API token for use in client-side applications with the
+        Realtime API. Can be configured with the same session parameters as the
+        `session.update` client event.
+
+        It responds with a session object, plus a `client_secret` key which contains a
+        usable ephemeral API token that can be used to authenticate browser clients for
+        the Realtime API.
+
+        Args:
+          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+              (mono), and little-endian byte order.
+
+          input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
+              off. Noise reduction filters audio added to the input audio buffer before it is
+              sent to VAD and the model. Filtering the audio can improve VAD and turn
+              detection accuracy (reducing false positives) and model performance by improving
+              perception of the input audio.
+
+          input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
+              `null` to turn off once on. Input audio transcription is not native to the
+              model, since the model consumes audio directly. Transcription runs
+              asynchronously through
+              [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+              and should be treated as guidance of input audio content rather than precisely
+              what the model heard. The client can optionally set the language and prompt for
+              transcription; these offer additional guidance to the transcription service.
+
+          instructions: The default system instructions (i.e. system message) prepended to model calls.
+              This field allows the client to guide the model on desired responses. The model
+              can be instructed on response content and format (e.g. "be extremely succinct",
+              "act friendly", "here are examples of good responses") and on audio behavior
+              (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
+              instructions are not guaranteed to be followed by the model, but they provide
+              guidance to the model on the desired behavior.
+
+              Note that the server sets default instructions which will be used if this field
+              is not set and are visible in the `session.created` event at the start of the
+              session.
+
+          max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
+              tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+              `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+
+          modalities: The set of modalities the model can respond with. To disable audio, set this to
+              ["text"].
+
+          model: The Realtime model used for this session.
+
+          output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+              For `pcm16`, output audio is sampled at a rate of 24kHz.
+
+          temperature: Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
+              temperature of 0.8 is highly recommended for best performance.
+
+          tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
+              a function.
+
+          tools: Tools (functions) available to the model.
+
+          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+              set to `null` to turn off, in which case the client must manually trigger model
+              response. Server VAD means that the model will detect the start and end of
+              speech based on audio volume and respond at the end of user speech. Semantic VAD
+              is more advanced and uses a turn detection model (in conjunction with VAD) to
+              semantically estimate whether the user has finished speaking, then dynamically
+              sets a timeout based on this probability. For example, if user audio trails off
+              with "uhhm", the model will score a low probability of turn end and wait longer
+              for the user to continue speaking. This can be useful for more natural
+              conversations, but may have a higher latency.
+
+          voice: The voice the model uses to respond. Voice cannot be changed during the session
+              once the model has responded with audio at least once. Current voice options are
+              `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            "/realtime/sessions",
+            body=await async_maybe_transform(
+                {
+                    "input_audio_format": input_audio_format,
+                    "input_audio_noise_reduction": input_audio_noise_reduction,
+                    "input_audio_transcription": input_audio_transcription,
+                    "instructions": instructions,
+                    "max_response_output_tokens": max_response_output_tokens,
+                    "modalities": modalities,
+                    "model": model,
+                    "output_audio_format": output_audio_format,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "turn_detection": turn_detection,
+                    "voice": voice,
+                },
+                session_create_params.SessionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=SessionCreateResponse,
+        )
+
+
+class SessionsWithRawResponse:
+    def __init__(self, sessions: Sessions) -> None:
+        self._sessions = sessions
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            sessions.create,
+        )
+
+
+class AsyncSessionsWithRawResponse:
+    def __init__(self, sessions: AsyncSessions) -> None:
+        self._sessions = sessions
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            sessions.create,
+        )
+
+
+class SessionsWithStreamingResponse:
+    def __init__(self, sessions: Sessions) -> None:
+        self._sessions = sessions
+
+        self.create = to_streamed_response_wrapper(
+            sessions.create,
+        )
+
+
+class AsyncSessionsWithStreamingResponse:
+    def __init__(self, sessions: AsyncSessions) -> None:
+        self._sessions = sessions
+
+        self.create = async_to_streamed_response_wrapper(
+            sessions.create,
+        )
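For reference, a minimal sketch of how the resource added in this diff might be called from application code. It is not part of the diff; the model, modalities, and voice values are illustrative picks from the Literal types above, and an OPENAI_API_KEY environment variable is assumed to be set.

# Hypothetical usage sketch; values chosen for illustration only.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Mint an ephemeral token for a browser client. The body accepts the same
# session parameters as the `session.update` client event.
session = client.beta.realtime.sessions.create(
    model="gpt-4o-realtime-preview",
    modalities=["text", "audio"],
    voice="alloy",
)

# The response carries `client_secret`, the short-lived token a browser
# client can use to authenticate against the Realtime API.
print(session.client_secret)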
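The four wrapper classes at the bottom of the diff back the `.with_raw_response` and `.with_streaming_response` properties. A sketch of how they might be exercised, under the same assumptions as the example above:

# Hypothetical sketch of the raw/streaming wrappers; not part of the diff.
from openai import OpenAI

client = OpenAI()

# .with_raw_response returns the raw HTTP response up front; .parse()
# recovers the typed SessionCreateResponse afterwards.
raw = client.beta.realtime.sessions.with_raw_response.create(
    model="gpt-4o-realtime-preview",
)
print(raw.headers.get("x-request-id"))
session = raw.parse()

# .with_streaming_response defers reading the body until it is consumed,
# which is why it is used as a context manager.
with client.beta.realtime.sessions.with_streaming_response.create(
    model="gpt-4o-realtime-preview",
) as response:
    print(response.headers.get("content-type"))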