Diffstat (limited to '.venv/lib/python3.12/site-packages/openai/resources/beta/realtime')
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/realtime/__init__.py                   47
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/realtime/realtime.py                 1066
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py                  383
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/realtime/transcription_sessions.py    277
4 files changed, 1773 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/__init__.py
new file mode 100644
index 00000000..7ab3d993
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/__init__.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .realtime import (
+ Realtime,
+ AsyncRealtime,
+ RealtimeWithRawResponse,
+ AsyncRealtimeWithRawResponse,
+ RealtimeWithStreamingResponse,
+ AsyncRealtimeWithStreamingResponse,
+)
+from .sessions import (
+ Sessions,
+ AsyncSessions,
+ SessionsWithRawResponse,
+ AsyncSessionsWithRawResponse,
+ SessionsWithStreamingResponse,
+ AsyncSessionsWithStreamingResponse,
+)
+from .transcription_sessions import (
+ TranscriptionSessions,
+ AsyncTranscriptionSessions,
+ TranscriptionSessionsWithRawResponse,
+ AsyncTranscriptionSessionsWithRawResponse,
+ TranscriptionSessionsWithStreamingResponse,
+ AsyncTranscriptionSessionsWithStreamingResponse,
+)
+
+__all__ = [
+ "Sessions",
+ "AsyncSessions",
+ "SessionsWithRawResponse",
+ "AsyncSessionsWithRawResponse",
+ "SessionsWithStreamingResponse",
+ "AsyncSessionsWithStreamingResponse",
+ "TranscriptionSessions",
+ "AsyncTranscriptionSessions",
+ "TranscriptionSessionsWithRawResponse",
+ "AsyncTranscriptionSessionsWithRawResponse",
+ "TranscriptionSessionsWithStreamingResponse",
+ "AsyncTranscriptionSessionsWithStreamingResponse",
+ "Realtime",
+ "AsyncRealtime",
+ "RealtimeWithRawResponse",
+ "AsyncRealtimeWithRawResponse",
+ "RealtimeWithStreamingResponse",
+ "AsyncRealtimeWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/realtime.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/realtime.py
new file mode 100644
index 00000000..76e57f8c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/realtime.py
@@ -0,0 +1,1066 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import json
+import logging
+from types import TracebackType
+from typing import TYPE_CHECKING, Any, Iterator, cast
+from typing_extensions import AsyncIterator
+
+import httpx
+from pydantic import BaseModel
+
+from .sessions import (
+ Sessions,
+ AsyncSessions,
+ SessionsWithRawResponse,
+ AsyncSessionsWithRawResponse,
+ SessionsWithStreamingResponse,
+ AsyncSessionsWithStreamingResponse,
+)
+from ...._types import NOT_GIVEN, Query, Headers, NotGiven
+from ...._utils import (
+ is_azure_client,
+ maybe_transform,
+ strip_not_given,
+ async_maybe_transform,
+ is_async_azure_client,
+)
+from ...._compat import cached_property
+from ...._models import construct_type_unchecked
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._exceptions import OpenAIError
+from ...._base_client import _merge_mappings
+from ....types.beta.realtime import (
+ session_update_event_param,
+ response_create_event_param,
+ transcription_session_update_param,
+)
+from .transcription_sessions import (
+ TranscriptionSessions,
+ AsyncTranscriptionSessions,
+ TranscriptionSessionsWithRawResponse,
+ AsyncTranscriptionSessionsWithRawResponse,
+ TranscriptionSessionsWithStreamingResponse,
+ AsyncTranscriptionSessionsWithStreamingResponse,
+)
+from ....types.websocket_connection_options import WebsocketConnectionOptions
+from ....types.beta.realtime.realtime_client_event import RealtimeClientEvent
+from ....types.beta.realtime.realtime_server_event import RealtimeServerEvent
+from ....types.beta.realtime.conversation_item_param import ConversationItemParam
+from ....types.beta.realtime.realtime_client_event_param import RealtimeClientEventParam
+
+if TYPE_CHECKING:
+ from websockets.sync.client import ClientConnection as WebsocketConnection
+ from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection
+
+ from ...._client import OpenAI, AsyncOpenAI
+
+__all__ = ["Realtime", "AsyncRealtime"]
+
+log: logging.Logger = logging.getLogger(__name__)
+
+
+class Realtime(SyncAPIResource):
+ @cached_property
+ def sessions(self) -> Sessions:
+ return Sessions(self._client)
+
+ @cached_property
+ def transcription_sessions(self) -> TranscriptionSessions:
+ return TranscriptionSessions(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> RealtimeWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return RealtimeWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> RealtimeWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return RealtimeWithStreamingResponse(self)
+
+ def connect(
+ self,
+ *,
+ model: str,
+ extra_query: Query = {},
+ extra_headers: Headers = {},
+ websocket_connection_options: WebsocketConnectionOptions = {},
+ ) -> RealtimeConnectionManager:
+ """
+ The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.
+
+ Some notable benefits of the API include:
+
+ - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
+ - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
+ - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.
+
+ The Realtime API is a stateful, event-based API that communicates over a WebSocket.
+ """
+ return RealtimeConnectionManager(
+ client=self._client,
+ extra_query=extra_query,
+ extra_headers=extra_headers,
+ websocket_connection_options=websocket_connection_options,
+ model=model,
+ )
+
+
+class AsyncRealtime(AsyncAPIResource):
+ @cached_property
+ def sessions(self) -> AsyncSessions:
+ return AsyncSessions(self._client)
+
+ @cached_property
+ def transcription_sessions(self) -> AsyncTranscriptionSessions:
+ return AsyncTranscriptionSessions(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncRealtimeWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncRealtimeWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncRealtimeWithStreamingResponse(self)
+
+ def connect(
+ self,
+ *,
+ model: str,
+ extra_query: Query = {},
+ extra_headers: Headers = {},
+ websocket_connection_options: WebsocketConnectionOptions = {},
+ ) -> AsyncRealtimeConnectionManager:
+ """
+ The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.
+
+ Some notable benefits of the API include:
+
+ - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
+ - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
+ - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.
+
+ The Realtime API is a stateful, event-based API that communicates over a WebSocket.
+ """
+ return AsyncRealtimeConnectionManager(
+ client=self._client,
+ extra_query=extra_query,
+ extra_headers=extra_headers,
+ websocket_connection_options=websocket_connection_options,
+ model=model,
+ )
+
+
+class RealtimeWithRawResponse:
+ def __init__(self, realtime: Realtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def sessions(self) -> SessionsWithRawResponse:
+ return SessionsWithRawResponse(self._realtime.sessions)
+
+ @cached_property
+ def transcription_sessions(self) -> TranscriptionSessionsWithRawResponse:
+ return TranscriptionSessionsWithRawResponse(self._realtime.transcription_sessions)
+
+
+class AsyncRealtimeWithRawResponse:
+ def __init__(self, realtime: AsyncRealtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def sessions(self) -> AsyncSessionsWithRawResponse:
+ return AsyncSessionsWithRawResponse(self._realtime.sessions)
+
+ @cached_property
+ def transcription_sessions(self) -> AsyncTranscriptionSessionsWithRawResponse:
+ return AsyncTranscriptionSessionsWithRawResponse(self._realtime.transcription_sessions)
+
+
+class RealtimeWithStreamingResponse:
+ def __init__(self, realtime: Realtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def sessions(self) -> SessionsWithStreamingResponse:
+ return SessionsWithStreamingResponse(self._realtime.sessions)
+
+ @cached_property
+ def transcription_sessions(self) -> TranscriptionSessionsWithStreamingResponse:
+ return TranscriptionSessionsWithStreamingResponse(self._realtime.transcription_sessions)
+
+
+class AsyncRealtimeWithStreamingResponse:
+ def __init__(self, realtime: AsyncRealtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def sessions(self) -> AsyncSessionsWithStreamingResponse:
+ return AsyncSessionsWithStreamingResponse(self._realtime.sessions)
+
+ @cached_property
+ def transcription_sessions(self) -> AsyncTranscriptionSessionsWithStreamingResponse:
+ return AsyncTranscriptionSessionsWithStreamingResponse(self._realtime.transcription_sessions)
+
+
+class AsyncRealtimeConnection:
+ """Represents a live websocket connection to the Realtime API"""
+
+ session: AsyncRealtimeSessionResource
+ response: AsyncRealtimeResponseResource
+ input_audio_buffer: AsyncRealtimeInputAudioBufferResource
+ conversation: AsyncRealtimeConversationResource
+ transcription_session: AsyncRealtimeTranscriptionSessionResource
+
+ _connection: AsyncWebsocketConnection
+
+ def __init__(self, connection: AsyncWebsocketConnection) -> None:
+ self._connection = connection
+
+ self.session = AsyncRealtimeSessionResource(self)
+ self.response = AsyncRealtimeResponseResource(self)
+ self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self)
+ self.conversation = AsyncRealtimeConversationResource(self)
+ self.transcription_session = AsyncRealtimeTranscriptionSessionResource(self)
+
+ async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]:
+ """
+        An infinite iterator that will continue to yield events until
+ the connection is closed.
+ """
+ from websockets.exceptions import ConnectionClosedOK
+
+ try:
+ while True:
+ yield await self.recv()
+ except ConnectionClosedOK:
+ return
+
+ async def recv(self) -> RealtimeServerEvent:
+ """
+        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.
+
+ Canceling this method is safe. There's no risk of losing data.
+ """
+ return self.parse_event(await self.recv_bytes())
+
+ async def recv_bytes(self) -> bytes:
+ """Receive the next message from the connection as raw bytes.
+
+ Canceling this method is safe. There's no risk of losing data.
+
+ If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
+ then you can call `.parse_event(data)`.
+ """
+ message = await self._connection.recv(decode=False)
+ log.debug(f"Received websocket message: %s", message)
+ if not isinstance(message, bytes):
+ # passing `decode=False` should always result in us getting `bytes` back
+ raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}")
+
+ return message
+
+ async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
+ data = (
+ event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
+ if isinstance(event, BaseModel)
+ else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam))
+ )
+ await self._connection.send(data)
+
+ async def close(self, *, code: int = 1000, reason: str = "") -> None:
+ await self._connection.close(code=code, reason=reason)
+
+ def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
+ """
+ Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.
+
+ This is helpful if you're using `.recv_bytes()`.
+ """
+ return cast(
+ RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
+ )
+
+
+class AsyncRealtimeConnectionManager:
+ """
+    Context manager over an `AsyncRealtimeConnection` that is returned by `beta.realtime.connect()`
+
+ This context manager ensures that the connection will be closed when it exits.
+
+ ---
+
+ Note that if your application doesn't work well with the context manager approach then you
+ can call the `.enter()` method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = await client.beta.realtime.connect(...).enter()
+ # ...
+ await connection.close()
+ ```
+ """
+
+ def __init__(
+ self,
+ *,
+ client: AsyncOpenAI,
+ model: str,
+ extra_query: Query,
+ extra_headers: Headers,
+ websocket_connection_options: WebsocketConnectionOptions,
+ ) -> None:
+ self.__client = client
+ self.__model = model
+ self.__connection: AsyncRealtimeConnection | None = None
+ self.__extra_query = extra_query
+ self.__extra_headers = extra_headers
+ self.__websocket_connection_options = websocket_connection_options
+
+ async def __aenter__(self) -> AsyncRealtimeConnection:
+ """
+ đź‘‹ If your application doesn't work well with the context manager approach then you
+ can call this method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = await client.beta.realtime.connect(...).enter()
+ # ...
+ await connection.close()
+ ```
+ """
+ try:
+ from websockets.asyncio.client import connect
+ except ImportError as exc:
+ raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
+
+ extra_query = self.__extra_query
+ auth_headers = self.__client.auth_headers
+ if is_async_azure_client(self.__client):
+ url, auth_headers = await self.__client._configure_realtime(self.__model, extra_query)
+ else:
+ url = self._prepare_url().copy_with(
+ params={
+ **self.__client.base_url.params,
+ "model": self.__model,
+ **extra_query,
+ },
+ )
+ log.debug("Connecting to %s", url)
+ if self.__websocket_connection_options:
+ log.debug("Connection options: %s", self.__websocket_connection_options)
+
+ self.__connection = AsyncRealtimeConnection(
+ await connect(
+ str(url),
+ user_agent_header=self.__client.user_agent,
+ additional_headers=_merge_mappings(
+ {
+ **auth_headers,
+ "OpenAI-Beta": "realtime=v1",
+ },
+ self.__extra_headers,
+ ),
+ **self.__websocket_connection_options,
+ )
+ )
+
+ return self.__connection
+
+ enter = __aenter__
+
+ def _prepare_url(self) -> httpx.URL:
+ if self.__client.websocket_base_url is not None:
+ base_url = httpx.URL(self.__client.websocket_base_url)
+ else:
+ base_url = self.__client._base_url.copy_with(scheme="wss")
+
+ merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
+ return base_url.copy_with(raw_path=merge_raw_path)
+
+ async def __aexit__(
+ self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
+ ) -> None:
+ if self.__connection is not None:
+ await self.__connection.close()
+
+
+class RealtimeConnection:
+ """Represents a live websocket connection to the Realtime API"""
+
+ session: RealtimeSessionResource
+ response: RealtimeResponseResource
+ input_audio_buffer: RealtimeInputAudioBufferResource
+ conversation: RealtimeConversationResource
+ transcription_session: RealtimeTranscriptionSessionResource
+
+ _connection: WebsocketConnection
+
+ def __init__(self, connection: WebsocketConnection) -> None:
+ self._connection = connection
+
+ self.session = RealtimeSessionResource(self)
+ self.response = RealtimeResponseResource(self)
+ self.input_audio_buffer = RealtimeInputAudioBufferResource(self)
+ self.conversation = RealtimeConversationResource(self)
+ self.transcription_session = RealtimeTranscriptionSessionResource(self)
+
+ def __iter__(self) -> Iterator[RealtimeServerEvent]:
+ """
+        An infinite iterator that will continue to yield events until
+ the connection is closed.
+ """
+ from websockets.exceptions import ConnectionClosedOK
+
+ try:
+ while True:
+ yield self.recv()
+ except ConnectionClosedOK:
+ return
+
+ def recv(self) -> RealtimeServerEvent:
+ """
+        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.
+
+ Canceling this method is safe. There's no risk of losing data.
+ """
+ return self.parse_event(self.recv_bytes())
+
+ def recv_bytes(self) -> bytes:
+ """Receive the next message from the connection as raw bytes.
+
+ Canceling this method is safe. There's no risk of losing data.
+
+ If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
+ then you can call `.parse_event(data)`.
+ """
+ message = self._connection.recv(decode=False)
+ log.debug(f"Received websocket message: %s", message)
+ if not isinstance(message, bytes):
+ # passing `decode=False` should always result in us getting `bytes` back
+ raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}")
+
+ return message
+
+ def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
+ data = (
+ event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
+ if isinstance(event, BaseModel)
+ else json.dumps(maybe_transform(event, RealtimeClientEventParam))
+ )
+ self._connection.send(data)
+
+ def close(self, *, code: int = 1000, reason: str = "") -> None:
+ self._connection.close(code=code, reason=reason)
+
+ def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
+ """
+ Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.
+
+ This is helpful if you're using `.recv_bytes()`.
+ """
+ return cast(
+ RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
+ )
+
+
+class RealtimeConnectionManager:
+ """
+ Context manager over a `RealtimeConnection` that is returned by `beta.realtime.connect()`
+
+ This context manager ensures that the connection will be closed when it exits.
+
+ ---
+
+ Note that if your application doesn't work well with the context manager approach then you
+ can call the `.enter()` method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = client.beta.realtime.connect(...).enter()
+ # ...
+ connection.close()
+ ```
+ """
+
+ def __init__(
+ self,
+ *,
+ client: OpenAI,
+ model: str,
+ extra_query: Query,
+ extra_headers: Headers,
+ websocket_connection_options: WebsocketConnectionOptions,
+ ) -> None:
+ self.__client = client
+ self.__model = model
+ self.__connection: RealtimeConnection | None = None
+ self.__extra_query = extra_query
+ self.__extra_headers = extra_headers
+ self.__websocket_connection_options = websocket_connection_options
+
+ def __enter__(self) -> RealtimeConnection:
+ """
+ đź‘‹ If your application doesn't work well with the context manager approach then you
+ can call this method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = client.beta.realtime.connect(...).enter()
+ # ...
+ connection.close()
+ ```
+ """
+ try:
+ from websockets.sync.client import connect
+ except ImportError as exc:
+ raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
+
+ extra_query = self.__extra_query
+ auth_headers = self.__client.auth_headers
+ if is_azure_client(self.__client):
+ url, auth_headers = self.__client._configure_realtime(self.__model, extra_query)
+ else:
+ url = self._prepare_url().copy_with(
+ params={
+ **self.__client.base_url.params,
+ "model": self.__model,
+ **extra_query,
+ },
+ )
+ log.debug("Connecting to %s", url)
+ if self.__websocket_connection_options:
+ log.debug("Connection options: %s", self.__websocket_connection_options)
+
+ self.__connection = RealtimeConnection(
+ connect(
+ str(url),
+ user_agent_header=self.__client.user_agent,
+ additional_headers=_merge_mappings(
+ {
+ **auth_headers,
+ "OpenAI-Beta": "realtime=v1",
+ },
+ self.__extra_headers,
+ ),
+ **self.__websocket_connection_options,
+ )
+ )
+
+ return self.__connection
+
+ enter = __enter__
+
+ def _prepare_url(self) -> httpx.URL:
+ if self.__client.websocket_base_url is not None:
+ base_url = httpx.URL(self.__client.websocket_base_url)
+ else:
+ base_url = self.__client._base_url.copy_with(scheme="wss")
+
+ merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
+ return base_url.copy_with(raw_path=merge_raw_path)
+
+ def __exit__(
+ self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
+ ) -> None:
+ if self.__connection is not None:
+ self.__connection.close()
+
+
+class BaseRealtimeConnectionResource:
+ def __init__(self, connection: RealtimeConnection) -> None:
+ self._connection = connection
+
+
+class RealtimeSessionResource(BaseRealtimeConnectionResource):
+ def update(self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event to update the session’s default configuration.
+ The client may send this event at any time to update any field,
+ except for `voice`. However, note that once a session has been
+ initialized with a particular `model`, it can’t be changed to
+ another model using `session.update`.
+
+ When the server receives a `session.update`, it will respond
+ with a `session.updated` event showing the full, effective configuration.
+ Only the fields that are present are updated. To clear a field like
+ `instructions`, pass an empty string.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
+ )
+ )
+
+
+class RealtimeResponseResource(BaseRealtimeConnectionResource):
+ def create(
+ self,
+ *,
+ event_id: str | NotGiven = NOT_GIVEN,
+ response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ This event instructs the server to create a Response, which means triggering
+ model inference. When in Server VAD mode, the server will create Responses
+ automatically.
+
+ A Response will include at least one Item, and may have two, in which case
+ the second will be a function call. These Items will be appended to the
+ conversation history.
+
+ The server will respond with a `response.created` event, events for Items
+ and content created, and finally a `response.done` event to indicate the
+ Response is complete.
+
+ The `response.create` event includes inference configuration like
+        `instructions` and `temperature`. These fields will override the Session's
+ configuration for this Response only.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
+ )
+ )
+
+ def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to cancel an in-progress response.
+
+ The server will respond
+ with a `response.cancelled` event or an error if there is no response to
+ cancel.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
+ )
+ )
+
+
+class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource):
+ def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to clear the audio bytes in the buffer.
+
+ The server will
+ respond with an `input_audio_buffer.cleared` event.
+ """
+ self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
+ )
+
+ def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event to commit the user input audio buffer, which will create a
+ new user message item in the conversation. This event will produce an error
+ if the input audio buffer is empty. When in Server VAD mode, the client does
+ not need to send this event, the server will commit the audio buffer
+ automatically.
+
+ Committing the input audio buffer will trigger input audio transcription
+ (if enabled in session configuration), but it will not create a response
+ from the model. The server will respond with an `input_audio_buffer.committed`
+ event.
+ """
+ self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
+ )
+
+ def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to append audio bytes to the input audio buffer.
+
+ The audio
+ buffer is temporary storage you can write to and later commit. In Server VAD
+ mode, the audio buffer is used to detect speech and the server will decide
+ when to commit. When Server VAD is disabled, you must commit the audio buffer
+ manually.
+
+ The client may choose how much audio to place in each event up to a maximum
+        of 15 MiB; for example, streaming smaller chunks from the client may allow the
+        VAD to be more responsive. Unlike most other client events, the server will
+        not send a confirmation response to this event.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
+ )
+ )
+
+
+class RealtimeConversationResource(BaseRealtimeConnectionResource):
+ @cached_property
+ def item(self) -> RealtimeConversationItemResource:
+ return RealtimeConversationItemResource(self._connection)
+
+
+class RealtimeConversationItemResource(BaseRealtimeConnectionResource):
+ def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event when you want to remove any item from the conversation
+ history.
+
+ The server will respond with a `conversation.item.deleted` event,
+ unless the item does not exist in the conversation history, in which case the
+ server will respond with an error.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
+ )
+ )
+
+ def create(
+ self,
+ *,
+ item: ConversationItemParam,
+ event_id: str | NotGiven = NOT_GIVEN,
+ previous_item_id: str | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ Add a new Item to the Conversation's context, including messages, function
+ calls, and function call responses. This event can be used both to populate a
+ "history" of the conversation and to add new items mid-stream, but has the
+ current limitation that it cannot populate assistant audio messages.
+
+ If successful, the server will respond with a `conversation.item.created`
+ event, otherwise an `error` event will be sent.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.create",
+ "item": item,
+ "event_id": event_id,
+ "previous_item_id": previous_item_id,
+ }
+ ),
+ )
+ )
+
+ def truncate(
+ self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to truncate a previous assistant message’s audio.
+
+ The server
+ will produce audio faster than realtime, so this event is useful when the user
+ interrupts to truncate audio that has already been sent to the client but not
+ yet played. This will synchronize the server's understanding of the audio with
+ the client's playback.
+
+ Truncating audio will delete the server-side text transcript to ensure there
+        is no text in the context that hasn't been heard by the user.
+
+ If successful, the server will respond with a `conversation.item.truncated`
+ event.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.truncate",
+ "audio_end_ms": audio_end_ms,
+ "content_index": content_index,
+ "item_id": item_id,
+ "event_id": event_id,
+ }
+ ),
+ )
+ )
+
+ def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
+ The server will respond with a `conversation.item.retrieved` event,
+ unless the item does not exist in the conversation history, in which case the
+ server will respond with an error.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
+ )
+ )
+
+
+class RealtimeTranscriptionSessionResource(BaseRealtimeConnectionResource):
+ def update(
+ self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to update a transcription session."""
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
+ )
+ )
+
+
+class BaseAsyncRealtimeConnectionResource:
+ def __init__(self, connection: AsyncRealtimeConnection) -> None:
+ self._connection = connection
+
+
+class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource):
+ async def update(
+ self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """
+ Send this event to update the session’s default configuration.
+ The client may send this event at any time to update any field,
+ except for `voice`. However, note that once a session has been
+ initialized with a particular `model`, it can’t be changed to
+ another model using `session.update`.
+
+ When the server receives a `session.update`, it will respond
+ with a `session.updated` event showing the full, effective configuration.
+ Only the fields that are present are updated. To clear a field like
+ `instructions`, pass an empty string.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
+ )
+ )
+
+
+class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource):
+ async def create(
+ self,
+ *,
+ event_id: str | NotGiven = NOT_GIVEN,
+ response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ This event instructs the server to create a Response, which means triggering
+ model inference. When in Server VAD mode, the server will create Responses
+ automatically.
+
+ A Response will include at least one Item, and may have two, in which case
+ the second will be a function call. These Items will be appended to the
+ conversation history.
+
+ The server will respond with a `response.created` event, events for Items
+ and content created, and finally a `response.done` event to indicate the
+ Response is complete.
+
+ The `response.create` event includes inference configuration like
+        `instructions` and `temperature`. These fields will override the Session's
+ configuration for this Response only.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
+ )
+ )
+
+ async def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to cancel an in-progress response.
+
+ The server will respond
+ with a `response.cancelled` event or an error if there is no response to
+ cancel.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
+ )
+ )
+
+
+class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
+ async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to clear the audio bytes in the buffer.
+
+ The server will
+ respond with an `input_audio_buffer.cleared` event.
+ """
+ await self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
+ )
+
+ async def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event to commit the user input audio buffer, which will create a
+ new user message item in the conversation. This event will produce an error
+ if the input audio buffer is empty. When in Server VAD mode, the client does
+ not need to send this event, the server will commit the audio buffer
+ automatically.
+
+ Committing the input audio buffer will trigger input audio transcription
+ (if enabled in session configuration), but it will not create a response
+ from the model. The server will respond with an `input_audio_buffer.committed`
+ event.
+ """
+ await self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
+ )
+
+ async def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to append audio bytes to the input audio buffer.
+
+ The audio
+ buffer is temporary storage you can write to and later commit. In Server VAD
+ mode, the audio buffer is used to detect speech and the server will decide
+ when to commit. When Server VAD is disabled, you must commit the audio buffer
+ manually.
+
+ The client may choose how much audio to place in each event up to a maximum
+        of 15 MiB; for example, streaming smaller chunks from the client may allow the
+        VAD to be more responsive. Unlike most other client events, the server will
+        not send a confirmation response to this event.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
+ )
+ )
+
+
+class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource):
+ @cached_property
+ def item(self) -> AsyncRealtimeConversationItemResource:
+ return AsyncRealtimeConversationItemResource(self._connection)
+
+
+class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource):
+ async def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event when you want to remove any item from the conversation
+ history.
+
+ The server will respond with a `conversation.item.deleted` event,
+ unless the item does not exist in the conversation history, in which case the
+ server will respond with an error.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
+ )
+ )
+
+ async def create(
+ self,
+ *,
+ item: ConversationItemParam,
+ event_id: str | NotGiven = NOT_GIVEN,
+ previous_item_id: str | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ Add a new Item to the Conversation's context, including messages, function
+ calls, and function call responses. This event can be used both to populate a
+ "history" of the conversation and to add new items mid-stream, but has the
+ current limitation that it cannot populate assistant audio messages.
+
+ If successful, the server will respond with a `conversation.item.created`
+ event, otherwise an `error` event will be sent.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.create",
+ "item": item,
+ "event_id": event_id,
+ "previous_item_id": previous_item_id,
+ }
+ ),
+ )
+ )
+
+ async def truncate(
+ self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to truncate a previous assistant message’s audio.
+
+ The server
+ will produce audio faster than realtime, so this event is useful when the user
+ interrupts to truncate audio that has already been sent to the client but not
+ yet played. This will synchronize the server's understanding of the audio with
+ the client's playback.
+
+ Truncating audio will delete the server-side text transcript to ensure there
+        is no text in the context that hasn't been heard by the user.
+
+ If successful, the server will respond with a `conversation.item.truncated`
+ event.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.truncate",
+ "audio_end_ms": audio_end_ms,
+ "content_index": content_index,
+ "item_id": item_id,
+ "event_id": event_id,
+ }
+ ),
+ )
+ )
+
+ async def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
+ The server will respond with a `conversation.item.retrieved` event,
+ unless the item does not exist in the conversation history, in which case the
+ server will respond with an error.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
+ )
+ )
+
+
+class AsyncRealtimeTranscriptionSessionResource(BaseAsyncRealtimeConnectionResource):
+ async def update(
+ self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to update a transcription session."""
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
+ )
+ )
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py
new file mode 100644
index 00000000..5884e54d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py
@@ -0,0 +1,383 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._base_client import make_request_options
+from ....types.beta.realtime import session_create_params
+from ....types.beta.realtime.session_create_response import SessionCreateResponse
+
+__all__ = ["Sessions", "AsyncSessions"]
+
+
+class Sessions(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> SessionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return SessionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> SessionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return SessionsWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ input_audio_noise_reduction: session_create_params.InputAudioNoiseReduction | NotGiven = NOT_GIVEN,
+ input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+ instructions: str | NotGiven = NOT_GIVEN,
+ max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
+ modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+ model: Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ]
+ | NotGiven = NOT_GIVEN,
+ output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: str | NotGiven = NOT_GIVEN,
+ tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN,
+ turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SessionCreateResponse:
+ """
+ Create an ephemeral API token for use in client-side applications with the
+ Realtime API. Can be configured with the same session parameters as the
+ `session.update` client event.
+
+ It responds with a session object, plus a `client_secret` key which contains a
+ usable ephemeral API token that can be used to authenticate browser clients for
+ the Realtime API.
+
+ Args:
+ input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+ `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+ (mono), and little-endian byte order.
+
+ input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
+ off. Noise reduction filters audio added to the input audio buffer before it is
+ sent to VAD and the model. Filtering the audio can improve VAD and turn
+ detection accuracy (reducing false positives) and model performance by improving
+ perception of the input audio.
+
+ input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
+ `null` to turn off once on. Input audio transcription is not native to the
+ model, since the model consumes audio directly. Transcription runs
+ asynchronously through
+ [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+ and should be treated as guidance of input audio content rather than precisely
+ what the model heard. The client can optionally set the language and prompt for
+              transcription; these offer additional guidance to the transcription service.
+
+ instructions: The default system instructions (i.e. system message) prepended to model calls.
+ This field allows the client to guide the model on desired responses. The model
+              can be instructed on response content and format (e.g. "be extremely succinct",
+ "act friendly", "here are examples of good responses") and on audio behavior
+ (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
+ instructions are not guaranteed to be followed by the model, but they provide
+ guidance to the model on the desired behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+
+ max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+
+ modalities: The set of modalities the model can respond with. To disable audio, set this to
+ ["text"].
+
+ model: The Realtime model used for this session.
+
+ output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+ For `pcm16`, output audio is sampled at a rate of 24kHz.
+
+ temperature: Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
+ temperature of 0.8 is highly recommended for best performance.
+
+ tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
+ a function.
+
+ tools: Tools (functions) available to the model.
+
+          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+              set to `null` to turn off, in which case the client must manually trigger model
+              response. Server VAD means that the model will detect the start and end of
+              speech based on audio volume and respond at the end of user speech. Semantic VAD
+              is more advanced and uses a turn detection model (in conjunction with VAD) to
+ semantically estimate whether the user has finished speaking, then dynamically
+ sets a timeout based on this probability. For example, if user audio trails off
+ with "uhhm", the model will score a low probability of turn end and wait longer
+ for the user to continue speaking. This can be useful for more natural
+ conversations, but may have a higher latency.
+
+ voice: The voice the model uses to respond. Voice cannot be changed during the session
+ once the model has responded with audio at least once. Current voice options are
+              `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer` and `verse`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ "/realtime/sessions",
+ body=maybe_transform(
+ {
+ "input_audio_format": input_audio_format,
+ "input_audio_noise_reduction": input_audio_noise_reduction,
+ "input_audio_transcription": input_audio_transcription,
+ "instructions": instructions,
+ "max_response_output_tokens": max_response_output_tokens,
+ "modalities": modalities,
+ "model": model,
+ "output_audio_format": output_audio_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "turn_detection": turn_detection,
+ "voice": voice,
+ },
+ session_create_params.SessionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=SessionCreateResponse,
+ )
+
+
+class AsyncSessions(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncSessionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncSessionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncSessionsWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ input_audio_noise_reduction: session_create_params.InputAudioNoiseReduction | NotGiven = NOT_GIVEN,
+ input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+ instructions: str | NotGiven = NOT_GIVEN,
+ max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
+ modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+ model: Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ]
+ | NotGiven = NOT_GIVEN,
+ output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: str | NotGiven = NOT_GIVEN,
+ tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN,
+ turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SessionCreateResponse:
+ """
+ Create an ephemeral API token for use in client-side applications with the
+ Realtime API. Can be configured with the same session parameters as the
+ `session.update` client event.
+
+ It responds with a session object, plus a `client_secret` key which contains a
+ usable ephemeral API token that can be used to authenticate browser clients for
+ the Realtime API.
+
+ Args:
+ input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+ `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+ (mono), and little-endian byte order.
+
+ input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
+ off. Noise reduction filters audio added to the input audio buffer before it is
+ sent to VAD and the model. Filtering the audio can improve VAD and turn
+ detection accuracy (reducing false positives) and model performance by improving
+ perception of the input audio.
+
+ input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
+ `null` to turn off once on. Input audio transcription is not native to the
+ model, since the model consumes audio directly. Transcription runs
+ asynchronously through
+ [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+ and should be treated as guidance of input audio content rather than precisely
+ what the model heard. The client can optionally set the language and prompt for
+              transcription; these offer additional guidance to the transcription service.
+
+ instructions: The default system instructions (i.e. system message) prepended to model calls.
+ This field allows the client to guide the model on desired responses. The model
+              can be instructed on response content and format (e.g. "be extremely succinct",
+ "act friendly", "here are examples of good responses") and on audio behavior
+ (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
+ instructions are not guaranteed to be followed by the model, but they provide
+ guidance to the model on the desired behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+
+ max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+
+ modalities: The set of modalities the model can respond with. To disable audio, set this to
+ ["text"].
+
+ model: The Realtime model used for this session.
+
+ output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+ For `pcm16`, output audio is sampled at a rate of 24kHz.
+
+ temperature: Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
+ temperature of 0.8 is highly recommended for best performance.
+
+ tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
+ a function.
+
+ tools: Tools (functions) available to the model.
+
+          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+              set to `null` to turn off, in which case the client must manually trigger model
+              response. Server VAD means that the model will detect the start and end of
+              speech based on audio volume and respond at the end of user speech. Semantic VAD
+              is more advanced and uses a turn detection model (in conjunction with VAD) to
+ semantically estimate whether the user has finished speaking, then dynamically
+ sets a timeout based on this probability. For example, if user audio trails off
+ with "uhhm", the model will score a low probability of turn end and wait longer
+ for the user to continue speaking. This can be useful for more natural
+ conversations, but may have a higher latency.
+
+ voice: The voice the model uses to respond. Voice cannot be changed during the session
+ once the model has responded with audio at least once. Current voice options are
+              `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer` and `verse`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ "/realtime/sessions",
+ body=await async_maybe_transform(
+ {
+ "input_audio_format": input_audio_format,
+ "input_audio_noise_reduction": input_audio_noise_reduction,
+ "input_audio_transcription": input_audio_transcription,
+ "instructions": instructions,
+ "max_response_output_tokens": max_response_output_tokens,
+ "modalities": modalities,
+ "model": model,
+ "output_audio_format": output_audio_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "turn_detection": turn_detection,
+ "voice": voice,
+ },
+ session_create_params.SessionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=SessionCreateResponse,
+ )
+
+
+class SessionsWithRawResponse:
+ def __init__(self, sessions: Sessions) -> None:
+ self._sessions = sessions
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ sessions.create,
+ )
+
+
+class AsyncSessionsWithRawResponse:
+ def __init__(self, sessions: AsyncSessions) -> None:
+ self._sessions = sessions
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ sessions.create,
+ )
+
+
+class SessionsWithStreamingResponse:
+ def __init__(self, sessions: Sessions) -> None:
+ self._sessions = sessions
+
+ self.create = to_streamed_response_wrapper(
+ sessions.create,
+ )
+
+
+class AsyncSessionsWithStreamingResponse:
+ def __init__(self, sessions: AsyncSessions) -> None:
+ self._sessions = sessions
+
+ self.create = async_to_streamed_response_wrapper(
+ sessions.create,
+ )
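+
+ # A sketch of the raw-response wrapper above, assuming a configured sync
+ # client: the same `create` call, but the HTTP response is returned so that
+ # headers can be inspected before parsing.
+ #
+ # from openai import OpenAI
+ #
+ # client = OpenAI()
+ # response = client.beta.realtime.sessions.with_raw_response.create(
+ #     model="gpt-4o-realtime-preview",  # illustrative model name
+ # )
+ # print(response.headers.get("x-request-id"))
+ # session = response.parse()  # typed SessionCreateResponse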
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/transcription_sessions.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/transcription_sessions.py
new file mode 100644
index 00000000..0917da71
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/transcription_sessions.py
@@ -0,0 +1,277 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._base_client import make_request_options
+from ....types.beta.realtime import transcription_session_create_params
+from ....types.beta.realtime.transcription_session import TranscriptionSession
+
+__all__ = ["TranscriptionSessions", "AsyncTranscriptionSessions"]
+
+
+class TranscriptionSessions(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> TranscriptionSessionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return TranscriptionSessionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> TranscriptionSessionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return TranscriptionSessionsWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ include: List[str] | NotGiven = NOT_GIVEN,
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ input_audio_noise_reduction: transcription_session_create_params.InputAudioNoiseReduction
+ | NotGiven = NOT_GIVEN,
+ input_audio_transcription: transcription_session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+ modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+ turn_detection: transcription_session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> TranscriptionSession:
+ """
+ Create an ephemeral API token for use in client-side applications with the
+ Realtime API, specifically for realtime transcriptions. The session can be
+ configured with the same parameters as the `transcription_session.update`
+ client event.
+
+ It responds with a session object, plus a `client_secret` key containing an
+ ephemeral API token that can be used to authenticate browser clients for the
+ Realtime API.
+
+ Args:
+ include:
+ The set of items to include in the transcription. Current available items are:
+
+ - `item.input_audio_transcription.logprobs`
+
+ input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+ `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+ (mono), and little-endian byte order.
+
+ input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
+ off. Noise reduction filters audio added to the input audio buffer before it is
+ sent to VAD and the model. Filtering the audio can improve VAD and turn
+ detection accuracy (reducing false positives) and model performance by improving
+ perception of the input audio.
+
+ input_audio_transcription: Configuration for input audio transcription. The client can optionally set the
+ language and prompt for transcription; these offer additional guidance to the
+ transcription service.
+
+ modalities: The set of modalities the model can respond with. To disable audio, set this to
+ ["text"].
+
+ turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+ set to `null` to turn it off, in which case the client must manually trigger a
+ model response. Server VAD means that the model will detect the start and end of
+ speech based on audio volume and respond at the end of user speech. Semantic VAD
+ is more advanced and uses a turn detection model (in conjunction with VAD) to
+ semantically estimate whether the user has finished speaking, then dynamically
+ sets a timeout based on this probability. For example, if user audio trails off
+ with "uhhm", the model will score a low probability of turn end and wait longer
+ for the user to continue speaking. This can be useful for more natural
+ conversations, but may come with higher latency.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ "/realtime/transcription_sessions",
+ body=maybe_transform(
+ {
+ "include": include,
+ "input_audio_format": input_audio_format,
+ "input_audio_noise_reduction": input_audio_noise_reduction,
+ "input_audio_transcription": input_audio_transcription,
+ "modalities": modalities,
+ "turn_detection": turn_detection,
+ },
+ transcription_session_create_params.TranscriptionSessionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=TranscriptionSession,
+ )
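+
+ # A minimal sync usage sketch, assuming a configured `OpenAI` client; the
+ # parameter values are illustrative. All arguments are optional, so only the
+ # fields being overridden need to be passed.
+ #
+ # from openai import OpenAI
+ #
+ # client = OpenAI()
+ # session = client.beta.realtime.transcription_sessions.create(
+ #     input_audio_format="pcm16",
+ #     include=["item.input_audio_transcription.logprobs"],
+ # )
+ # print(session.client_secret)  # ephemeral token for a browser client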
+
+
+class AsyncTranscriptionSessions(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncTranscriptionSessionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncTranscriptionSessionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncTranscriptionSessionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncTranscriptionSessionsWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ include: List[str] | NotGiven = NOT_GIVEN,
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ input_audio_noise_reduction: transcription_session_create_params.InputAudioNoiseReduction
+ | NotGiven = NOT_GIVEN,
+ input_audio_transcription: transcription_session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+ modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+ turn_detection: transcription_session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> TranscriptionSession:
+ """
+ Create an ephemeral API token for use in client-side applications with the
+ Realtime API, specifically for realtime transcriptions. The session can be
+ configured with the same parameters as the `transcription_session.update`
+ client event.
+
+ It responds with a session object, plus a `client_secret` key containing an
+ ephemeral API token that can be used to authenticate browser clients for the
+ Realtime API.
+
+ Args:
+ include:
+ The set of items to include in the transcription. Current available items are:
+
+ - `item.input_audio_transcription.logprobs`
+
+ input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+ `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+ (mono), and little-endian byte order.
+
+ input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
+ off. Noise reduction filters audio added to the input audio buffer before it is
+ sent to VAD and the model. Filtering the audio can improve VAD and turn
+ detection accuracy (reducing false positives) and model performance by improving
+ perception of the input audio.
+
+ input_audio_transcription: Configuration for input audio transcription. The client can optionally set the
+ language and prompt for transcription; these offer additional guidance to the
+ transcription service.
+
+ modalities: The set of modalities the model can respond with. To disable audio, set this to
+ ["text"].
+
+ turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+ set to `null` to turn it off, in which case the client must manually trigger a
+ model response. Server VAD means that the model will detect the start and end of
+ speech based on audio volume and respond at the end of user speech. Semantic VAD
+ is more advanced and uses a turn detection model (in conjunction with VAD) to
+ semantically estimate whether the user has finished speaking, then dynamically
+ sets a timeout based on this probability. For example, if user audio trails off
+ with "uhhm", the model will score a low probability of turn end and wait longer
+ for the user to continue speaking. This can be useful for more natural
+ conversations, but may come with higher latency.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ "/realtime/transcription_sessions",
+ body=await async_maybe_transform(
+ {
+ "include": include,
+ "input_audio_format": input_audio_format,
+ "input_audio_noise_reduction": input_audio_noise_reduction,
+ "input_audio_transcription": input_audio_transcription,
+ "modalities": modalities,
+ "turn_detection": turn_detection,
+ },
+ transcription_session_create_params.TranscriptionSessionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=TranscriptionSession,
+ )
+
+
+class TranscriptionSessionsWithRawResponse:
+ def __init__(self, transcription_sessions: TranscriptionSessions) -> None:
+ self._transcription_sessions = transcription_sessions
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ transcription_sessions.create,
+ )
+
+
+class AsyncTranscriptionSessionsWithRawResponse:
+ def __init__(self, transcription_sessions: AsyncTranscriptionSessions) -> None:
+ self._transcription_sessions = transcription_sessions
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ transcription_sessions.create,
+ )
+
+
+class TranscriptionSessionsWithStreamingResponse:
+ def __init__(self, transcription_sessions: TranscriptionSessions) -> None:
+ self._transcription_sessions = transcription_sessions
+
+ self.create = to_streamed_response_wrapper(
+ transcription_sessions.create,
+ )
+
+
+class AsyncTranscriptionSessionsWithStreamingResponse:
+ def __init__(self, transcription_sessions: AsyncTranscriptionSessions) -> None:
+ self._transcription_sessions = transcription_sessions
+
+ self.create = async_to_streamed_response_wrapper(
+ transcription_sessions.create,
+ )
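+
+ # A sketch of the streaming-response wrapper above, under the same assumptions
+ # as the raw-response example: the body is not read eagerly, so the call is
+ # used as a context manager and parsed inside it.
+ #
+ # from openai import OpenAI
+ #
+ # client = OpenAI()
+ # sessions = client.beta.realtime.transcription_sessions
+ # with sessions.with_streaming_response.create() as response:
+ #     print(response.headers.get("content-type"))
+ #     session = response.parse()  # typed TranscriptionSession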