Diffstat (limited to '.venv/lib/python3.12/site-packages/openai/resources/beta')
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/__init__.py                            47
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/assistants.py                        1004
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/beta.py                               175
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/chat/__init__.py                       11
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/chat/chat.py                           21
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/chat/completions.py                   634
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/realtime/__init__.py                   47
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/realtime/realtime.py                 1066
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py                  383
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/realtime/transcription_sessions.py    277
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/threads/__init__.py                    47
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/threads/messages.py                   670
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/__init__.py               33
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/runs.py                 2989
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/steps.py                 381
-rw-r--r--  .venv/lib/python3.12/site-packages/openai/resources/beta/threads/threads.py                   1875
16 files changed, 9660 insertions(+), 0 deletions(-)
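
For orientation, here is a minimal sketch (not part of the diff) of how the resources vendored above are reached from a client; it assumes the package is installed and `OPENAI_API_KEY` is set:

```py
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Each attribute below is implemented by one of the files in this diff.
page = client.beta.assistants.list(limit=5)  # beta/assistants.py
for assistant in page:
    print(assistant.id, assistant.name)
```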
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/__init__.py
new file mode 100644
index 00000000..87fea252
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/__init__.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .beta import (
+ Beta,
+ AsyncBeta,
+ BetaWithRawResponse,
+ AsyncBetaWithRawResponse,
+ BetaWithStreamingResponse,
+ AsyncBetaWithStreamingResponse,
+)
+from .threads import (
+ Threads,
+ AsyncThreads,
+ ThreadsWithRawResponse,
+ AsyncThreadsWithRawResponse,
+ ThreadsWithStreamingResponse,
+ AsyncThreadsWithStreamingResponse,
+)
+from .assistants import (
+ Assistants,
+ AsyncAssistants,
+ AssistantsWithRawResponse,
+ AsyncAssistantsWithRawResponse,
+ AssistantsWithStreamingResponse,
+ AsyncAssistantsWithStreamingResponse,
+)
+
+__all__ = [
+ "Assistants",
+ "AsyncAssistants",
+ "AssistantsWithRawResponse",
+ "AsyncAssistantsWithRawResponse",
+ "AssistantsWithStreamingResponse",
+ "AsyncAssistantsWithStreamingResponse",
+ "Threads",
+ "AsyncThreads",
+ "ThreadsWithRawResponse",
+ "AsyncThreadsWithRawResponse",
+ "ThreadsWithStreamingResponse",
+ "AsyncThreadsWithStreamingResponse",
+ "Beta",
+ "AsyncBeta",
+ "BetaWithRawResponse",
+ "AsyncBetaWithRawResponse",
+ "BetaWithStreamingResponse",
+ "AsyncBetaWithStreamingResponse",
+]
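
Because `__all__` above re-exports every wrapper class, each name resolves directly from the subpackage; a quick import check, assuming the wheel is installed:

```py
# All of these names appear in the __all__ list above.
from openai.resources.beta import (
    Assistants,
    AsyncBeta,
    ThreadsWithStreamingResponse,
)

print(Assistants, AsyncBeta, ThreadsWithStreamingResponse)
```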
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/assistants.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/assistants.py
new file mode 100644
index 00000000..1c7cbf37
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/assistants.py
@@ -0,0 +1,1004 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...pagination import SyncCursorPage, AsyncCursorPage
+from ...types.beta import (
+ assistant_list_params,
+ assistant_create_params,
+ assistant_update_params,
+)
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.beta.assistant import Assistant
+from ...types.shared.chat_model import ChatModel
+from ...types.beta.assistant_deleted import AssistantDeleted
+from ...types.shared_params.metadata import Metadata
+from ...types.shared.reasoning_effort import ReasoningEffort
+from ...types.beta.assistant_tool_param import AssistantToolParam
+from ...types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
+
+__all__ = ["Assistants", "AsyncAssistants"]
+
+
+class Assistants(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AssistantsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AssistantsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AssistantsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AssistantsWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ model: Union[str, ChatModel],
+ description: Optional[str] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ name: Optional[str] | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Assistant:
+ """
+ Create an assistant with a model and instructions.
+
+ Args:
+ model: ID of the model to use. You can use the
+ [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+ see all of your available models, or see our
+ [Model overview](https://platform.openai.com/docs/models) for descriptions of
+ them.
+
+ description: The description of the assistant. The maximum length is 512 characters.
+
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
+ characters.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ name: The name of the assistant. The maximum length is 256 characters.
+
+ reasoning_effort: **o-series models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ tool_resources: A set of resources that are used by the assistant's tools. The resources are
+ specific to the type of tool. For example, the `code_interpreter` tool requires
+ a list of file IDs, while the `file_search` tool requires a list of vector store
+ IDs.
+
+ tools: A list of tools enabled on the assistant. There can be a maximum of 128 tools per
+ assistant. Tools can be of types `code_interpreter`, `file_search`, or
+ `function`.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ "/assistants",
+ body=maybe_transform(
+ {
+ "model": model,
+ "description": description,
+ "instructions": instructions,
+ "metadata": metadata,
+ "name": name,
+ "reasoning_effort": reasoning_effort,
+ "response_format": response_format,
+ "temperature": temperature,
+ "tool_resources": tool_resources,
+ "tools": tools,
+ "top_p": top_p,
+ },
+ assistant_create_params.AssistantCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Assistant,
+ )
+
+ def retrieve(
+ self,
+ assistant_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Assistant:
+ """
+ Retrieves an assistant.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not assistant_id:
+ raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get(
+ f"/assistants/{assistant_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Assistant,
+ )
+
+ def update(
+ self,
+ assistant_id: str,
+ *,
+ description: Optional[str] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "o3-mini",
+ "o3-mini-2025-01-31",
+ "o1",
+ "o1-2024-12-17",
+ "gpt-4o",
+ "gpt-4o-2024-11-20",
+ "gpt-4o-2024-08-06",
+ "gpt-4o-2024-05-13",
+ "gpt-4o-mini",
+ "gpt-4o-mini-2024-07-18",
+ "gpt-4.5-preview",
+ "gpt-4.5-preview-2025-02-27",
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ ]
+ | NotGiven = NOT_GIVEN,
+ name: Optional[str] | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Assistant:
+ """Modifies an assistant.
+
+ Args:
+ description: The description of the assistant.
+
+ The maximum length is 512 characters.
+
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
+ characters.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: ID of the model to use. You can use the
+ [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+ see all of your available models, or see our
+ [Model overview](https://platform.openai.com/docs/models) for descriptions of
+ them.
+
+ name: The name of the assistant. The maximum length is 256 characters.
+
+ reasoning_effort: **o-series models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ tool_resources: A set of resources that are used by the assistant's tools. The resources are
+ specific to the type of tool. For example, the `code_interpreter` tool requires
+ a list of file IDs, while the `file_search` tool requires a list of vector store
+ IDs.
+
+ tools: A list of tools enabled on the assistant. There can be a maximum of 128 tools per
+ assistant. Tools can be of types `code_interpreter`, `file_search`, or
+ `function`.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not assistant_id:
+ raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ f"/assistants/{assistant_id}",
+ body=maybe_transform(
+ {
+ "description": description,
+ "instructions": instructions,
+ "metadata": metadata,
+ "model": model,
+ "name": name,
+ "reasoning_effort": reasoning_effort,
+ "response_format": response_format,
+ "temperature": temperature,
+ "tool_resources": tool_resources,
+ "tools": tools,
+ "top_p": top_p,
+ },
+ assistant_update_params.AssistantUpdateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Assistant,
+ )
+
+ def list(
+ self,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ before: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[Assistant]:
+ """Returns a list of assistants.
+
+ Args:
+ after: A cursor for use in pagination.
+
+ `after` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ ending with obj_foo, your subsequent call can include after=obj_foo in order to
+ fetch the next page of the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ starting with obj_foo, your subsequent call can include before=obj_foo in order
+ to fetch the previous page of the list.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get_api_list(
+ "/assistants",
+ page=SyncCursorPage[Assistant],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "limit": limit,
+ "order": order,
+ },
+ assistant_list_params.AssistantListParams,
+ ),
+ ),
+ model=Assistant,
+ )
+
+ def delete(
+ self,
+ assistant_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantDeleted:
+ """
+ Delete an assistant.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not assistant_id:
+ raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._delete(
+ f"/assistants/{assistant_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=AssistantDeleted,
+ )
+
+
+class AsyncAssistants(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncAssistantsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncAssistantsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncAssistantsWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ model: Union[str, ChatModel],
+ description: Optional[str] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ name: Optional[str] | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Assistant:
+ """
+ Create an assistant with a model and instructions.
+
+ Args:
+ model: ID of the model to use. You can use the
+ [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+ see all of your available models, or see our
+ [Model overview](https://platform.openai.com/docs/models) for descriptions of
+ them.
+
+ description: The description of the assistant. The maximum length is 512 characters.
+
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
+ characters.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ name: The name of the assistant. The maximum length is 256 characters.
+
+ reasoning_effort: **o-series models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ tool_resources: A set of resources that are used by the assistant's tools. The resources are
+ specific to the type of tool. For example, the `code_interpreter` tool requires
+ a list of file IDs, while the `file_search` tool requires a list of vector store
+ IDs.
+
+ tools: A list of tools enabled on the assistant. There can be a maximum of 128 tools per
+ assistant. Tools can be of types `code_interpreter`, `file_search`, or
+ `function`.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ "/assistants",
+ body=await async_maybe_transform(
+ {
+ "model": model,
+ "description": description,
+ "instructions": instructions,
+ "metadata": metadata,
+ "name": name,
+ "reasoning_effort": reasoning_effort,
+ "response_format": response_format,
+ "temperature": temperature,
+ "tool_resources": tool_resources,
+ "tools": tools,
+ "top_p": top_p,
+ },
+ assistant_create_params.AssistantCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Assistant,
+ )
+
+ async def retrieve(
+ self,
+ assistant_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Assistant:
+ """
+ Retrieves an assistant.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not assistant_id:
+ raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._get(
+ f"/assistants/{assistant_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Assistant,
+ )
+
+ async def update(
+ self,
+ assistant_id: str,
+ *,
+ description: Optional[str] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "o3-mini",
+ "o3-mini-2025-01-31",
+ "o1",
+ "o1-2024-12-17",
+ "gpt-4o",
+ "gpt-4o-2024-11-20",
+ "gpt-4o-2024-08-06",
+ "gpt-4o-2024-05-13",
+ "gpt-4o-mini",
+ "gpt-4o-mini-2024-07-18",
+ "gpt-4.5-preview",
+ "gpt-4.5-preview-2025-02-27",
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ ]
+ | NotGiven = NOT_GIVEN,
+ name: Optional[str] | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Assistant:
+ """Modifies an assistant.
+
+ Args:
+ description: The description of the assistant.
+
+ The maximum length is 512 characters.
+
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
+ characters.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: ID of the model to use. You can use the
+ [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+ see all of your available models, or see our
+ [Model overview](https://platform.openai.com/docs/models) for descriptions of
+ them.
+
+ name: The name of the assistant. The maximum length is 256 characters.
+
+ reasoning_effort: **o-series models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ tool_resources: A set of resources that are used by the assistant's tools. The resources are
+ specific to the type of tool. For example, the `code_interpreter` tool requires
+ a list of file IDs, while the `file_search` tool requires a list of vector store
+ IDs.
+
+ tools: A list of tools enabled on the assistant. There can be a maximum of 128 tools per
+ assistant. Tools can be of types `code_interpreter`, `file_search`, or
+ `function`.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not assistant_id:
+ raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ f"/assistants/{assistant_id}",
+ body=await async_maybe_transform(
+ {
+ "description": description,
+ "instructions": instructions,
+ "metadata": metadata,
+ "model": model,
+ "name": name,
+ "reasoning_effort": reasoning_effort,
+ "response_format": response_format,
+ "temperature": temperature,
+ "tool_resources": tool_resources,
+ "tools": tools,
+ "top_p": top_p,
+ },
+ assistant_update_params.AssistantUpdateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Assistant,
+ )
+
+ def list(
+ self,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ before: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[Assistant, AsyncCursorPage[Assistant]]:
+ """Returns a list of assistants.
+
+ Args:
+ after: A cursor for use in pagination.
+
+ `after` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ ending with obj_foo, your subsequent call can include after=obj_foo in order to
+ fetch the next page of the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ starting with obj_foo, your subsequent call can include before=obj_foo in order
+ to fetch the previous page of the list.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get_api_list(
+ "/assistants",
+ page=AsyncCursorPage[Assistant],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "limit": limit,
+ "order": order,
+ },
+ assistant_list_params.AssistantListParams,
+ ),
+ ),
+ model=Assistant,
+ )
+
+ async def delete(
+ self,
+ assistant_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantDeleted:
+ """
+ Delete an assistant.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not assistant_id:
+ raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._delete(
+ f"/assistants/{assistant_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=AssistantDeleted,
+ )
+
+
+class AssistantsWithRawResponse:
+ def __init__(self, assistants: Assistants) -> None:
+ self._assistants = assistants
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ assistants.create,
+ )
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ assistants.retrieve,
+ )
+ self.update = _legacy_response.to_raw_response_wrapper(
+ assistants.update,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ assistants.list,
+ )
+ self.delete = _legacy_response.to_raw_response_wrapper(
+ assistants.delete,
+ )
+
+
+class AsyncAssistantsWithRawResponse:
+ def __init__(self, assistants: AsyncAssistants) -> None:
+ self._assistants = assistants
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ assistants.create,
+ )
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ assistants.retrieve,
+ )
+ self.update = _legacy_response.async_to_raw_response_wrapper(
+ assistants.update,
+ )
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ assistants.list,
+ )
+ self.delete = _legacy_response.async_to_raw_response_wrapper(
+ assistants.delete,
+ )
+
+
+class AssistantsWithStreamingResponse:
+ def __init__(self, assistants: Assistants) -> None:
+ self._assistants = assistants
+
+ self.create = to_streamed_response_wrapper(
+ assistants.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ assistants.retrieve,
+ )
+ self.update = to_streamed_response_wrapper(
+ assistants.update,
+ )
+ self.list = to_streamed_response_wrapper(
+ assistants.list,
+ )
+ self.delete = to_streamed_response_wrapper(
+ assistants.delete,
+ )
+
+
+class AsyncAssistantsWithStreamingResponse:
+ def __init__(self, assistants: AsyncAssistants) -> None:
+ self._assistants = assistants
+
+ self.create = async_to_streamed_response_wrapper(
+ assistants.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ assistants.retrieve,
+ )
+ self.update = async_to_streamed_response_wrapper(
+ assistants.update,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ assistants.list,
+ )
+ self.delete = async_to_streamed_response_wrapper(
+ assistants.delete,
+ )
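
To see the synchronous resource end to end, here is a hedged sketch of a create/retrieve/delete round trip, including the `with_raw_response` wrapper documented above; the model name is an assumption, substitute any model your account can use:

```py
from openai import OpenAI

client = OpenAI()

assistant = client.beta.assistants.create(
    model="gpt-4o",  # assumed; any supported model works
    name="Math Tutor",
    instructions="You are a personal math tutor.",
)

# with_raw_response yields the HTTP response object; .parse() recovers
# the same Assistant model the plain call would have returned.
raw = client.beta.assistants.with_raw_response.retrieve(assistant.id)
print(raw.headers.get("x-request-id"))
print(raw.parse().id)

client.beta.assistants.delete(assistant.id)
```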
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/beta.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/beta.py
new file mode 100644
index 00000000..62fc8258
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/beta.py
@@ -0,0 +1,175 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ..._compat import cached_property
+from .chat.chat import Chat, AsyncChat
+from .assistants import (
+ Assistants,
+ AsyncAssistants,
+ AssistantsWithRawResponse,
+ AsyncAssistantsWithRawResponse,
+ AssistantsWithStreamingResponse,
+ AsyncAssistantsWithStreamingResponse,
+)
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from .threads.threads import (
+ Threads,
+ AsyncThreads,
+ ThreadsWithRawResponse,
+ AsyncThreadsWithRawResponse,
+ ThreadsWithStreamingResponse,
+ AsyncThreadsWithStreamingResponse,
+)
+from .realtime.realtime import (
+ Realtime,
+ AsyncRealtime,
+ RealtimeWithRawResponse,
+ AsyncRealtimeWithRawResponse,
+ RealtimeWithStreamingResponse,
+ AsyncRealtimeWithStreamingResponse,
+)
+
+__all__ = ["Beta", "AsyncBeta"]
+
+
+class Beta(SyncAPIResource):
+ @cached_property
+ def chat(self) -> Chat:
+ return Chat(self._client)
+
+ @cached_property
+ def realtime(self) -> Realtime:
+ return Realtime(self._client)
+
+ @cached_property
+ def assistants(self) -> Assistants:
+ return Assistants(self._client)
+
+ @cached_property
+ def threads(self) -> Threads:
+ return Threads(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> BetaWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return BetaWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> BetaWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return BetaWithStreamingResponse(self)
+
+
+class AsyncBeta(AsyncAPIResource):
+ @cached_property
+ def chat(self) -> AsyncChat:
+ return AsyncChat(self._client)
+
+ @cached_property
+ def realtime(self) -> AsyncRealtime:
+ return AsyncRealtime(self._client)
+
+ @cached_property
+ def assistants(self) -> AsyncAssistants:
+ return AsyncAssistants(self._client)
+
+ @cached_property
+ def threads(self) -> AsyncThreads:
+ return AsyncThreads(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncBetaWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncBetaWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncBetaWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncBetaWithStreamingResponse(self)
+
+
+class BetaWithRawResponse:
+ def __init__(self, beta: Beta) -> None:
+ self._beta = beta
+
+ @cached_property
+ def realtime(self) -> RealtimeWithRawResponse:
+ return RealtimeWithRawResponse(self._beta.realtime)
+
+ @cached_property
+ def assistants(self) -> AssistantsWithRawResponse:
+ return AssistantsWithRawResponse(self._beta.assistants)
+
+ @cached_property
+ def threads(self) -> ThreadsWithRawResponse:
+ return ThreadsWithRawResponse(self._beta.threads)
+
+
+class AsyncBetaWithRawResponse:
+ def __init__(self, beta: AsyncBeta) -> None:
+ self._beta = beta
+
+ @cached_property
+ def realtime(self) -> AsyncRealtimeWithRawResponse:
+ return AsyncRealtimeWithRawResponse(self._beta.realtime)
+
+ @cached_property
+ def assistants(self) -> AsyncAssistantsWithRawResponse:
+ return AsyncAssistantsWithRawResponse(self._beta.assistants)
+
+ @cached_property
+ def threads(self) -> AsyncThreadsWithRawResponse:
+ return AsyncThreadsWithRawResponse(self._beta.threads)
+
+
+class BetaWithStreamingResponse:
+ def __init__(self, beta: Beta) -> None:
+ self._beta = beta
+
+ @cached_property
+ def realtime(self) -> RealtimeWithStreamingResponse:
+ return RealtimeWithStreamingResponse(self._beta.realtime)
+
+ @cached_property
+ def assistants(self) -> AssistantsWithStreamingResponse:
+ return AssistantsWithStreamingResponse(self._beta.assistants)
+
+ @cached_property
+ def threads(self) -> ThreadsWithStreamingResponse:
+ return ThreadsWithStreamingResponse(self._beta.threads)
+
+
+class AsyncBetaWithStreamingResponse:
+ def __init__(self, beta: AsyncBeta) -> None:
+ self._beta = beta
+
+ @cached_property
+ def realtime(self) -> AsyncRealtimeWithStreamingResponse:
+ return AsyncRealtimeWithStreamingResponse(self._beta.realtime)
+
+ @cached_property
+ def assistants(self) -> AsyncAssistantsWithStreamingResponse:
+ return AsyncAssistantsWithStreamingResponse(self._beta.assistants)
+
+ @cached_property
+ def threads(self) -> AsyncThreadsWithStreamingResponse:
+ return AsyncThreadsWithStreamingResponse(self._beta.threads)
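
`AsyncBeta` mirrors the synchronous class property for property; a minimal sketch of the async side, under the same environment assumptions as above:

```py
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    # client.beta resolves to AsyncBeta; the paginator returned by
    # .list() supports direct async iteration across pages.
    async for assistant in client.beta.assistants.list(limit=3):
        print(assistant.id, assistant.model)


asyncio.run(main())
```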
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/__init__.py
new file mode 100644
index 00000000..072d7867
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/__init__.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .chat import Chat, AsyncChat
+from .completions import Completions, AsyncCompletions
+
+__all__ = [
+ "Completions",
+ "AsyncCompletions",
+ "Chat",
+ "AsyncChat",
+]
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/chat.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/chat.py
new file mode 100644
index 00000000..6afdcea3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/chat.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ...._compat import cached_property
+from .completions import Completions, AsyncCompletions
+from ...._resource import SyncAPIResource, AsyncAPIResource
+
+__all__ = ["Chat", "AsyncChat"]
+
+
+class Chat(SyncAPIResource):
+ @cached_property
+ def completions(self) -> Completions:
+ return Completions(self._client)
+
+
+class AsyncChat(AsyncAPIResource):
+ @cached_property
+ def completions(self) -> AsyncCompletions:
+ return AsyncCompletions(self._client)
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/completions.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/completions.py
new file mode 100644
index 00000000..545a3f40
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/chat/completions.py
@@ -0,0 +1,634 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Type, Union, Iterable, Optional, cast
+from functools import partial
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._streaming import Stream
+from ....types.chat import completion_create_params
+from ...._base_client import make_request_options
+from ....lib._parsing import (
+ ResponseFormatT,
+ validate_input_tools as _validate_input_tools,
+ parse_chat_completion as _parse_chat_completion,
+ type_to_response_format_param as _type_to_response_format,
+)
+from ....types.chat_model import ChatModel
+from ....lib.streaming.chat import ChatCompletionStreamManager, AsyncChatCompletionStreamManager
+from ....types.shared_params import Metadata, ReasoningEffort
+from ....types.chat.chat_completion import ChatCompletion
+from ....types.chat.chat_completion_chunk import ChatCompletionChunk
+from ....types.chat.parsed_chat_completion import ParsedChatCompletion
+from ....types.chat.chat_completion_tool_param import ChatCompletionToolParam
+from ....types.chat.chat_completion_audio_param import ChatCompletionAudioParam
+from ....types.chat.chat_completion_message_param import ChatCompletionMessageParam
+from ....types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
+from ....types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam
+from ....types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam
+
+__all__ = ["Completions", "AsyncCompletions"]
+
+
+class Completions(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> CompletionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return CompletionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> CompletionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return CompletionsWithStreamingResponse(self)
+
+ def parse(
+ self,
+ *,
+ messages: Iterable[ChatCompletionMessageParam],
+ model: Union[str, ChatModel],
+ audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+ response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+ functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+ n: Optional[int] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ seed: Optional[int] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+ store: Optional[bool] | NotGiven = NOT_GIVEN,
+ stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+ tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ user: str | NotGiven = NOT_GIVEN,
+ web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ParsedChatCompletion[ResponseFormatT]:
+ """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
+ & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
+
+ You can pass a pydantic model to this method and it will automatically convert the model
+ into a JSON schema, send it to the API and parse the response content back into the given model.
+
+ This method will also automatically parse `function` tool calls if:
+ - You use the `openai.pydantic_function_tool()` helper method
+ - You mark your tool schema with `"strict": True`
+
+ Example usage:
+ ```py
+ from typing import List
+
+ from pydantic import BaseModel
+ from openai import OpenAI
+
+
+ class Step(BaseModel):
+ explanation: str
+ output: str
+
+
+ class MathResponse(BaseModel):
+ steps: List[Step]
+ final_answer: str
+
+
+ client = OpenAI()
+ completion = client.beta.chat.completions.parse(
+ model="gpt-4o-2024-08-06",
+ messages=[
+ {"role": "system", "content": "You are a helpful math tutor."},
+ {"role": "user", "content": "solve 8x + 31 = 2"},
+ ],
+ response_format=MathResponse,
+ )
+
+ message = completion.choices[0].message
+ if message.parsed:
+ print(message.parsed.steps)
+ print("answer: ", message.parsed.final_answer)
+ ```
+ """
+ _validate_input_tools(tools)
+
+ extra_headers = {
+ "X-Stainless-Helper-Method": "beta.chat.completions.parse",
+ **(extra_headers or {}),
+ }
+
+ def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
+ return _parse_chat_completion(
+ response_format=response_format,
+ chat_completion=raw_completion,
+ input_tools=tools,
+ )
+
+ return self._post(
+ "/chat/completions",
+ body=maybe_transform(
+ {
+ "messages": messages,
+ "model": model,
+ "audio": audio,
+ "frequency_penalty": frequency_penalty,
+ "function_call": function_call,
+ "functions": functions,
+ "logit_bias": logit_bias,
+ "logprobs": logprobs,
+ "max_completion_tokens": max_completion_tokens,
+ "max_tokens": max_tokens,
+ "metadata": metadata,
+ "modalities": modalities,
+ "n": n,
+ "parallel_tool_calls": parallel_tool_calls,
+ "prediction": prediction,
+ "presence_penalty": presence_penalty,
+ "reasoning_effort": reasoning_effort,
+ "response_format": _type_to_response_format(response_format),
+ "seed": seed,
+ "service_tier": service_tier,
+ "stop": stop,
+ "store": store,
+ "stream": False,
+ "stream_options": stream_options,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "top_logprobs": top_logprobs,
+ "top_p": top_p,
+ "user": user,
+ "web_search_options": web_search_options,
+ },
+ completion_create_params.CompletionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ post_parser=parser,
+ ),
+ # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
+ # in the `parser` function above
+ cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
+ stream=False,
+ )
+
+ def stream(
+ self,
+ *,
+ messages: Iterable[ChatCompletionMessageParam],
+ model: Union[str, ChatModel],
+ audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+ response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+ functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+ n: Optional[int] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ seed: Optional[int] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+ store: Optional[bool] | NotGiven = NOT_GIVEN,
+ stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+ tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ user: str | NotGiven = NOT_GIVEN,
+ web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ChatCompletionStreamManager[ResponseFormatT]:
+ """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
+ and automatic accumulation of each delta.
+
+ This also supports all of the parsing utilities that `.parse()` does.
+
+ Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:
+
+ ```py
+ with client.beta.chat.completions.stream(
+ model="gpt-4o-2024-08-06",
+ messages=[...],
+ ) as stream:
+ for event in stream:
+ if event.type == "content.delta":
+ print(event.delta, flush=True, end="")
+ ```
+
+        When the context manager is entered, a `ChatCompletionStream` instance is returned which, like `.create(stream=True)`, is an iterator. The full list of events that are yielded by the iterator is outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).
+
+        When the context manager exits, the response will be closed; however, the `stream` instance is still available outside
+        the context manager.
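+
+        A short sketch of reading the fully accumulated completion once the stream has
+        finished, using the `get_final_completion()` helper described in the linked docs:
+
+        ```py
+        with client.beta.chat.completions.stream(
+            model="gpt-4o-2024-08-06",
+            messages=[...],
+        ) as stream:
+            for event in stream:
+                ...
+
+        # the stream instance remains usable after the context manager exits
+        completion = stream.get_final_completion()
+        print(completion.choices[0].message)
+        ```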
+ """
+        _validate_input_tools(tools)
+
+        extra_headers = {
+ "X-Stainless-Helper-Method": "beta.chat.completions.stream",
+ **(extra_headers or {}),
+ }
+
+ api_request: partial[Stream[ChatCompletionChunk]] = partial(
+ self._client.chat.completions.create,
+ messages=messages,
+ model=model,
+ audio=audio,
+ stream=True,
+ response_format=_type_to_response_format(response_format),
+ frequency_penalty=frequency_penalty,
+ function_call=function_call,
+ functions=functions,
+ logit_bias=logit_bias,
+ logprobs=logprobs,
+ max_completion_tokens=max_completion_tokens,
+ max_tokens=max_tokens,
+ metadata=metadata,
+ modalities=modalities,
+ n=n,
+ parallel_tool_calls=parallel_tool_calls,
+ prediction=prediction,
+ presence_penalty=presence_penalty,
+ reasoning_effort=reasoning_effort,
+ seed=seed,
+ service_tier=service_tier,
+ store=store,
+ stop=stop,
+ stream_options=stream_options,
+ temperature=temperature,
+ tool_choice=tool_choice,
+ tools=tools,
+ top_logprobs=top_logprobs,
+ top_p=top_p,
+ user=user,
+ web_search_options=web_search_options,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ )
+ return ChatCompletionStreamManager(
+ api_request,
+ response_format=response_format,
+ input_tools=tools,
+ )
+
+
+class AsyncCompletions(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
+ """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncCompletionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncCompletionsWithStreamingResponse(self)
+
+ async def parse(
+ self,
+ *,
+ messages: Iterable[ChatCompletionMessageParam],
+ model: Union[str, ChatModel],
+ audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+ response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+ functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+ n: Optional[int] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ seed: Optional[int] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+ store: Optional[bool] | NotGiven = NOT_GIVEN,
+ stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+ tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ user: str | NotGiven = NOT_GIVEN,
+ web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ParsedChatCompletion[ResponseFormatT]:
+ """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
+ & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
+
+ You can pass a pydantic model to this method and it will automatically convert the model
+ into a JSON schema, send it to the API and parse the response content back into the given model.
+
+ This method will also automatically parse `function` tool calls if:
+ - You use the `openai.pydantic_function_tool()` helper method
+ - You mark your tool schema with `"strict": True`
+
+ Example usage:
+ ```py
+        from typing import List
+
+        from pydantic import BaseModel
+        from openai import AsyncOpenAI
+
+
+ class Step(BaseModel):
+ explanation: str
+ output: str
+
+
+ class MathResponse(BaseModel):
+ steps: List[Step]
+ final_answer: str
+
+
+ client = AsyncOpenAI()
+ completion = await client.beta.chat.completions.parse(
+ model="gpt-4o-2024-08-06",
+ messages=[
+ {"role": "system", "content": "You are a helpful math tutor."},
+ {"role": "user", "content": "solve 8x + 31 = 2"},
+ ],
+ response_format=MathResponse,
+ )
+
+ message = completion.choices[0].message
+ if message.parsed:
+ print(message.parsed.steps)
+ print("answer: ", message.parsed.final_answer)
+ ```
+ """
+ _validate_input_tools(tools)
+
+ extra_headers = {
+ "X-Stainless-Helper-Method": "beta.chat.completions.parse",
+ **(extra_headers or {}),
+ }
+
+ def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
+ return _parse_chat_completion(
+ response_format=response_format,
+ chat_completion=raw_completion,
+ input_tools=tools,
+ )
+
+ return await self._post(
+ "/chat/completions",
+ body=await async_maybe_transform(
+ {
+ "messages": messages,
+ "model": model,
+ "audio": audio,
+ "frequency_penalty": frequency_penalty,
+ "function_call": function_call,
+ "functions": functions,
+ "logit_bias": logit_bias,
+ "logprobs": logprobs,
+ "max_completion_tokens": max_completion_tokens,
+ "max_tokens": max_tokens,
+ "metadata": metadata,
+ "modalities": modalities,
+ "n": n,
+ "parallel_tool_calls": parallel_tool_calls,
+ "prediction": prediction,
+ "presence_penalty": presence_penalty,
+ "reasoning_effort": reasoning_effort,
+ "response_format": _type_to_response_format(response_format),
+ "seed": seed,
+ "service_tier": service_tier,
+ "store": store,
+ "stop": stop,
+ "stream": False,
+ "stream_options": stream_options,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "top_logprobs": top_logprobs,
+ "top_p": top_p,
+ "user": user,
+ "web_search_options": web_search_options,
+ },
+ completion_create_params.CompletionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ post_parser=parser,
+ ),
+ # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
+ # in the `parser` function above
+ cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
+ stream=False,
+ )
+
+ def stream(
+ self,
+ *,
+ messages: Iterable[ChatCompletionMessageParam],
+ model: Union[str, ChatModel],
+ audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+ response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+ functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+ n: Optional[int] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ seed: Optional[int] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+ store: Optional[bool] | NotGiven = NOT_GIVEN,
+ stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+ tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ user: str | NotGiven = NOT_GIVEN,
+ web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncChatCompletionStreamManager[ResponseFormatT]:
+ """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
+ and automatic accumulation of each delta.
+
+ This also supports all of the parsing utilities that `.parse()` does.
+
+ Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:
+
+ ```py
+ async with client.beta.chat.completions.stream(
+ model="gpt-4o-2024-08-06",
+ messages=[...],
+ ) as stream:
+ async for event in stream:
+ if event.type == "content.delta":
+ print(event.delta, flush=True, end="")
+ ```
+
+        When the context manager is entered, an `AsyncChatCompletionStream` instance is returned which, like `.create(stream=True)`, is an async iterator. The full list of events that are yielded by the iterator is outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).
+
+        When the context manager exits, the response will be closed; however, the `stream` instance is still available outside
+        the context manager.
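+
+        A short sketch of reading the fully accumulated completion once the stream has
+        finished, using the `get_final_completion()` helper described in the linked docs:
+
+        ```py
+        async with client.beta.chat.completions.stream(
+            model="gpt-4o-2024-08-06",
+            messages=[...],
+        ) as stream:
+            async for event in stream:
+                ...
+
+        # the stream instance remains usable after the context manager exits
+        completion = await stream.get_final_completion()
+        print(completion.choices[0].message)
+        ```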
+ """
+ _validate_input_tools(tools)
+
+ extra_headers = {
+ "X-Stainless-Helper-Method": "beta.chat.completions.stream",
+ **(extra_headers or {}),
+ }
+
+ api_request = self._client.chat.completions.create(
+ messages=messages,
+ model=model,
+ audio=audio,
+ stream=True,
+ response_format=_type_to_response_format(response_format),
+ frequency_penalty=frequency_penalty,
+ function_call=function_call,
+ functions=functions,
+ logit_bias=logit_bias,
+ logprobs=logprobs,
+ max_completion_tokens=max_completion_tokens,
+ max_tokens=max_tokens,
+ metadata=metadata,
+ modalities=modalities,
+ n=n,
+ parallel_tool_calls=parallel_tool_calls,
+ prediction=prediction,
+ presence_penalty=presence_penalty,
+ reasoning_effort=reasoning_effort,
+ seed=seed,
+ service_tier=service_tier,
+ stop=stop,
+ store=store,
+ stream_options=stream_options,
+ temperature=temperature,
+ tool_choice=tool_choice,
+ tools=tools,
+ top_logprobs=top_logprobs,
+ top_p=top_p,
+ user=user,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ web_search_options=web_search_options,
+ )
+ return AsyncChatCompletionStreamManager(
+ api_request,
+ response_format=response_format,
+ input_tools=tools,
+ )
+
+
+class CompletionsWithRawResponse:
+ def __init__(self, completions: Completions) -> None:
+ self._completions = completions
+
+ self.parse = _legacy_response.to_raw_response_wrapper(
+ completions.parse,
+ )
+
+
+class AsyncCompletionsWithRawResponse:
+ def __init__(self, completions: AsyncCompletions) -> None:
+ self._completions = completions
+
+ self.parse = _legacy_response.async_to_raw_response_wrapper(
+ completions.parse,
+ )
+
+
+class CompletionsWithStreamingResponse:
+ def __init__(self, completions: Completions) -> None:
+ self._completions = completions
+
+ self.parse = to_streamed_response_wrapper(
+ completions.parse,
+ )
+
+
+class AsyncCompletionsWithStreamingResponse:
+ def __init__(self, completions: AsyncCompletions) -> None:
+ self._completions = completions
+
+ self.parse = async_to_streamed_response_wrapper(
+ completions.parse,
+ )
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/__init__.py
new file mode 100644
index 00000000..7ab3d993
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/__init__.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .realtime import (
+ Realtime,
+ AsyncRealtime,
+ RealtimeWithRawResponse,
+ AsyncRealtimeWithRawResponse,
+ RealtimeWithStreamingResponse,
+ AsyncRealtimeWithStreamingResponse,
+)
+from .sessions import (
+ Sessions,
+ AsyncSessions,
+ SessionsWithRawResponse,
+ AsyncSessionsWithRawResponse,
+ SessionsWithStreamingResponse,
+ AsyncSessionsWithStreamingResponse,
+)
+from .transcription_sessions import (
+ TranscriptionSessions,
+ AsyncTranscriptionSessions,
+ TranscriptionSessionsWithRawResponse,
+ AsyncTranscriptionSessionsWithRawResponse,
+ TranscriptionSessionsWithStreamingResponse,
+ AsyncTranscriptionSessionsWithStreamingResponse,
+)
+
+__all__ = [
+ "Sessions",
+ "AsyncSessions",
+ "SessionsWithRawResponse",
+ "AsyncSessionsWithRawResponse",
+ "SessionsWithStreamingResponse",
+ "AsyncSessionsWithStreamingResponse",
+ "TranscriptionSessions",
+ "AsyncTranscriptionSessions",
+ "TranscriptionSessionsWithRawResponse",
+ "AsyncTranscriptionSessionsWithRawResponse",
+ "TranscriptionSessionsWithStreamingResponse",
+ "AsyncTranscriptionSessionsWithStreamingResponse",
+ "Realtime",
+ "AsyncRealtime",
+ "RealtimeWithRawResponse",
+ "AsyncRealtimeWithRawResponse",
+ "RealtimeWithStreamingResponse",
+ "AsyncRealtimeWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/realtime.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/realtime.py
new file mode 100644
index 00000000..76e57f8c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/realtime.py
@@ -0,0 +1,1066 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import json
+import logging
+from types import TracebackType
+from typing import TYPE_CHECKING, Any, Iterator, cast
+from typing_extensions import AsyncIterator
+
+import httpx
+from pydantic import BaseModel
+
+from .sessions import (
+ Sessions,
+ AsyncSessions,
+ SessionsWithRawResponse,
+ AsyncSessionsWithRawResponse,
+ SessionsWithStreamingResponse,
+ AsyncSessionsWithStreamingResponse,
+)
+from ...._types import NOT_GIVEN, Query, Headers, NotGiven
+from ...._utils import (
+ is_azure_client,
+ maybe_transform,
+ strip_not_given,
+ async_maybe_transform,
+ is_async_azure_client,
+)
+from ...._compat import cached_property
+from ...._models import construct_type_unchecked
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._exceptions import OpenAIError
+from ...._base_client import _merge_mappings
+from ....types.beta.realtime import (
+ session_update_event_param,
+ response_create_event_param,
+ transcription_session_update_param,
+)
+from .transcription_sessions import (
+ TranscriptionSessions,
+ AsyncTranscriptionSessions,
+ TranscriptionSessionsWithRawResponse,
+ AsyncTranscriptionSessionsWithRawResponse,
+ TranscriptionSessionsWithStreamingResponse,
+ AsyncTranscriptionSessionsWithStreamingResponse,
+)
+from ....types.websocket_connection_options import WebsocketConnectionOptions
+from ....types.beta.realtime.realtime_client_event import RealtimeClientEvent
+from ....types.beta.realtime.realtime_server_event import RealtimeServerEvent
+from ....types.beta.realtime.conversation_item_param import ConversationItemParam
+from ....types.beta.realtime.realtime_client_event_param import RealtimeClientEventParam
+
+if TYPE_CHECKING:
+ from websockets.sync.client import ClientConnection as WebsocketConnection
+ from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection
+
+ from ...._client import OpenAI, AsyncOpenAI
+
+__all__ = ["Realtime", "AsyncRealtime"]
+
+log: logging.Logger = logging.getLogger(__name__)
+
+
+class Realtime(SyncAPIResource):
+ @cached_property
+ def sessions(self) -> Sessions:
+ return Sessions(self._client)
+
+ @cached_property
+ def transcription_sessions(self) -> TranscriptionSessions:
+ return TranscriptionSessions(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> RealtimeWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return RealtimeWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> RealtimeWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return RealtimeWithStreamingResponse(self)
+
+ def connect(
+ self,
+ *,
+ model: str,
+ extra_query: Query = {},
+ extra_headers: Headers = {},
+ websocket_connection_options: WebsocketConnectionOptions = {},
+ ) -> RealtimeConnectionManager:
+ """
+ The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.
+
+ Some notable benefits of the API include:
+
+ - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
+ - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
+ - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.
+
+ The Realtime API is a stateful, event-based API that communicates over a WebSocket.
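+
+        A minimal usage sketch (the model name here is illustrative):
+
+        ```py
+        with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
+            # configure the session, then iterate over server events
+            connection.session.update(session={"modalities": ["text"]})
+            for event in connection:
+                if event.type == "response.text.delta":
+                    print(event.delta, end="")
+        ```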
+ """
+ return RealtimeConnectionManager(
+ client=self._client,
+ extra_query=extra_query,
+ extra_headers=extra_headers,
+ websocket_connection_options=websocket_connection_options,
+ model=model,
+ )
+
+
+class AsyncRealtime(AsyncAPIResource):
+ @cached_property
+ def sessions(self) -> AsyncSessions:
+ return AsyncSessions(self._client)
+
+ @cached_property
+ def transcription_sessions(self) -> AsyncTranscriptionSessions:
+ return AsyncTranscriptionSessions(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncRealtimeWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncRealtimeWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncRealtimeWithStreamingResponse(self)
+
+ def connect(
+ self,
+ *,
+ model: str,
+ extra_query: Query = {},
+ extra_headers: Headers = {},
+ websocket_connection_options: WebsocketConnectionOptions = {},
+ ) -> AsyncRealtimeConnectionManager:
+ """
+ The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.
+
+ Some notable benefits of the API include:
+
+ - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
+ - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
+ - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.
+
+ The Realtime API is a stateful, event-based API that communicates over a WebSocket.
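+
+        A minimal usage sketch (the model name here is illustrative):
+
+        ```py
+        async with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
+            # configure the session, then iterate over server events
+            await connection.session.update(session={"modalities": ["text"]})
+            async for event in connection:
+                if event.type == "response.text.delta":
+                    print(event.delta, end="")
+        ```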
+ """
+ return AsyncRealtimeConnectionManager(
+ client=self._client,
+ extra_query=extra_query,
+ extra_headers=extra_headers,
+ websocket_connection_options=websocket_connection_options,
+ model=model,
+ )
+
+
+class RealtimeWithRawResponse:
+ def __init__(self, realtime: Realtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def sessions(self) -> SessionsWithRawResponse:
+ return SessionsWithRawResponse(self._realtime.sessions)
+
+ @cached_property
+ def transcription_sessions(self) -> TranscriptionSessionsWithRawResponse:
+ return TranscriptionSessionsWithRawResponse(self._realtime.transcription_sessions)
+
+
+class AsyncRealtimeWithRawResponse:
+ def __init__(self, realtime: AsyncRealtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def sessions(self) -> AsyncSessionsWithRawResponse:
+ return AsyncSessionsWithRawResponse(self._realtime.sessions)
+
+ @cached_property
+ def transcription_sessions(self) -> AsyncTranscriptionSessionsWithRawResponse:
+ return AsyncTranscriptionSessionsWithRawResponse(self._realtime.transcription_sessions)
+
+
+class RealtimeWithStreamingResponse:
+ def __init__(self, realtime: Realtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def sessions(self) -> SessionsWithStreamingResponse:
+ return SessionsWithStreamingResponse(self._realtime.sessions)
+
+ @cached_property
+ def transcription_sessions(self) -> TranscriptionSessionsWithStreamingResponse:
+ return TranscriptionSessionsWithStreamingResponse(self._realtime.transcription_sessions)
+
+
+class AsyncRealtimeWithStreamingResponse:
+ def __init__(self, realtime: AsyncRealtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def sessions(self) -> AsyncSessionsWithStreamingResponse:
+ return AsyncSessionsWithStreamingResponse(self._realtime.sessions)
+
+ @cached_property
+ def transcription_sessions(self) -> AsyncTranscriptionSessionsWithStreamingResponse:
+ return AsyncTranscriptionSessionsWithStreamingResponse(self._realtime.transcription_sessions)
+
+
+class AsyncRealtimeConnection:
+ """Represents a live websocket connection to the Realtime API"""
+
+ session: AsyncRealtimeSessionResource
+ response: AsyncRealtimeResponseResource
+ input_audio_buffer: AsyncRealtimeInputAudioBufferResource
+ conversation: AsyncRealtimeConversationResource
+ transcription_session: AsyncRealtimeTranscriptionSessionResource
+
+ _connection: AsyncWebsocketConnection
+
+ def __init__(self, connection: AsyncWebsocketConnection) -> None:
+ self._connection = connection
+
+ self.session = AsyncRealtimeSessionResource(self)
+ self.response = AsyncRealtimeResponseResource(self)
+ self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self)
+ self.conversation = AsyncRealtimeConversationResource(self)
+ self.transcription_session = AsyncRealtimeTranscriptionSessionResource(self)
+
+ async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]:
+ """
+        An infinite iterator that will continue to yield events until
+        the connection is closed.
+ """
+ from websockets.exceptions import ConnectionClosedOK
+
+ try:
+ while True:
+ yield await self.recv()
+ except ConnectionClosedOK:
+ return
+
+ async def recv(self) -> RealtimeServerEvent:
+ """
+        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.
+
+ Canceling this method is safe. There's no risk of losing data.
+ """
+ return self.parse_event(await self.recv_bytes())
+
+ async def recv_bytes(self) -> bytes:
+ """Receive the next message from the connection as raw bytes.
+
+ Canceling this method is safe. There's no risk of losing data.
+
+ If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
+ then you can call `.parse_event(data)`.
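+
+        For example, a sketch pairing raw receipt with manual parsing (`connection`
+        stands in for this object):
+
+        ```py
+        data = await connection.recv_bytes()
+        event = connection.parse_event(data)
+        ```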
+ """
+ message = await self._connection.recv(decode=False)
+ log.debug(f"Received websocket message: %s", message)
+ if not isinstance(message, bytes):
+ # passing `decode=False` should always result in us getting `bytes` back
+ raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}")
+
+ return message
+
+ async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
+ data = (
+ event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
+ if isinstance(event, BaseModel)
+ else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam))
+ )
+ await self._connection.send(data)
+
+ async def close(self, *, code: int = 1000, reason: str = "") -> None:
+ await self._connection.close(code=code, reason=reason)
+
+ def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
+ """
+ Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.
+
+ This is helpful if you're using `.recv_bytes()`.
+ """
+ return cast(
+ RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
+ )
+
+
+class AsyncRealtimeConnectionManager:
+ """
+ Context manager over a `AsyncRealtimeConnection` that is returned by `beta.realtime.connect()`
+
+ This context manager ensures that the connection will be closed when it exits.
+
+ ---
+
+ Note that if your application doesn't work well with the context manager approach then you
+ can call the `.enter()` method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = await client.beta.realtime.connect(...).enter()
+ # ...
+ await connection.close()
+ ```
+ """
+
+ def __init__(
+ self,
+ *,
+ client: AsyncOpenAI,
+ model: str,
+ extra_query: Query,
+ extra_headers: Headers,
+ websocket_connection_options: WebsocketConnectionOptions,
+ ) -> None:
+ self.__client = client
+ self.__model = model
+ self.__connection: AsyncRealtimeConnection | None = None
+ self.__extra_query = extra_query
+ self.__extra_headers = extra_headers
+ self.__websocket_connection_options = websocket_connection_options
+
+ async def __aenter__(self) -> AsyncRealtimeConnection:
+ """
+ 👋 If your application doesn't work well with the context manager approach then you
+ can call this method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = await client.beta.realtime.connect(...).enter()
+ # ...
+ await connection.close()
+ ```
+ """
+ try:
+ from websockets.asyncio.client import connect
+ except ImportError as exc:
+ raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
+
+ extra_query = self.__extra_query
+ auth_headers = self.__client.auth_headers
+ if is_async_azure_client(self.__client):
+ url, auth_headers = await self.__client._configure_realtime(self.__model, extra_query)
+ else:
+ url = self._prepare_url().copy_with(
+ params={
+ **self.__client.base_url.params,
+ "model": self.__model,
+ **extra_query,
+ },
+ )
+ log.debug("Connecting to %s", url)
+ if self.__websocket_connection_options:
+ log.debug("Connection options: %s", self.__websocket_connection_options)
+
+ self.__connection = AsyncRealtimeConnection(
+ await connect(
+ str(url),
+ user_agent_header=self.__client.user_agent,
+ additional_headers=_merge_mappings(
+ {
+ **auth_headers,
+ "OpenAI-Beta": "realtime=v1",
+ },
+ self.__extra_headers,
+ ),
+ **self.__websocket_connection_options,
+ )
+ )
+
+ return self.__connection
+
+ enter = __aenter__
+
+ def _prepare_url(self) -> httpx.URL:
+ if self.__client.websocket_base_url is not None:
+ base_url = httpx.URL(self.__client.websocket_base_url)
+ else:
+ base_url = self.__client._base_url.copy_with(scheme="wss")
+
+ merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
+ return base_url.copy_with(raw_path=merge_raw_path)
+
+ async def __aexit__(
+ self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
+ ) -> None:
+ if self.__connection is not None:
+ await self.__connection.close()
+
+
+class RealtimeConnection:
+ """Represents a live websocket connection to the Realtime API"""
+
+ session: RealtimeSessionResource
+ response: RealtimeResponseResource
+ input_audio_buffer: RealtimeInputAudioBufferResource
+ conversation: RealtimeConversationResource
+ transcription_session: RealtimeTranscriptionSessionResource
+
+ _connection: WebsocketConnection
+
+ def __init__(self, connection: WebsocketConnection) -> None:
+ self._connection = connection
+
+ self.session = RealtimeSessionResource(self)
+ self.response = RealtimeResponseResource(self)
+ self.input_audio_buffer = RealtimeInputAudioBufferResource(self)
+ self.conversation = RealtimeConversationResource(self)
+ self.transcription_session = RealtimeTranscriptionSessionResource(self)
+
+ def __iter__(self) -> Iterator[RealtimeServerEvent]:
+ """
+        An infinite iterator that will continue to yield events until
+        the connection is closed.
+ """
+ from websockets.exceptions import ConnectionClosedOK
+
+ try:
+ while True:
+ yield self.recv()
+ except ConnectionClosedOK:
+ return
+
+ def recv(self) -> RealtimeServerEvent:
+ """
+        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.
+
+ Canceling this method is safe. There's no risk of losing data.
+ """
+ return self.parse_event(self.recv_bytes())
+
+ def recv_bytes(self) -> bytes:
+ """Receive the next message from the connection as raw bytes.
+
+ Canceling this method is safe. There's no risk of losing data.
+
+ If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
+ then you can call `.parse_event(data)`.
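+
+        For example, a sketch pairing raw receipt with manual parsing (`connection`
+        stands in for this object):
+
+        ```py
+        data = connection.recv_bytes()
+        event = connection.parse_event(data)
+        ```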
+ """
+ message = self._connection.recv(decode=False)
+ log.debug(f"Received websocket message: %s", message)
+ if not isinstance(message, bytes):
+ # passing `decode=False` should always result in us getting `bytes` back
+ raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}")
+
+ return message
+
+ def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
+ data = (
+ event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
+ if isinstance(event, BaseModel)
+ else json.dumps(maybe_transform(event, RealtimeClientEventParam))
+ )
+ self._connection.send(data)
+
+ def close(self, *, code: int = 1000, reason: str = "") -> None:
+ self._connection.close(code=code, reason=reason)
+
+ def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
+ """
+ Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.
+
+ This is helpful if you're using `.recv_bytes()`.
+ """
+ return cast(
+ RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
+ )
+
+
+class RealtimeConnectionManager:
+ """
+ Context manager over a `RealtimeConnection` that is returned by `beta.realtime.connect()`
+
+ This context manager ensures that the connection will be closed when it exits.
+
+ ---
+
+ Note that if your application doesn't work well with the context manager approach then you
+ can call the `.enter()` method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = client.beta.realtime.connect(...).enter()
+ # ...
+ connection.close()
+ ```
+ """
+
+ def __init__(
+ self,
+ *,
+ client: OpenAI,
+ model: str,
+ extra_query: Query,
+ extra_headers: Headers,
+ websocket_connection_options: WebsocketConnectionOptions,
+ ) -> None:
+ self.__client = client
+ self.__model = model
+ self.__connection: RealtimeConnection | None = None
+ self.__extra_query = extra_query
+ self.__extra_headers = extra_headers
+ self.__websocket_connection_options = websocket_connection_options
+
+ def __enter__(self) -> RealtimeConnection:
+ """
+ 👋 If your application doesn't work well with the context manager approach then you
+ can call this method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = client.beta.realtime.connect(...).enter()
+ # ...
+ connection.close()
+ ```
+ """
+ try:
+ from websockets.sync.client import connect
+ except ImportError as exc:
+ raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
+
+ extra_query = self.__extra_query
+ auth_headers = self.__client.auth_headers
+ if is_azure_client(self.__client):
+ url, auth_headers = self.__client._configure_realtime(self.__model, extra_query)
+ else:
+ url = self._prepare_url().copy_with(
+ params={
+ **self.__client.base_url.params,
+ "model": self.__model,
+ **extra_query,
+ },
+ )
+ log.debug("Connecting to %s", url)
+ if self.__websocket_connection_options:
+ log.debug("Connection options: %s", self.__websocket_connection_options)
+
+ self.__connection = RealtimeConnection(
+ connect(
+ str(url),
+ user_agent_header=self.__client.user_agent,
+ additional_headers=_merge_mappings(
+ {
+ **auth_headers,
+ "OpenAI-Beta": "realtime=v1",
+ },
+ self.__extra_headers,
+ ),
+ **self.__websocket_connection_options,
+ )
+ )
+
+ return self.__connection
+
+ enter = __enter__
+
+ def _prepare_url(self) -> httpx.URL:
+ if self.__client.websocket_base_url is not None:
+ base_url = httpx.URL(self.__client.websocket_base_url)
+ else:
+ base_url = self.__client._base_url.copy_with(scheme="wss")
+
+ merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
+ return base_url.copy_with(raw_path=merge_raw_path)
+
+ def __exit__(
+ self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
+ ) -> None:
+ if self.__connection is not None:
+ self.__connection.close()
+
+
+class BaseRealtimeConnectionResource:
+ def __init__(self, connection: RealtimeConnection) -> None:
+ self._connection = connection
+
+
+class RealtimeSessionResource(BaseRealtimeConnectionResource):
+ def update(self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event to update the session’s default configuration.
+ The client may send this event at any time to update any field,
+ except for `voice`. However, note that once a session has been
+ initialized with a particular `model`, it can’t be changed to
+ another model using `session.update`.
+
+ When the server receives a `session.update`, it will respond
+ with a `session.updated` event showing the full, effective configuration.
+ Only the fields that are present are updated. To clear a field like
+ `instructions`, pass an empty string.
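+
+        For example, a sketch of updating the default instructions mid-session:
+
+        ```py
+        # fields not present in `session` are left unchanged
+        connection.session.update(session={"instructions": "Respond in short sentences."})
+        ```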
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
+ )
+ )
+
+
+class RealtimeResponseResource(BaseRealtimeConnectionResource):
+ def create(
+ self,
+ *,
+ event_id: str | NotGiven = NOT_GIVEN,
+ response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ This event instructs the server to create a Response, which means triggering
+ model inference. When in Server VAD mode, the server will create Responses
+ automatically.
+
+ A Response will include at least one Item, and may have two, in which case
+ the second will be a function call. These Items will be appended to the
+ conversation history.
+
+ The server will respond with a `response.created` event, events for Items
+ and content created, and finally a `response.done` event to indicate the
+ Response is complete.
+
+ The `response.create` event includes inference configuration like
+        `instructions` and `temperature`. These fields will override the Session's
+ configuration for this Response only.
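+
+        For example, a sketch of requesting a one-off, text-only response (field names
+        follow the `response_create_event_param.Response` shape):
+
+        ```py
+        connection.response.create(
+            # overrides apply to this response only, not the session defaults
+            response={"modalities": ["text"], "instructions": "Reply briefly."}
+        )
+        ```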
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
+ )
+ )
+
+ def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to cancel an in-progress response.
+
+ The server will respond
+ with a `response.cancelled` event or an error if there is no response to
+ cancel.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
+ )
+ )
+
+
+class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource):
+ def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to clear the audio bytes in the buffer.
+
+ The server will
+ respond with an `input_audio_buffer.cleared` event.
+ """
+ self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
+ )
+
+ def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event to commit the user input audio buffer, which will create a
+ new user message item in the conversation. This event will produce an error
+ if the input audio buffer is empty. When in Server VAD mode, the client does
+ not need to send this event, the server will commit the audio buffer
+ automatically.
+
+ Committing the input audio buffer will trigger input audio transcription
+ (if enabled in session configuration), but it will not create a response
+ from the model. The server will respond with an `input_audio_buffer.committed`
+ event.
+ """
+ self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
+ )
+
+ def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to append audio bytes to the input audio buffer.
+
+ The audio
+ buffer is temporary storage you can write to and later commit. In Server VAD
+ mode, the audio buffer is used to detect speech and the server will decide
+ when to commit. When Server VAD is disabled, you must commit the audio buffer
+ manually.
+
+        The client may choose how much audio to place in each event, up to a maximum
+        of 15 MiB; for example, streaming smaller chunks from the client may allow the
+        VAD to be more responsive. Unlike most other client events, the server will
+        not send a confirmation response to this event.
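+
+        For example, a sketch of appending a chunk of PCM16 audio, where `pcm_chunk` is a
+        placeholder for bytes produced by your capture pipeline:
+
+        ```py
+        import base64
+
+        # pcm_chunk: placeholder for raw audio bytes; the API expects base64-encoded audio
+        connection.input_audio_buffer.append(audio=base64.b64encode(pcm_chunk).decode("ascii"))
+        ```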
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
+ )
+ )
+
+
+class RealtimeConversationResource(BaseRealtimeConnectionResource):
+ @cached_property
+ def item(self) -> RealtimeConversationItemResource:
+ return RealtimeConversationItemResource(self._connection)
+
+
+class RealtimeConversationItemResource(BaseRealtimeConnectionResource):
+ def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event when you want to remove any item from the conversation
+ history.
+
+ The server will respond with a `conversation.item.deleted` event,
+ unless the item does not exist in the conversation history, in which case the
+ server will respond with an error.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
+ )
+ )
+
+ def create(
+ self,
+ *,
+ item: ConversationItemParam,
+ event_id: str | NotGiven = NOT_GIVEN,
+ previous_item_id: str | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ Add a new Item to the Conversation's context, including messages, function
+ calls, and function call responses. This event can be used both to populate a
+ "history" of the conversation and to add new items mid-stream, but has the
+ current limitation that it cannot populate assistant audio messages.
+
+ If successful, the server will respond with a `conversation.item.created`
+ event, otherwise an `error` event will be sent.
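+
+        For example, a sketch of appending a user text message to the conversation history:
+
+        ```py
+        connection.conversation.item.create(
+            item={
+                "type": "message",
+                "role": "user",
+                "content": [{"type": "input_text", "text": "What can you do?"}],
+            }
+        )
+        ```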
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.create",
+ "item": item,
+ "event_id": event_id,
+ "previous_item_id": previous_item_id,
+ }
+ ),
+ )
+ )
+
+ def truncate(
+ self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to truncate a previous assistant message’s audio.
+
+ The server
+ will produce audio faster than realtime, so this event is useful when the user
+ interrupts to truncate audio that has already been sent to the client but not
+ yet played. This will synchronize the server's understanding of the audio with
+ the client's playback.
+
+ Truncating audio will delete the server-side text transcript to ensure there
+        is no text in the context that hasn't been heard by the user.
+
+ If successful, the server will respond with a `conversation.item.truncated`
+ event.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.truncate",
+ "audio_end_ms": audio_end_ms,
+ "content_index": content_index,
+ "item_id": item_id,
+ "event_id": event_id,
+ }
+ ),
+ )
+ )
+
+ def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
+ The server will respond with a `conversation.item.retrieved` event,
+ unless the item does not exist in the conversation history, in which case the
+ server will respond with an error.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
+ )
+ )
+
+
+class RealtimeTranscriptionSessionResource(BaseRealtimeConnectionResource):
+ def update(
+ self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to update a transcription session."""
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
+ )
+ )
+
+
+class BaseAsyncRealtimeConnectionResource:
+ def __init__(self, connection: AsyncRealtimeConnection) -> None:
+ self._connection = connection
+
+
+class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource):
+ async def update(
+ self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """
+ Send this event to update the session’s default configuration.
+ The client may send this event at any time to update any field,
+ except for `voice`. However, note that once a session has been
+ initialized with a particular `model`, it can’t be changed to
+ another model using `session.update`.
+
+ When the server receives a `session.update`, it will respond
+ with a `session.updated` event showing the full, effective configuration.
+ Only the fields that are present are updated. To clear a field like
+ `instructions`, pass an empty string.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
+ )
+ )
+
+
+class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource):
+ async def create(
+ self,
+ *,
+ event_id: str | NotGiven = NOT_GIVEN,
+ response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ This event instructs the server to create a Response, which means triggering
+ model inference. When in Server VAD mode, the server will create Responses
+ automatically.
+
+ A Response will include at least one Item, and may have two, in which case
+ the second will be a function call. These Items will be appended to the
+ conversation history.
+
+ The server will respond with a `response.created` event, events for Items
+ and content created, and finally a `response.done` event to indicate the
+ Response is complete.
+
+ The `response.create` event includes inference configuration like
+        `instructions` and `temperature`. These fields will override the Session's
+ configuration for this Response only.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
+ )
+ )
+
+ async def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to cancel an in-progress response.
+
+ The server will respond
+ with a `response.cancelled` event or an error if there is no response to
+ cancel.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
+ )
+ )
+
+
+class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
+ async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to clear the audio bytes in the buffer.
+
+ The server will
+ respond with an `input_audio_buffer.cleared` event.
+ """
+ await self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
+ )
+
+ async def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event to commit the user input audio buffer, which will create a
+ new user message item in the conversation. This event will produce an error
+ if the input audio buffer is empty. When in Server VAD mode, the client does
+ not need to send this event, the server will commit the audio buffer
+ automatically.
+
+ Committing the input audio buffer will trigger input audio transcription
+ (if enabled in session configuration), but it will not create a response
+ from the model. The server will respond with an `input_audio_buffer.committed`
+ event.
+ """
+ await self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
+ )
+
+ async def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to append audio bytes to the input audio buffer.
+
+ The audio
+ buffer is temporary storage you can write to and later commit. In Server VAD
+ mode, the audio buffer is used to detect speech and the server will decide
+ when to commit. When Server VAD is disabled, you must commit the audio buffer
+ manually.
+
+        The client may choose how much audio to place in each event, up to a maximum
+        of 15 MiB; for example, streaming smaller chunks from the client may allow the
+        VAD to be more responsive. Unlike most other client events, the server will
+        not send a confirmation response to this event.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
+ )
+ )
+
+
+class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource):
+ @cached_property
+ def item(self) -> AsyncRealtimeConversationItemResource:
+ return AsyncRealtimeConversationItemResource(self._connection)
+
+
+class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource):
+ async def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event when you want to remove any item from the conversation
+ history.
+
+ The server will respond with a `conversation.item.deleted` event,
+ unless the item does not exist in the conversation history, in which case the
+ server will respond with an error.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
+ )
+ )
+
+ async def create(
+ self,
+ *,
+ item: ConversationItemParam,
+ event_id: str | NotGiven = NOT_GIVEN,
+ previous_item_id: str | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ Add a new Item to the Conversation's context, including messages, function
+ calls, and function call responses. This event can be used both to populate a
+ "history" of the conversation and to add new items mid-stream, but has the
+ current limitation that it cannot populate assistant audio messages.
+
+ If successful, the server will respond with a `conversation.item.created`
+ event, otherwise an `error` event will be sent.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.create",
+ "item": item,
+ "event_id": event_id,
+ "previous_item_id": previous_item_id,
+ }
+ ),
+ )
+ )
+
+ async def truncate(
+ self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to truncate a previous assistant message’s audio.
+
+ The server
+ will produce audio faster than realtime, so this event is useful when the user
+ interrupts to truncate audio that has already been sent to the client but not
+ yet played. This will synchronize the server's understanding of the audio with
+ the client's playback.
+
+ Truncating audio will delete the server-side text transcript to ensure there
+        is no text in the context that hasn't been heard by the user.
+
+ If successful, the server will respond with a `conversation.item.truncated`
+ event.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.truncate",
+ "audio_end_ms": audio_end_ms,
+ "content_index": content_index,
+ "item_id": item_id,
+ "event_id": event_id,
+ }
+ ),
+ )
+ )
+
+ async def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
+ The server will respond with a `conversation.item.retrieved` event,
+ unless the item does not exist in the conversation history, in which case the
+ server will respond with an error.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
+ )
+ )
+
+
+class AsyncRealtimeTranscriptionSessionResource(BaseAsyncRealtimeConnectionResource):
+ async def update(
+ self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to update a transcription session."""
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
+ )
+ )
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py
new file mode 100644
index 00000000..5884e54d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/sessions.py
@@ -0,0 +1,383 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._base_client import make_request_options
+from ....types.beta.realtime import session_create_params
+from ....types.beta.realtime.session_create_response import SessionCreateResponse
+
+__all__ = ["Sessions", "AsyncSessions"]
+
+
+class Sessions(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> SessionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return SessionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> SessionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return SessionsWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ input_audio_noise_reduction: session_create_params.InputAudioNoiseReduction | NotGiven = NOT_GIVEN,
+ input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+ instructions: str | NotGiven = NOT_GIVEN,
+ max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
+ modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+ model: Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ]
+ | NotGiven = NOT_GIVEN,
+ output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: str | NotGiven = NOT_GIVEN,
+ tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN,
+ turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SessionCreateResponse:
+ """
+ Create an ephemeral API token for use in client-side applications with the
+ Realtime API. Can be configured with the same session parameters as the
+ `session.update` client event.
+
+ It responds with a session object, plus a `client_secret` key containing a
+ usable ephemeral API token for authenticating browser clients with the
+ Realtime API.
+
+ Args:
+ input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+ `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+ (mono), and little-endian byte order.
+
+ input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
+ off. Noise reduction filters audio added to the input audio buffer before it is
+ sent to VAD and the model. Filtering the audio can improve VAD and turn
+ detection accuracy (reducing false positives) and model performance by improving
+ perception of the input audio.
+
+ input_audio_transcription: Configuration for input audio transcription. Defaults to off; once enabled, it can
+ be set to `null` to turn it off again. Input audio transcription is not native
+ to the model, since the model consumes audio directly. Transcription runs
+ asynchronously through
+ [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+ and should be treated as guidance of input audio content rather than precisely
+ what the model heard. The client can optionally set the language and prompt for
+ transcription; these offer additional guidance to the transcription service.
+
+ instructions: The default system instructions (i.e. system message) prepended to model calls.
+ This field allows the client to guide the model on desired responses. The model
+ can be instructed on response content and format (e.g. "be extremely succinct",
+ "act friendly", "here are examples of good responses") and on audio behavior
+ (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
+ instructions are not guaranteed to be followed by the model, but they provide
+ guidance to the model on the desired behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+
+ max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+
+ modalities: The set of modalities the model can respond with. To disable audio, set this to
+ ["text"].
+
+ model: The Realtime model used for this session.
+
+ output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+ For `pcm16`, output audio is sampled at a rate of 24kHz.
+
+ temperature: Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
+ temperature of 0.8 is highly recommended for best performance.
+
+ tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
+ a function.
+
+ tools: Tools (functions) available to the model.
+
+ turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+ set to `null` to turn off, in which case the client must manually trigger model
+ response. Server VAD means that the model will detect the start and end of
+ speech based on audio volume and respond at the end of user speech. Semantic VAD
+ is more advanced and uses a turn detection model (in conjunction with VAD) to
+ semantically estimate whether the user has finished speaking, then dynamically
+ sets a timeout based on this probability. For example, if user audio trails off
+ with "uhhm", the model will score a low probability of turn end and wait longer
+ for the user to continue speaking. This can be useful for more natural
+ conversations, but may have a higher latency.
+
+ voice: The voice the model uses to respond. Voice cannot be changed during the session
+ once the model has responded with audio at least once. Current voice options are
+ `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ "/realtime/sessions",
+ body=maybe_transform(
+ {
+ "input_audio_format": input_audio_format,
+ "input_audio_noise_reduction": input_audio_noise_reduction,
+ "input_audio_transcription": input_audio_transcription,
+ "instructions": instructions,
+ "max_response_output_tokens": max_response_output_tokens,
+ "modalities": modalities,
+ "model": model,
+ "output_audio_format": output_audio_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "turn_detection": turn_detection,
+ "voice": voice,
+ },
+ session_create_params.SessionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=SessionCreateResponse,
+ )
+
+
+class AsyncSessions(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncSessionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncSessionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncSessionsWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ input_audio_noise_reduction: session_create_params.InputAudioNoiseReduction | NotGiven = NOT_GIVEN,
+ input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+ instructions: str | NotGiven = NOT_GIVEN,
+ max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
+ modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+ model: Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ]
+ | NotGiven = NOT_GIVEN,
+ output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: str | NotGiven = NOT_GIVEN,
+ tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN,
+ turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SessionCreateResponse:
+ """
+ Create an ephemeral API token for use in client-side applications with the
+ Realtime API. Can be configured with the same session parameters as the
+ `session.update` client event.
+
+ It responds with a session object, plus a `client_secret` key containing a
+ usable ephemeral API token for authenticating browser clients with the
+ Realtime API.
+
+ Args:
+ input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+ `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+ (mono), and little-endian byte order.
+
+ input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
+ off. Noise reduction filters audio added to the input audio buffer before it is
+ sent to VAD and the model. Filtering the audio can improve VAD and turn
+ detection accuracy (reducing false positives) and model performance by improving
+ perception of the input audio.
+
+ input_audio_transcription: Configuration for input audio transcription. Defaults to off; once enabled, it can
+ be set to `null` to turn it off again. Input audio transcription is not native
+ to the model, since the model consumes audio directly. Transcription runs
+ asynchronously through
+ [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+ and should be treated as guidance of input audio content rather than precisely
+ what the model heard. The client can optionally set the language and prompt for
+ transcription; these offer additional guidance to the transcription service.
+
+ instructions: The default system instructions (i.e. system message) prepended to model calls.
+ This field allows the client to guide the model on desired responses. The model
+ can be instructed on response content and format (e.g. "be extremely succinct",
+ "act friendly", "here are examples of good responses") and on audio behavior
+ (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
+ instructions are not guaranteed to be followed by the model, but they provide
+ guidance to the model on the desired behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+
+ max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+
+ modalities: The set of modalities the model can respond with. To disable audio, set this to
+ ["text"].
+
+ model: The Realtime model used for this session.
+
+ output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+ For `pcm16`, output audio is sampled at a rate of 24kHz.
+
+ temperature: Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
+ temperature of 0.8 is highly recommended for best performance.
+
+ tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
+ a function.
+
+ tools: Tools (functions) available to the model.
+
+ turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+ set to `null` to turn off, in which case the client must manually trigger model
+ response. Server VAD means that the model will detect the start and end of
+ speech based on audio volume and respond at the end of user speech. Semantic VAD
+ is more advanced and uses a turn detection model (in conjunction with VAD) to
+ semantically estimate whether the user has finished speaking, then dynamically
+ sets a timeout based on this probability. For example, if user audio trails off
+ with "uhhm", the model will score a low probability of turn end and wait longer
+ for the user to continue speaking. This can be useful for more natural
+ conversations, but may have a higher latency.
+
+ voice: The voice the model uses to respond. Voice cannot be changed during the session
+ once the model has responded with audio at least once. Current voice options are
+ `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ "/realtime/sessions",
+ body=await async_maybe_transform(
+ {
+ "input_audio_format": input_audio_format,
+ "input_audio_noise_reduction": input_audio_noise_reduction,
+ "input_audio_transcription": input_audio_transcription,
+ "instructions": instructions,
+ "max_response_output_tokens": max_response_output_tokens,
+ "modalities": modalities,
+ "model": model,
+ "output_audio_format": output_audio_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "turn_detection": turn_detection,
+ "voice": voice,
+ },
+ session_create_params.SessionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=SessionCreateResponse,
+ )
+
+
+class SessionsWithRawResponse:
+ def __init__(self, sessions: Sessions) -> None:
+ self._sessions = sessions
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ sessions.create,
+ )
+
+
+class AsyncSessionsWithRawResponse:
+ def __init__(self, sessions: AsyncSessions) -> None:
+ self._sessions = sessions
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ sessions.create,
+ )
+
+
+class SessionsWithStreamingResponse:
+ def __init__(self, sessions: Sessions) -> None:
+ self._sessions = sessions
+
+ self.create = to_streamed_response_wrapper(
+ sessions.create,
+ )
+
+
+class AsyncSessionsWithStreamingResponse:
+ def __init__(self, sessions: AsyncSessions) -> None:
+ self._sessions = sessions
+
+ self.create = async_to_streamed_response_wrapper(
+ sessions.create,
+ )
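The typical flow for the endpoint above is to mint the ephemeral token server-side and hand only the `client_secret` to the browser. A minimal sketch, with illustrative parameter values:

    # A minimal sketch: create an ephemeral Realtime session token server-side.
    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment
    session = client.beta.realtime.sessions.create(
        model="gpt-4o-realtime-preview",  # any of the Literal model names above
        modalities=["text", "audio"],
        voice="verse",
        input_audio_format="pcm16",
    )
    # The ephemeral token the browser uses to authenticate against the
    # Realtime API directly; the server-side API key never leaves the backend.
    print(session.client_secret.value)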
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/transcription_sessions.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/transcription_sessions.py
new file mode 100644
index 00000000..0917da71
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/realtime/transcription_sessions.py
@@ -0,0 +1,277 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._base_client import make_request_options
+from ....types.beta.realtime import transcription_session_create_params
+from ....types.beta.realtime.transcription_session import TranscriptionSession
+
+__all__ = ["TranscriptionSessions", "AsyncTranscriptionSessions"]
+
+
+class TranscriptionSessions(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> TranscriptionSessionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return TranscriptionSessionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> TranscriptionSessionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return TranscriptionSessionsWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ include: List[str] | NotGiven = NOT_GIVEN,
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ input_audio_noise_reduction: transcription_session_create_params.InputAudioNoiseReduction
+ | NotGiven = NOT_GIVEN,
+ input_audio_transcription: transcription_session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+ modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+ turn_detection: transcription_session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> TranscriptionSession:
+ """
+ Create an ephemeral API token for use in client-side applications with the
+ Realtime API specifically for realtime transcriptions. Can be configured with
+ the same session parameters as the `transcription_session.update` client event.
+
+ It responds with a session object, plus a `client_secret` key containing a
+ usable ephemeral API token for authenticating browser clients with the
+ Realtime API.
+
+ Args:
+ include:
+ The set of items to include in the transcription. Current available items are:
+
+ - `item.input_audio_transcription.logprobs`
+
+ input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+ `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+ (mono), and little-endian byte order.
+
+ input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
+ off. Noise reduction filters audio added to the input audio buffer before it is
+ sent to VAD and the model. Filtering the audio can improve VAD and turn
+ detection accuracy (reducing false positives) and model performance by improving
+ perception of the input audio.
+
+ input_audio_transcription: Configuration for input audio transcription. The client can optionally set the
+ language and prompt for transcription; these offer additional guidance to the
+ transcription service.
+
+ modalities: The set of modalities the model can respond with. To disable audio, set this to
+ ["text"].
+
+ turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+ set to `null` to turn off, in which case the client must manually trigger model
+ response. Server VAD means that the model will detect the start and end of
+ speech based on audio volume and respond at the end of user speech. Semantic VAD
+ is more advanced and uses a turn detection model (in conjunction with VAD) to
+ semantically estimate whether the user has finished speaking, then dynamically
+ sets a timeout based on this probability. For example, if user audio trails off
+ with "uhhm", the model will score a low probability of turn end and wait longer
+ for the user to continue speaking. This can be useful for more natural
+ conversations, but may have a higher latency.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ "/realtime/transcription_sessions",
+ body=maybe_transform(
+ {
+ "include": include,
+ "input_audio_format": input_audio_format,
+ "input_audio_noise_reduction": input_audio_noise_reduction,
+ "input_audio_transcription": input_audio_transcription,
+ "modalities": modalities,
+ "turn_detection": turn_detection,
+ },
+ transcription_session_create_params.TranscriptionSessionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=TranscriptionSession,
+ )
+
+
+class AsyncTranscriptionSessions(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncTranscriptionSessionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncTranscriptionSessionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncTranscriptionSessionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncTranscriptionSessionsWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ include: List[str] | NotGiven = NOT_GIVEN,
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ input_audio_noise_reduction: transcription_session_create_params.InputAudioNoiseReduction
+ | NotGiven = NOT_GIVEN,
+ input_audio_transcription: transcription_session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+ modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+ turn_detection: transcription_session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> TranscriptionSession:
+ """
+ Create an ephemeral API token for use in client-side applications with the
+ Realtime API specifically for realtime transcriptions. Can be configured with
+ the same session parameters as the `transcription_session.update` client event.
+
+ It responds with a session object, plus a `client_secret` key containing a
+ usable ephemeral API token for authenticating browser clients with the
+ Realtime API.
+
+ Args:
+ include:
+ The set of items to include in the transcription. Current available items are:
+
+ - `item.input_audio_transcription.logprobs`
+
+ input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+ `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+ (mono), and little-endian byte order.
+
+ input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
+ off. Noise reduction filters audio added to the input audio buffer before it is
+ sent to VAD and the model. Filtering the audio can improve VAD and turn
+ detection accuracy (reducing false positives) and model performance by improving
+ perception of the input audio.
+
+ input_audio_transcription: Configuration for input audio transcription. The client can optionally set the
+ language and prompt for transcription; these offer additional guidance to the
+ transcription service.
+
+ modalities: The set of modalities the model can respond with. To disable audio, set this to
+ ["text"].
+
+ turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+ set to `null` to turn off, in which case the client must manually trigger model
+ response. Server VAD means that the model will detect the start and end of
+ speech based on audio volume and respond at the end of user speech. Semantic VAD
+ is more advanced and uses a turn detection model (in conjunction with VAD) to
+ semantically estimate whether the user has finished speaking, then dynamically
+ sets a timeout based on this probability. For example, if user audio trails off
+ with "uhhm", the model will score a low probability of turn end and wait longer
+ for the user to continue speaking. This can be useful for more natural
+ conversations, but may have a higher latency.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ "/realtime/transcription_sessions",
+ body=await async_maybe_transform(
+ {
+ "include": include,
+ "input_audio_format": input_audio_format,
+ "input_audio_noise_reduction": input_audio_noise_reduction,
+ "input_audio_transcription": input_audio_transcription,
+ "modalities": modalities,
+ "turn_detection": turn_detection,
+ },
+ transcription_session_create_params.TranscriptionSessionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=TranscriptionSession,
+ )
+
+
+class TranscriptionSessionsWithRawResponse:
+ def __init__(self, transcription_sessions: TranscriptionSessions) -> None:
+ self._transcription_sessions = transcription_sessions
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ transcription_sessions.create,
+ )
+
+
+class AsyncTranscriptionSessionsWithRawResponse:
+ def __init__(self, transcription_sessions: AsyncTranscriptionSessions) -> None:
+ self._transcription_sessions = transcription_sessions
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ transcription_sessions.create,
+ )
+
+
+class TranscriptionSessionsWithStreamingResponse:
+ def __init__(self, transcription_sessions: TranscriptionSessions) -> None:
+ self._transcription_sessions = transcription_sessions
+
+ self.create = to_streamed_response_wrapper(
+ transcription_sessions.create,
+ )
+
+
+class AsyncTranscriptionSessionsWithStreamingResponse:
+ def __init__(self, transcription_sessions: AsyncTranscriptionSessions) -> None:
+ self._transcription_sessions = transcription_sessions
+
+ self.create = async_to_streamed_response_wrapper(
+ transcription_sessions.create,
+ )
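The transcription variant works the same way. A sketch under the assumptions that `gpt-4o-transcribe` is an accepted transcription model name and that the returned `TranscriptionSession` carries a `client_secret` like the session response above:

    # A minimal sketch: ephemeral token for a transcription-only session.
    from openai import OpenAI

    client = OpenAI()
    ts = client.beta.realtime.transcription_sessions.create(
        input_audio_format="pcm16",
        input_audio_transcription={"model": "gpt-4o-transcribe"},  # assumed model name
        include=["item.input_audio_transcription.logprobs"],
    )
    print(ts.client_secret.value)  # hand this token to the browser client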
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/__init__.py
new file mode 100644
index 00000000..a66e445b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/__init__.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .runs import (
+ Runs,
+ AsyncRuns,
+ RunsWithRawResponse,
+ AsyncRunsWithRawResponse,
+ RunsWithStreamingResponse,
+ AsyncRunsWithStreamingResponse,
+)
+from .threads import (
+ Threads,
+ AsyncThreads,
+ ThreadsWithRawResponse,
+ AsyncThreadsWithRawResponse,
+ ThreadsWithStreamingResponse,
+ AsyncThreadsWithStreamingResponse,
+)
+from .messages import (
+ Messages,
+ AsyncMessages,
+ MessagesWithRawResponse,
+ AsyncMessagesWithRawResponse,
+ MessagesWithStreamingResponse,
+ AsyncMessagesWithStreamingResponse,
+)
+
+__all__ = [
+ "Runs",
+ "AsyncRuns",
+ "RunsWithRawResponse",
+ "AsyncRunsWithRawResponse",
+ "RunsWithStreamingResponse",
+ "AsyncRunsWithStreamingResponse",
+ "Messages",
+ "AsyncMessages",
+ "MessagesWithRawResponse",
+ "AsyncMessagesWithRawResponse",
+ "MessagesWithStreamingResponse",
+ "AsyncMessagesWithStreamingResponse",
+ "Threads",
+ "AsyncThreads",
+ "ThreadsWithRawResponse",
+ "AsyncThreadsWithRawResponse",
+ "ThreadsWithStreamingResponse",
+ "AsyncThreadsWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/messages.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/messages.py
new file mode 100644
index 00000000..e3374aba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/messages.py
@@ -0,0 +1,670 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import (
+ AsyncPaginator,
+ make_request_options,
+)
+from ....types.beta.threads import message_list_params, message_create_params, message_update_params
+from ....types.beta.threads.message import Message
+from ....types.shared_params.metadata import Metadata
+from ....types.beta.threads.message_deleted import MessageDeleted
+from ....types.beta.threads.message_content_part_param import MessageContentPartParam
+
+__all__ = ["Messages", "AsyncMessages"]
+
+
+class Messages(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> MessagesWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return MessagesWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> MessagesWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return MessagesWithStreamingResponse(self)
+
+ def create(
+ self,
+ thread_id: str,
+ *,
+ content: Union[str, Iterable[MessageContentPartParam]],
+ role: Literal["user", "assistant"],
+ attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Message:
+ """
+ Create a message.
+
+ Args:
+ content: The text contents of the message.
+
+ role:
+ The role of the entity that is creating the message. Allowed values include:
+
+ - `user`: Indicates the message is sent by an actual user and should be used in
+ most cases to represent user-generated messages.
+ - `assistant`: Indicates the message is generated by the assistant. Use this
+ value to insert messages from the assistant into the conversation.
+
+ attachments: A list of files attached to the message, and the tools they should be added to.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ f"/threads/{thread_id}/messages",
+ body=maybe_transform(
+ {
+ "content": content,
+ "role": role,
+ "attachments": attachments,
+ "metadata": metadata,
+ },
+ message_create_params.MessageCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Message,
+ )
+
+ def retrieve(
+ self,
+ message_id: str,
+ *,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Message:
+ """
+ Retrieve a message.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not message_id:
+ raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get(
+ f"/threads/{thread_id}/messages/{message_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Message,
+ )
+
+ def update(
+ self,
+ message_id: str,
+ *,
+ thread_id: str,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Message:
+ """
+ Modifies a message.
+
+ Args:
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not message_id:
+ raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ f"/threads/{thread_id}/messages/{message_id}",
+ body=maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Message,
+ )
+
+ def list(
+ self,
+ thread_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ before: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ run_id: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[Message]:
+ """
+ Returns a list of messages for a given thread.
+
+ Args:
+ after: A cursor for use in pagination. `after` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ ending with obj_foo, your subsequent call can include after=obj_foo in order to
+ fetch the next page of the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ starting with obj_foo, your subsequent call can include before=obj_foo in order
+ to fetch the previous page of the list.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ run_id: Filter messages by the run ID that generated them.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get_api_list(
+ f"/threads/{thread_id}/messages",
+ page=SyncCursorPage[Message],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "limit": limit,
+ "order": order,
+ "run_id": run_id,
+ },
+ message_list_params.MessageListParams,
+ ),
+ ),
+ model=Message,
+ )
+
+ def delete(
+ self,
+ message_id: str,
+ *,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> MessageDeleted:
+ """
+ Deletes a message.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not message_id:
+ raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._delete(
+ f"/threads/{thread_id}/messages/{message_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=MessageDeleted,
+ )
+
+
+class AsyncMessages(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncMessagesWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncMessagesWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncMessagesWithStreamingResponse(self)
+
+ async def create(
+ self,
+ thread_id: str,
+ *,
+ content: Union[str, Iterable[MessageContentPartParam]],
+ role: Literal["user", "assistant"],
+ attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Message:
+ """
+ Create a message.
+
+ Args:
+ content: The text contents of the message.
+
+ role:
+ The role of the entity that is creating the message. Allowed values include:
+
+ - `user`: Indicates the message is sent by an actual user and should be used in
+ most cases to represent user-generated messages.
+ - `assistant`: Indicates the message is generated by the assistant. Use this
+ value to insert messages from the assistant into the conversation.
+
+ attachments: A list of files attached to the message, and the tools they should be added to.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ f"/threads/{thread_id}/messages",
+ body=await async_maybe_transform(
+ {
+ "content": content,
+ "role": role,
+ "attachments": attachments,
+ "metadata": metadata,
+ },
+ message_create_params.MessageCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Message,
+ )
+
+ async def retrieve(
+ self,
+ message_id: str,
+ *,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Message:
+ """
+ Retrieve a message.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not message_id:
+ raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._get(
+ f"/threads/{thread_id}/messages/{message_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Message,
+ )
+
+ async def update(
+ self,
+ message_id: str,
+ *,
+ thread_id: str,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Message:
+ """
+ Modifies a message.
+
+ Args:
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not message_id:
+ raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ f"/threads/{thread_id}/messages/{message_id}",
+ body=await async_maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Message,
+ )
+
+ def list(
+ self,
+ thread_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ before: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ run_id: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[Message, AsyncCursorPage[Message]]:
+ """
+ Returns a list of messages for a given thread.
+
+ Args:
+ after: A cursor for use in pagination. `after` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ ending with obj_foo, your subsequent call can include after=obj_foo in order to
+ fetch the next page of the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ starting with obj_foo, your subsequent call can include before=obj_foo in order
+ to fetch the previous page of the list.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ run_id: Filter messages by the run ID that generated them.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get_api_list(
+ f"/threads/{thread_id}/messages",
+ page=AsyncCursorPage[Message],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "limit": limit,
+ "order": order,
+ "run_id": run_id,
+ },
+ message_list_params.MessageListParams,
+ ),
+ ),
+ model=Message,
+ )
+
+ async def delete(
+ self,
+ message_id: str,
+ *,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> MessageDeleted:
+ """
+ Deletes a message.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not message_id:
+ raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._delete(
+ f"/threads/{thread_id}/messages/{message_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=MessageDeleted,
+ )
+
+
+class MessagesWithRawResponse:
+ def __init__(self, messages: Messages) -> None:
+ self._messages = messages
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ messages.create,
+ )
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ messages.retrieve,
+ )
+ self.update = _legacy_response.to_raw_response_wrapper(
+ messages.update,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ messages.list,
+ )
+ self.delete = _legacy_response.to_raw_response_wrapper(
+ messages.delete,
+ )
+
+
+class AsyncMessagesWithRawResponse:
+ def __init__(self, messages: AsyncMessages) -> None:
+ self._messages = messages
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ messages.create,
+ )
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ messages.retrieve,
+ )
+ self.update = _legacy_response.async_to_raw_response_wrapper(
+ messages.update,
+ )
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ messages.list,
+ )
+ self.delete = _legacy_response.async_to_raw_response_wrapper(
+ messages.delete,
+ )
+
+
+class MessagesWithStreamingResponse:
+ def __init__(self, messages: Messages) -> None:
+ self._messages = messages
+
+ self.create = to_streamed_response_wrapper(
+ messages.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ messages.retrieve,
+ )
+ self.update = to_streamed_response_wrapper(
+ messages.update,
+ )
+ self.list = to_streamed_response_wrapper(
+ messages.list,
+ )
+ self.delete = to_streamed_response_wrapper(
+ messages.delete,
+ )
+
+
+class AsyncMessagesWithStreamingResponse:
+ def __init__(self, messages: AsyncMessages) -> None:
+ self._messages = messages
+
+ self.create = async_to_streamed_response_wrapper(
+ messages.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ messages.retrieve,
+ )
+ self.update = async_to_streamed_response_wrapper(
+ messages.update,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ messages.list,
+ )
+ self.delete = async_to_streamed_response_wrapper(
+ messages.delete,
+ )
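Taken together, the resource and its wrappers support the usual CRUD flow plus raw-response access. A minimal sketch against a placeholder thread ID:

    # A minimal sketch: message CRUD on an existing thread.
    from openai import OpenAI

    client = OpenAI()
    thread_id = "thread_abc123"  # placeholder

    msg = client.beta.threads.messages.create(
        thread_id,
        role="user",
        content="Summarize the attached report.",
    )
    client.beta.threads.messages.update(
        msg.id,
        thread_id=thread_id,
        metadata={"source": "example"},  # up to 16 key-value pairs
    )
    # Cursor pagination is handled transparently by SyncCursorPage.
    for m in client.beta.threads.messages.list(thread_id, order="asc", limit=20):
        print(m.id, m.role)

    # Raw-response variant, per the `with_raw_response` docstrings above.
    raw = client.beta.threads.messages.with_raw_response.retrieve(msg.id, thread_id=thread_id)
    print(raw.headers.get("x-request-id"), raw.parse().id)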
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/__init__.py
new file mode 100644
index 00000000..50aa9fae
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .runs import (
+ Runs,
+ AsyncRuns,
+ RunsWithRawResponse,
+ AsyncRunsWithRawResponse,
+ RunsWithStreamingResponse,
+ AsyncRunsWithStreamingResponse,
+)
+from .steps import (
+ Steps,
+ AsyncSteps,
+ StepsWithRawResponse,
+ AsyncStepsWithRawResponse,
+ StepsWithStreamingResponse,
+ AsyncStepsWithStreamingResponse,
+)
+
+__all__ = [
+ "Steps",
+ "AsyncSteps",
+ "StepsWithRawResponse",
+ "AsyncStepsWithRawResponse",
+ "StepsWithStreamingResponse",
+ "AsyncStepsWithStreamingResponse",
+ "Runs",
+ "AsyncRuns",
+ "RunsWithRawResponse",
+ "AsyncRunsWithRawResponse",
+ "RunsWithStreamingResponse",
+ "AsyncRunsWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/runs.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/runs.py
new file mode 100644
index 00000000..acb1c9b2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/runs.py
@@ -0,0 +1,2989 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import typing_extensions
+from typing import List, Union, Iterable, Optional
+from functools import partial
+from typing_extensions import Literal, overload
+
+import httpx
+
+from ..... import _legacy_response
+from .steps import (
+ Steps,
+ AsyncSteps,
+ StepsWithRawResponse,
+ AsyncStepsWithRawResponse,
+ StepsWithStreamingResponse,
+ AsyncStepsWithStreamingResponse,
+)
+from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ....._utils import (
+ is_given,
+ required_args,
+ maybe_transform,
+ async_maybe_transform,
+)
+from ....._compat import cached_property
+from ....._resource import SyncAPIResource, AsyncAPIResource
+from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....._streaming import Stream, AsyncStream
+from .....pagination import SyncCursorPage, AsyncCursorPage
+from ....._base_client import AsyncPaginator, make_request_options
+from .....lib.streaming import (
+ AssistantEventHandler,
+ AssistantEventHandlerT,
+ AssistantStreamManager,
+ AsyncAssistantEventHandler,
+ AsyncAssistantEventHandlerT,
+ AsyncAssistantStreamManager,
+)
+from .....types.beta.threads import (
+ run_list_params,
+ run_create_params,
+ run_update_params,
+ run_submit_tool_outputs_params,
+)
+from .....types.beta.threads.run import Run
+from .....types.shared.chat_model import ChatModel
+from .....types.shared_params.metadata import Metadata
+from .....types.shared.reasoning_effort import ReasoningEffort
+from .....types.beta.assistant_tool_param import AssistantToolParam
+from .....types.beta.assistant_stream_event import AssistantStreamEvent
+from .....types.beta.threads.runs.run_step_include import RunStepInclude
+from .....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
+from .....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
+
+__all__ = ["Runs", "AsyncRuns"]
+
+
+class Runs(SyncAPIResource):
+ @cached_property
+ def steps(self) -> Steps:
+ return Steps(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> RunsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return RunsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> RunsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return RunsWithStreamingResponse(self)
+
+ @overload
+ def create(
+ self,
+ thread_id: str,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Create a run.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+ is useful for modifying the behavior on a per-run basis without overriding other
+ instructions.
+
+ additional_messages: Adds additional messages to the thread before creating the run.
+
+ instructions: Overrides the
+ [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+ of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ reasoning_effort: **o-series models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+      truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+          control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create(
+ self,
+ thread_id: str,
+ *,
+ assistant_id: str,
+ stream: Literal[True],
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Stream[AssistantStreamEvent]:
+ """
+ Create a run.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+ is useful for modifying the behavior on a per-run basis without overriding other
+ instructions.
+
+ additional_messages: Adds additional messages to the thread before creating the run.
+
+ instructions: Overrides the
+ [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+ of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ reasoning_effort: **o-series models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+      truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+          control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create(
+ self,
+ thread_id: str,
+ *,
+ assistant_id: str,
+ stream: bool,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | Stream[AssistantStreamEvent]:
+ """
+ Create a run.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+ is useful for modifying the behavior on a per-run basis without overriding other
+ instructions.
+
+ additional_messages: Adds additional messages to the thread before creating the run.
+
+ instructions: Overrides the
+ [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+ of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ reasoning_effort: **o-series models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+      truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+          control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["assistant_id"], ["assistant_id", "stream"])
+ def create(
+ self,
+ thread_id: str,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | Stream[AssistantStreamEvent]:
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ f"/threads/{thread_id}/runs",
+ body=maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "additional_instructions": additional_instructions,
+ "additional_messages": additional_messages,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "parallel_tool_calls": parallel_tool_calls,
+ "reasoning_effort": reasoning_effort,
+ "response_format": response_format,
+ "stream": stream,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "top_p": top_p,
+ "truncation_strategy": truncation_strategy,
+ },
+ run_create_params.RunCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"include": include}, run_create_params.RunCreateParams),
+ ),
+ cast_to=Run,
+ stream=stream or False,
+ stream_cls=Stream[AssistantStreamEvent],
+ )
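+
+    # Usage sketch (illustrative comment, not part of the generated module).
+    # Assumes a configured client; `thread_abc` / `asst_abc` are hypothetical IDs:
+    #
+    #   from openai import OpenAI
+    #
+    #   client = OpenAI()
+    #   run = client.beta.threads.runs.create(
+    #       thread_id="thread_abc",
+    #       assistant_id="asst_abc",
+    #   )
+    #   print(run.id, run.status)
+    #
+    #   # Passing stream=True selects the Stream[AssistantStreamEvent] overload:
+    #   for event in client.beta.threads.runs.create(
+    #       thread_id="thread_abc", assistant_id="asst_abc", stream=True
+    #   ):
+    #       print(event.event)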
+
+ def retrieve(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Retrieves a run.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get(
+ f"/threads/{thread_id}/runs/{run_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ )
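+
+    # Sketch of retrieving a single run (hypothetical IDs):
+    #
+    #   run = client.beta.threads.runs.retrieve(
+    #       "run_abc",
+    #       thread_id="thread_abc",
+    #   )
+    #   print(run.status)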
+
+ def update(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Modifies a run.
+
+ Args:
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ f"/threads/{thread_id}/runs/{run_id}",
+ body=maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ )
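+
+    # Sketch of attaching metadata to an existing run (hypothetical IDs; keys
+    # are limited to 64 characters and values to 512, per the docstring above):
+    #
+    #   run = client.beta.threads.runs.update(
+    #       "run_abc",
+    #       thread_id="thread_abc",
+    #       metadata={"customer_id": "cus_123"},
+    #   )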
+
+ def list(
+ self,
+ thread_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ before: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[Run]:
+ """
+ Returns a list of runs belonging to a thread.
+
+ Args:
+ after: A cursor for use in pagination. `after` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ ending with obj_foo, your subsequent call can include after=obj_foo in order to
+ fetch the next page of the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ starting with obj_foo, your subsequent call can include before=obj_foo in order
+ to fetch the previous page of the list.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get_api_list(
+ f"/threads/{thread_id}/runs",
+ page=SyncCursorPage[Run],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "limit": limit,
+ "order": order,
+ },
+ run_list_params.RunListParams,
+ ),
+ ),
+ model=Run,
+ )
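+
+    # Sketch of listing runs; the returned SyncCursorPage is iterable and
+    # fetches subsequent pages on demand, so no manual `after` cursor handling
+    # is needed here:
+    #
+    #   for run in client.beta.threads.runs.list(
+    #       thread_id="thread_abc", limit=20, order="desc"
+    #   ):
+    #       print(run.id, run.status)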
+
+ def cancel(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Cancels a run that is `in_progress`.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ f"/threads/{thread_id}/runs/{run_id}/cancel",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ )
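+
+    # Sketch of cancelling an in-progress run (hypothetical IDs). Cancellation
+    # happens asynchronously server-side, so the returned run may still report
+    # a `cancelling` status:
+    #
+    #   run = client.beta.threads.runs.cancel("run_abc", thread_id="thread_abc")
+    #   print(run.status)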
+
+ def create_and_poll(
+ self,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ poll_interval_ms: int | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+        A helper to create a run and poll for a terminal state. More information on Run
+ lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ run = self.create(
+ thread_id=thread_id,
+ assistant_id=assistant_id,
+ include=include,
+ additional_instructions=additional_instructions,
+ additional_messages=additional_messages,
+ instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
+ metadata=metadata,
+ model=model,
+ response_format=response_format,
+ temperature=temperature,
+ tool_choice=tool_choice,
+ parallel_tool_calls=parallel_tool_calls,
+ reasoning_effort=reasoning_effort,
+ # We assume we are not streaming when polling
+ stream=False,
+ tools=tools,
+ truncation_strategy=truncation_strategy,
+ top_p=top_p,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ )
+ return self.poll(
+ run.id,
+ thread_id=thread_id,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ poll_interval_ms=poll_interval_ms,
+ timeout=timeout,
+ )
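+
+    # Sketch of the blocking create-and-poll flow (hypothetical IDs):
+    #
+    #   run = client.beta.threads.runs.create_and_poll(
+    #       thread_id="thread_abc",
+    #       assistant_id="asst_abc",
+    #   )
+    #   if run.status == "completed":
+    #       messages = client.beta.threads.messages.list(thread_id="thread_abc")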
+
+ @overload
+ @typing_extensions.deprecated("use `stream` instead")
+ def create_and_stream(
+ self,
+ *,
+ assistant_id: str,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandler]:
+ """Create a Run stream"""
+ ...
+
+ @overload
+ @typing_extensions.deprecated("use `stream` instead")
+ def create_and_stream(
+ self,
+ *,
+ assistant_id: str,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ event_handler: AssistantEventHandlerT,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandlerT]:
+ """Create a Run stream"""
+ ...
+
+ @typing_extensions.deprecated("use `stream` instead")
+ def create_and_stream(
+ self,
+ *,
+ assistant_id: str,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ event_handler: AssistantEventHandlerT | None = None,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+ """Create a Run stream"""
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+ extra_headers = {
+ "OpenAI-Beta": "assistants=v2",
+ "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+ "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+ **(extra_headers or {}),
+ }
+ make_request = partial(
+ self._post,
+ f"/threads/{thread_id}/runs",
+ body=maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "additional_instructions": additional_instructions,
+ "additional_messages": additional_messages,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "response_format": response_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "stream": True,
+ "tools": tools,
+ "truncation_strategy": truncation_strategy,
+ "parallel_tool_calls": parallel_tool_calls,
+ "reasoning_effort": reasoning_effort,
+ "top_p": top_p,
+ },
+ run_create_params.RunCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=True,
+ stream_cls=Stream[AssistantStreamEvent],
+ )
+ return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
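+
+    # Migration sketch: `create_and_stream` is deprecated in favor of `stream`,
+    # which takes the same arguments (plus `include`), e.g.:
+    #
+    #   with client.beta.threads.runs.stream(
+    #       thread_id="thread_abc", assistant_id="asst_abc"
+    #   ) as stream:
+    #       stream.until_done()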
+
+ def poll(
+ self,
+ run_id: str,
+ thread_id: str,
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ poll_interval_ms: int | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ A helper to poll a run status until it reaches a terminal state. More
+ information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})}
+
+ if is_given(poll_interval_ms):
+ extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+ terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"}
+ while True:
+ response = self.with_raw_response.retrieve(
+ thread_id=thread_id,
+ run_id=run_id,
+ extra_headers=extra_headers,
+ extra_body=extra_body,
+ extra_query=extra_query,
+ timeout=timeout,
+ )
+
+ run = response.parse()
+ # Return if we reached a terminal state
+ if run.status in terminal_states:
+ return run
+
+ if not is_given(poll_interval_ms):
+ from_header = response.headers.get("openai-poll-after-ms")
+ if from_header is not None:
+ poll_interval_ms = int(from_header)
+ else:
+ poll_interval_ms = 1000
+
+ self._sleep(poll_interval_ms / 1000)
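+
+    # Sketch of polling an already-created run with a custom interval
+    # (hypothetical IDs). When `poll_interval_ms` is omitted, the interval is
+    # taken from the server's `openai-poll-after-ms` header, falling back to
+    # 1000ms as implemented above:
+    #
+    #   run = client.beta.threads.runs.poll(
+    #       "run_abc",
+    #       thread_id="thread_abc",
+    #       poll_interval_ms=500,
+    #   )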
+
+ @overload
+ def stream(
+ self,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandler]:
+ """Create a Run stream"""
+ ...
+
+ @overload
+ def stream(
+ self,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ event_handler: AssistantEventHandlerT,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandlerT]:
+ """Create a Run stream"""
+ ...
+
+ def stream(
+ self,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ event_handler: AssistantEventHandlerT | None = None,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+ """Create a Run stream"""
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+ extra_headers = {
+ "OpenAI-Beta": "assistants=v2",
+ "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+ "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+ **(extra_headers or {}),
+ }
+ make_request = partial(
+ self._post,
+ f"/threads/{thread_id}/runs",
+ body=maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "additional_instructions": additional_instructions,
+ "additional_messages": additional_messages,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "response_format": response_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "stream": True,
+ "tools": tools,
+ "parallel_tool_calls": parallel_tool_calls,
+ "reasoning_effort": reasoning_effort,
+ "truncation_strategy": truncation_strategy,
+ "top_p": top_p,
+ },
+ run_create_params.RunCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"include": include}, run_create_params.RunCreateParams),
+ ),
+ cast_to=Run,
+ stream=True,
+ stream_cls=Stream[AssistantStreamEvent],
+ )
+ return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
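+
+    # Sketch of streaming with a custom event handler; AssistantEventHandler
+    # and its hooks (e.g. on_text_delta) come from the streaming helpers
+    # imported above:
+    #
+    #   from openai import AssistantEventHandler
+    #
+    #   class Handler(AssistantEventHandler):
+    #       def on_text_delta(self, delta, snapshot) -> None:
+    #           print(delta.value, end="", flush=True)
+    #
+    #   with client.beta.threads.runs.stream(
+    #       thread_id="thread_abc",
+    #       assistant_id="asst_abc",
+    #       event_handler=Handler(),
+    #   ) as stream:
+    #       stream.until_done()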
+
+ @overload
+ def submit_tool_outputs(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ When a run has the `status: "requires_action"` and `required_action.type` is
+ `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+ tool calls once they're all completed. All outputs must be submitted in a single
+ request.
+
+ Args:
+ tool_outputs: A list of tools for which the outputs are being submitted.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def submit_tool_outputs(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ stream: Literal[True],
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Stream[AssistantStreamEvent]:
+ """
+ When a run has the `status: "requires_action"` and `required_action.type` is
+ `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+ tool calls once they're all completed. All outputs must be submitted in a single
+ request.
+
+ Args:
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ tool_outputs: A list of tools for which the outputs are being submitted.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def submit_tool_outputs(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ stream: bool,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | Stream[AssistantStreamEvent]:
+ """
+ When a run has the `status: "requires_action"` and `required_action.type` is
+ `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+ tool calls once they're all completed. All outputs must be submitted in a single
+ request.
+
+ Args:
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ tool_outputs: A list of tools for which the outputs are being submitted.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"])
+ def submit_tool_outputs(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | Stream[AssistantStreamEvent]:
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+ body=maybe_transform(
+ {
+ "tool_outputs": tool_outputs,
+ "stream": stream,
+ },
+ run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=stream or False,
+ stream_cls=Stream[AssistantStreamEvent],
+ )
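+
+    # Sketch of answering a `requires_action` run (hypothetical tool results).
+    # Every pending tool call must be answered in this single request:
+    #
+    #   if run.status == "requires_action":
+    #       calls = run.required_action.submit_tool_outputs.tool_calls
+    #       run = client.beta.threads.runs.submit_tool_outputs(
+    #           run.id,
+    #           thread_id="thread_abc",
+    #           tool_outputs=[
+    #               {"tool_call_id": call.id, "output": "42"} for call in calls
+    #           ],
+    #       )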
+
+ def submit_tool_outputs_and_poll(
+ self,
+ *,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ run_id: str,
+ thread_id: str,
+ poll_interval_ms: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+        A helper to submit tool outputs to a run and poll for a terminal run state.
+ More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ run = self.submit_tool_outputs(
+ run_id=run_id,
+ thread_id=thread_id,
+ tool_outputs=tool_outputs,
+ stream=False,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ )
+ return self.poll(
+ run_id=run.id,
+ thread_id=thread_id,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ poll_interval_ms=poll_interval_ms,
+ )
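+
+    # Sketch of a complete tool loop built on the polling helpers (hypothetical
+    # handler `execute_tool`); each call blocks until a terminal state:
+    #
+    #   run = client.beta.threads.runs.create_and_poll(
+    #       thread_id="thread_abc", assistant_id="asst_abc"
+    #   )
+    #   while run.status == "requires_action":
+    #       run = client.beta.threads.runs.submit_tool_outputs_and_poll(
+    #           run_id=run.id,
+    #           thread_id="thread_abc",
+    #           tool_outputs=[
+    #               {"tool_call_id": c.id, "output": execute_tool(c)}
+    #               for c in run.required_action.submit_tool_outputs.tool_calls
+    #           ],
+    #       )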
+
+ @overload
+ def submit_tool_outputs_stream(
+ self,
+ *,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ run_id: str,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandler]:
+ """
+ Submit the tool outputs from a previous run and stream the run to a terminal
+ state. More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ ...
+
+ @overload
+ def submit_tool_outputs_stream(
+ self,
+ *,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ run_id: str,
+ thread_id: str,
+ event_handler: AssistantEventHandlerT,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandlerT]:
+ """
+ Submit the tool outputs from a previous run and stream the run to a terminal
+ state. More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ ...
+
+ def submit_tool_outputs_stream(
+ self,
+ *,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ run_id: str,
+ thread_id: str,
+ event_handler: AssistantEventHandlerT | None = None,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+ """
+ Submit the tool outputs from a previous run and stream the run to a terminal
+ state. More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+ extra_headers = {
+ "OpenAI-Beta": "assistants=v2",
+ "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream",
+ "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+ **(extra_headers or {}),
+ }
+ request = partial(
+ self._post,
+ f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+ body=maybe_transform(
+ {
+ "tool_outputs": tool_outputs,
+ "stream": True,
+ },
+ run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=True,
+ stream_cls=Stream[AssistantStreamEvent],
+ )
+ return AssistantStreamManager(request, event_handler=event_handler or AssistantEventHandler())
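+
+ # Usage sketch: the returned manager is meant to be used as a context manager;
+ # `until_done()` drains the event stream with the default event handler.
+ #
+ #   with client.beta.threads.runs.submit_tool_outputs_stream(
+ #       run_id="run_abc123",
+ #       thread_id="thread_abc123",
+ #       tool_outputs=[{"tool_call_id": "call_abc123", "output": "70F"}],
+ #   ) as stream:
+ #       stream.until_done()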
+
+
+class AsyncRuns(AsyncAPIResource):
+ @cached_property
+ def steps(self) -> AsyncSteps:
+ return AsyncSteps(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncRunsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncRunsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncRunsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncRunsWithStreamingResponse(self)
+
+ @overload
+ async def create(
+ self,
+ thread_id: str,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Create a run.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+ is useful for modifying the behavior on a per-run basis without overriding other
+ instructions.
+
+ additional_messages: Adds additional messages to the thread before creating the run.
+
+ instructions: Overrides the
+ [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+ of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ reasoning_effort: **o-series models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+ control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create(
+ self,
+ thread_id: str,
+ *,
+ assistant_id: str,
+ stream: Literal[True],
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncStream[AssistantStreamEvent]:
+ """
+ Create a run.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+ is useful for modifying the behavior on a per-run basis without overriding other
+ instructions.
+
+ additional_messages: Adds additional messages to the thread before creating the run.
+
+ instructions: Overrides the
+ [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+ of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ reasoning_effort: **o-series models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+ control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create(
+ self,
+ thread_id: str,
+ *,
+ assistant_id: str,
+ stream: bool,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | AsyncStream[AssistantStreamEvent]:
+ """
+ Create a run.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+ is useful for modifying the behavior on a per-run basis without overriding other
+ instructions.
+
+ additional_messages: Adds additional messages to the thread before creating the run.
+
+ instructions: Overrides the
+ [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+ of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ reasoning_effort: **o-series models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+ control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["assistant_id"], ["assistant_id", "stream"])
+ async def create(
+ self,
+ thread_id: str,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | AsyncStream[AssistantStreamEvent]:
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ f"/threads/{thread_id}/runs",
+ body=await async_maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "additional_instructions": additional_instructions,
+ "additional_messages": additional_messages,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "parallel_tool_calls": parallel_tool_calls,
+ "reasoning_effort": reasoning_effort,
+ "response_format": response_format,
+ "stream": stream,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "top_p": top_p,
+ "truncation_strategy": truncation_strategy,
+ },
+ run_create_params.RunCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform({"include": include}, run_create_params.RunCreateParams),
+ ),
+ cast_to=Run,
+ stream=stream or False,
+ stream_cls=AsyncStream[AssistantStreamEvent],
+ )
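+
+ # Usage sketch: creating a run with the async client; IDs are placeholders.
+ # Passing `stream=True` instead returns an `AsyncStream[AssistantStreamEvent]`.
+ #
+ #   import asyncio
+ #   import openai
+ #
+ #   async def main() -> None:
+ #       client = openai.AsyncOpenAI()
+ #       run = await client.beta.threads.runs.create(
+ #           thread_id="thread_abc123",
+ #           assistant_id="asst_abc123",
+ #       )
+ #       print(run.id, run.status)
+ #
+ #   asyncio.run(main())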
+
+ async def retrieve(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Retrieves a run.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._get(
+ f"/threads/{thread_id}/runs/{run_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ )
+
+ async def update(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Modifies a run.
+
+ Args:
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ f"/threads/{thread_id}/runs/{run_id}",
+ body=await async_maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ )
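+
+ # Usage sketch: `metadata` is the only field this endpoint accepts, so a
+ # hypothetical update looks like:
+ #
+ #   run = await client.beta.threads.runs.update(
+ #       run_id="run_abc123",
+ #       thread_id="thread_abc123",
+ #       metadata={"user_id": "user_123"},
+ #   )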
+
+ def list(
+ self,
+ thread_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ before: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[Run, AsyncCursorPage[Run]]:
+ """
+ Returns a list of runs belonging to a thread.
+
+ Args:
+ after: A cursor for use in pagination. `after` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ ending with obj_foo, your subsequent call can include after=obj_foo in order to
+ fetch the next page of the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ starting with obj_foo, your subsequent call can include before=obj_foo in order
+ to fetch the previous page of the list.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get_api_list(
+ f"/threads/{thread_id}/runs",
+ page=AsyncCursorPage[Run],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "limit": limit,
+ "order": order,
+ },
+ run_list_params.RunListParams,
+ ),
+ ),
+ model=Run,
+ )
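+
+ # Usage sketch: the returned `AsyncPaginator` supports direct async iteration,
+ # which auto-paginates through cursor pages:
+ #
+ #   async for run in client.beta.threads.runs.list(
+ #       thread_id="thread_abc123",
+ #       limit=20,
+ #       order="desc",
+ #   ):
+ #       print(run.id)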
+
+ async def cancel(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Cancels a run that is `in_progress`.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ f"/threads/{thread_id}/runs/{run_id}/cancel",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ )
+
+ async def create_and_poll(
+ self,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ poll_interval_ms: int | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ A helper to create a run and poll for a terminal state. More information on Run
+ lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ run = await self.create(
+ thread_id=thread_id,
+ assistant_id=assistant_id,
+ include=include,
+ additional_instructions=additional_instructions,
+ additional_messages=additional_messages,
+ instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
+ metadata=metadata,
+ model=model,
+ response_format=response_format,
+ temperature=temperature,
+ tool_choice=tool_choice,
+ parallel_tool_calls=parallel_tool_calls,
+ reasoning_effort=reasoning_effort,
+ # We assume we are not streaming when polling
+ stream=False,
+ tools=tools,
+ truncation_strategy=truncation_strategy,
+ top_p=top_p,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ )
+ return await self.poll(
+ run.id,
+ thread_id=thread_id,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ poll_interval_ms=poll_interval_ms,
+ timeout=timeout,
+ )
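+
+ # Usage sketch: create a run and cooperatively block until it reaches a
+ # terminal state such as `completed` or `requires_action`:
+ #
+ #   run = await client.beta.threads.runs.create_and_poll(
+ #       thread_id="thread_abc123",
+ #       assistant_id="asst_abc123",
+ #       poll_interval_ms=500,
+ #   )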
+
+ @overload
+ @typing_extensions.deprecated("use `stream` instead")
+ def create_and_stream(
+ self,
+ *,
+ assistant_id: str,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+ """Create a Run stream"""
+ ...
+
+ @overload
+ @typing_extensions.deprecated("use `stream` instead")
+ def create_and_stream(
+ self,
+ *,
+ assistant_id: str,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ event_handler: AsyncAssistantEventHandlerT,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+ """Create a Run stream"""
+ ...
+
+ @typing_extensions.deprecated("use `stream` instead")
+ def create_and_stream(
+ self,
+ *,
+ assistant_id: str,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ event_handler: AsyncAssistantEventHandlerT | None = None,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> (
+ AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+ | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+ ):
+ """Create a Run stream"""
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+ extra_headers = {
+ "OpenAI-Beta": "assistants=v2",
+ "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+ "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+ **(extra_headers or {}),
+ }
+ request = self._post(
+ f"/threads/{thread_id}/runs",
+ body=maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "additional_instructions": additional_instructions,
+ "additional_messages": additional_messages,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "response_format": response_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "stream": True,
+ "tools": tools,
+ "truncation_strategy": truncation_strategy,
+ "top_p": top_p,
+ "parallel_tool_calls": parallel_tool_calls,
+ },
+ run_create_params.RunCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=True,
+ stream_cls=AsyncStream[AssistantStreamEvent],
+ )
+ return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
+
+ async def poll(
+ self,
+ run_id: str,
+ thread_id: str,
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ poll_interval_ms: int | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ A helper to poll a run status until it reaches a terminal state. More
+ information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})}
+
+ if is_given(poll_interval_ms):
+ extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+ terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"}
+ while True:
+ response = await self.with_raw_response.retrieve(
+ thread_id=thread_id,
+ run_id=run_id,
+ extra_headers=extra_headers,
+ extra_body=extra_body,
+ extra_query=extra_query,
+ timeout=timeout,
+ )
+
+ run = response.parse()
+ # Return if we reached a terminal state
+ if run.status in terminal_states:
+ return run
+
+ if not is_given(poll_interval_ms):
+ from_header = response.headers.get("openai-poll-after-ms")
+ if from_header is not None:
+ poll_interval_ms = int(from_header)
+ else:
+ poll_interval_ms = 1000
+
+ await self._sleep(poll_interval_ms / 1000)
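+
+ # Note on the loop above: when `poll_interval_ms` is not given, the interval
+ # comes from the server's `openai-poll-after-ms` response header, falling back
+ # to 1000ms. A minimal, hypothetical call:
+ #
+ #   run = await client.beta.threads.runs.poll(
+ #       "run_abc123",
+ #       thread_id="thread_abc123",
+ #   )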
+
+ @overload
+ def stream(
+ self,
+ *,
+ assistant_id: str,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+ """Create a Run stream"""
+ ...
+
+ @overload
+ def stream(
+ self,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ event_handler: AsyncAssistantEventHandlerT,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+ """Create a Run stream"""
+ ...
+
+ def stream(
+ self,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ event_handler: AsyncAssistantEventHandlerT | None = None,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> (
+ AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+ | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+ ):
+ """Create a Run stream"""
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+ extra_headers = {
+ "OpenAI-Beta": "assistants=v2",
+ "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+ "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+ **(extra_headers or {}),
+ }
+ request = self._post(
+ f"/threads/{thread_id}/runs",
+ body=maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "additional_instructions": additional_instructions,
+ "additional_messages": additional_messages,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "response_format": response_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "stream": True,
+ "tools": tools,
+ "parallel_tool_calls": parallel_tool_calls,
+ "reasoning_effort": reasoning_effort,
+ "truncation_strategy": truncation_strategy,
+ "top_p": top_p,
+ },
+ run_create_params.RunCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"include": include}, run_create_params.RunCreateParams),
+ ),
+ cast_to=Run,
+ stream=True,
+ stream_cls=AsyncStream[AssistantStreamEvent],
+ )
+ return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
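+
+ # Usage sketch: async streaming with the default handler; a subclass of
+ # `AsyncAssistantEventHandler` could be passed via `event_handler` instead.
+ #
+ #   async with client.beta.threads.runs.stream(
+ #       thread_id="thread_abc123",
+ #       assistant_id="asst_abc123",
+ #   ) as stream:
+ #       await stream.until_done()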
+
+ @overload
+ async def submit_tool_outputs(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ When a run has the `status: "requires_action"` and `required_action.type` is
+ `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+ tool calls once they're all completed. All outputs must be submitted in a single
+ request.
+
+ Args:
+ tool_outputs: A list of tools for which the outputs are being submitted.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def submit_tool_outputs(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ stream: Literal[True],
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncStream[AssistantStreamEvent]:
+ """
+ When a run has the `status: "requires_action"` and `required_action.type` is
+ `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+ tool calls once they're all completed. All outputs must be submitted in a single
+ request.
+
+ Args:
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ tool_outputs: A list of tools for which the outputs are being submitted.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def submit_tool_outputs(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ stream: bool,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | AsyncStream[AssistantStreamEvent]:
+ """
+ When a run has the `status: "requires_action"` and `required_action.type` is
+ `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+ tool calls once they're all completed. All outputs must be submitted in a single
+ request.
+
+ Args:
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ tool_outputs: A list of tools for which the outputs are being submitted.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"])
+ async def submit_tool_outputs(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | AsyncStream[AssistantStreamEvent]:
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+ body=await async_maybe_transform(
+ {
+ "tool_outputs": tool_outputs,
+ "stream": stream,
+ },
+ run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=stream or False,
+ stream_cls=AsyncStream[AssistantStreamEvent],
+ )
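
A usage sketch for the non-streaming path of submit_tool_outputs, assuming an AsyncOpenAI client and placeholder IDs taken from a run in the requires_action state:

    import asyncio
    from openai import AsyncOpenAI

    client = AsyncOpenAI()

    async def submit_outputs() -> None:
        run = await client.beta.threads.runs.submit_tool_outputs(
            run_id="run_abc123",        # hypothetical
            thread_id="thread_abc123",  # hypothetical
            tool_outputs=[{"tool_call_id": "call_abc123", "output": "22C"}],
        )
        print(run.status)

    asyncio.run(submit_outputs())
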
+
+ async def submit_tool_outputs_and_poll(
+ self,
+ *,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ run_id: str,
+ thread_id: str,
+ poll_interval_ms: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+        A helper to submit tool outputs to a run and poll for a terminal run state.
+ More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ run = await self.submit_tool_outputs(
+ run_id=run_id,
+ thread_id=thread_id,
+ tool_outputs=tool_outputs,
+ stream=False,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ )
+ return await self.poll(
+ run_id=run.id,
+ thread_id=thread_id,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ poll_interval_ms=poll_interval_ms,
+ )
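
An equivalent sketch for the poll helper, which blocks until the run leaves its in-progress states; IDs are hypothetical and poll_interval_ms is the optional override declared above:

    import asyncio
    from openai import AsyncOpenAI

    client = AsyncOpenAI()

    async def submit_and_wait() -> None:
        run = await client.beta.threads.runs.submit_tool_outputs_and_poll(
            run_id="run_abc123",        # hypothetical
            thread_id="thread_abc123",  # hypothetical
            tool_outputs=[{"tool_call_id": "call_abc123", "output": "22C"}],
            poll_interval_ms=500,       # optional override of the polling cadence
        )
        print(run.status)  # a terminal state such as "completed"

    asyncio.run(submit_and_wait())
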
+
+ @overload
+ def submit_tool_outputs_stream(
+ self,
+ *,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ run_id: str,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+ """
+ Submit the tool outputs from a previous run and stream the run to a terminal
+ state. More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ ...
+
+ @overload
+ def submit_tool_outputs_stream(
+ self,
+ *,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ run_id: str,
+ thread_id: str,
+ event_handler: AsyncAssistantEventHandlerT,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+ """
+ Submit the tool outputs from a previous run and stream the run to a terminal
+ state. More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ ...
+
+ def submit_tool_outputs_stream(
+ self,
+ *,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ run_id: str,
+ thread_id: str,
+ event_handler: AsyncAssistantEventHandlerT | None = None,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> (
+ AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+ | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+ ):
+ """
+ Submit the tool outputs from a previous run and stream the run to a terminal
+ state. More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+ extra_headers = {
+ "OpenAI-Beta": "assistants=v2",
+ "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream",
+ "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+ **(extra_headers or {}),
+ }
+ request = self._post(
+ f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+ body=maybe_transform(
+ {
+ "tool_outputs": tool_outputs,
+ "stream": True,
+ },
+ run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=True,
+ stream_cls=AsyncStream[AssistantStreamEvent],
+ )
+ return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
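
A sketch of the custom event-handler path; on_text_delta is one of the hooks AsyncAssistantEventHandler exposes, and the IDs are placeholders:

    import asyncio
    from openai import AsyncOpenAI
    from openai.lib.streaming import AsyncAssistantEventHandler

    client = AsyncOpenAI()

    class PrintHandler(AsyncAssistantEventHandler):
        async def on_text_delta(self, delta, snapshot) -> None:
            # Print streamed text fragments as they arrive.
            print(delta.value, end="", flush=True)

    async def stream_outputs() -> None:
        async with client.beta.threads.runs.submit_tool_outputs_stream(
            run_id="run_abc123",        # hypothetical
            thread_id="thread_abc123",  # hypothetical
            tool_outputs=[{"tool_call_id": "call_abc123", "output": "22C"}],
            event_handler=PrintHandler(),
        ) as stream:
            await stream.until_done()

    asyncio.run(stream_outputs())
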
+
+
+class RunsWithRawResponse:
+ def __init__(self, runs: Runs) -> None:
+ self._runs = runs
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ runs.create,
+ )
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ runs.retrieve,
+ )
+ self.update = _legacy_response.to_raw_response_wrapper(
+ runs.update,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ runs.list,
+ )
+ self.cancel = _legacy_response.to_raw_response_wrapper(
+ runs.cancel,
+ )
+ self.submit_tool_outputs = _legacy_response.to_raw_response_wrapper(
+ runs.submit_tool_outputs,
+ )
+
+ @cached_property
+ def steps(self) -> StepsWithRawResponse:
+ return StepsWithRawResponse(self._runs.steps)
+
+
+class AsyncRunsWithRawResponse:
+ def __init__(self, runs: AsyncRuns) -> None:
+ self._runs = runs
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ runs.create,
+ )
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ runs.retrieve,
+ )
+ self.update = _legacy_response.async_to_raw_response_wrapper(
+ runs.update,
+ )
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ runs.list,
+ )
+ self.cancel = _legacy_response.async_to_raw_response_wrapper(
+ runs.cancel,
+ )
+ self.submit_tool_outputs = _legacy_response.async_to_raw_response_wrapper(
+ runs.submit_tool_outputs,
+ )
+
+ @cached_property
+ def steps(self) -> AsyncStepsWithRawResponse:
+ return AsyncStepsWithRawResponse(self._runs.steps)
+
+
+class RunsWithStreamingResponse:
+ def __init__(self, runs: Runs) -> None:
+ self._runs = runs
+
+ self.create = to_streamed_response_wrapper(
+ runs.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ runs.retrieve,
+ )
+ self.update = to_streamed_response_wrapper(
+ runs.update,
+ )
+ self.list = to_streamed_response_wrapper(
+ runs.list,
+ )
+ self.cancel = to_streamed_response_wrapper(
+ runs.cancel,
+ )
+ self.submit_tool_outputs = to_streamed_response_wrapper(
+ runs.submit_tool_outputs,
+ )
+
+ @cached_property
+ def steps(self) -> StepsWithStreamingResponse:
+ return StepsWithStreamingResponse(self._runs.steps)
+
+
+class AsyncRunsWithStreamingResponse:
+ def __init__(self, runs: AsyncRuns) -> None:
+ self._runs = runs
+
+ self.create = async_to_streamed_response_wrapper(
+ runs.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ runs.retrieve,
+ )
+ self.update = async_to_streamed_response_wrapper(
+ runs.update,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ runs.list,
+ )
+ self.cancel = async_to_streamed_response_wrapper(
+ runs.cancel,
+ )
+ self.submit_tool_outputs = async_to_streamed_response_wrapper(
+ runs.submit_tool_outputs,
+ )
+
+ @cached_property
+ def steps(self) -> AsyncStepsWithStreamingResponse:
+ return AsyncStepsWithStreamingResponse(self._runs.steps)
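
These wrapper classes back the .with_raw_response and .with_streaming_response accessors shown earlier in the file. A sync sketch with placeholder IDs: the raw wrapper exposes HTTP-level details, and .parse() recovers the typed Run object.

    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment

    response = client.beta.threads.runs.with_raw_response.retrieve(
        "run_abc123",               # hypothetical
        thread_id="thread_abc123",  # hypothetical
    )
    print(response.headers.get("x-request-id"))
    run = response.parse()
    print(run.id)
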
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/steps.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/steps.py
new file mode 100644
index 00000000..709c729d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/steps.py
@@ -0,0 +1,381 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Literal
+
+import httpx
+
+from ..... import _legacy_response
+from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ....._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ....._compat import cached_property
+from ....._resource import SyncAPIResource, AsyncAPIResource
+from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .....pagination import SyncCursorPage, AsyncCursorPage
+from ....._base_client import AsyncPaginator, make_request_options
+from .....types.beta.threads.runs import step_list_params, step_retrieve_params
+from .....types.beta.threads.runs.run_step import RunStep
+from .....types.beta.threads.runs.run_step_include import RunStepInclude
+
+__all__ = ["Steps", "AsyncSteps"]
+
+
+class Steps(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> StepsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return StepsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> StepsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return StepsWithStreamingResponse(self)
+
+ def retrieve(
+ self,
+ step_id: str,
+ *,
+ thread_id: str,
+ run_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunStep:
+ """
+ Retrieves a run step.
+
+ Args:
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ if not step_id:
+ raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get(
+ f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams),
+ ),
+ cast_to=RunStep,
+ )
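
A sketch of retrieve using the single include value documented above; all IDs are hypothetical:

    from openai import OpenAI

    client = OpenAI()

    step = client.beta.threads.runs.steps.retrieve(
        "step_abc123",              # hypothetical
        thread_id="thread_abc123",  # hypothetical
        run_id="run_abc123",        # hypothetical
        include=["step_details.tool_calls[*].file_search.results[*].content"],
    )
    print(step.step_details.type)
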
+
+ def list(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ after: str | NotGiven = NOT_GIVEN,
+ before: str | NotGiven = NOT_GIVEN,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[RunStep]:
+ """
+ Returns a list of run steps belonging to a run.
+
+ Args:
+ after: A cursor for use in pagination. `after` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ ending with obj_foo, your subsequent call can include after=obj_foo in order to
+ fetch the next page of the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ starting with obj_foo, your subsequent call can include before=obj_foo in order
+ to fetch the previous page of the list.
+
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get_api_list(
+ f"/threads/{thread_id}/runs/{run_id}/steps",
+ page=SyncCursorPage[RunStep],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "include": include,
+ "limit": limit,
+ "order": order,
+ },
+ step_list_params.StepListParams,
+ ),
+ ),
+ model=RunStep,
+ )
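
SyncCursorPage supports auto-pagination, so iterating the return value fetches follow-up pages on demand. A sketch with placeholder IDs:

    from openai import OpenAI

    client = OpenAI()

    for step in client.beta.threads.runs.steps.list(
        run_id="run_abc123",        # hypothetical
        thread_id="thread_abc123",  # hypothetical
        limit=20,
        order="asc",
    ):
        print(step.id, step.status)
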
+
+
+class AsyncSteps(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncStepsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncStepsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncStepsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncStepsWithStreamingResponse(self)
+
+ async def retrieve(
+ self,
+ step_id: str,
+ *,
+ thread_id: str,
+ run_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunStep:
+ """
+ Retrieves a run step.
+
+ Args:
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ if not step_id:
+ raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._get(
+ f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams),
+ ),
+ cast_to=RunStep,
+ )
+
+ def list(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ after: str | NotGiven = NOT_GIVEN,
+ before: str | NotGiven = NOT_GIVEN,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[RunStep, AsyncCursorPage[RunStep]]:
+ """
+ Returns a list of run steps belonging to a run.
+
+ Args:
+ after: A cursor for use in pagination. `after` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ ending with obj_foo, your subsequent call can include after=obj_foo in order to
+ fetch the next page of the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ starting with obj_foo, your subsequent call can include before=obj_foo in order
+ to fetch the previous page of the list.
+
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get_api_list(
+ f"/threads/{thread_id}/runs/{run_id}/steps",
+ page=AsyncCursorPage[RunStep],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "include": include,
+ "limit": limit,
+ "order": order,
+ },
+ step_list_params.StepListParams,
+ ),
+ ),
+ model=RunStep,
+ )
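
On the async side the paginator is consumed with async for; a sketch assuming an AsyncOpenAI client and placeholder IDs:

    import asyncio
    from openai import AsyncOpenAI

    client = AsyncOpenAI()

    async def list_steps() -> None:
        async for step in client.beta.threads.runs.steps.list(
            run_id="run_abc123",        # hypothetical
            thread_id="thread_abc123",  # hypothetical
        ):
            print(step.id, step.status)

    asyncio.run(list_steps())
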
+
+
+class StepsWithRawResponse:
+ def __init__(self, steps: Steps) -> None:
+ self._steps = steps
+
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ steps.retrieve,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ steps.list,
+ )
+
+
+class AsyncStepsWithRawResponse:
+ def __init__(self, steps: AsyncSteps) -> None:
+ self._steps = steps
+
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ steps.retrieve,
+ )
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ steps.list,
+ )
+
+
+class StepsWithStreamingResponse:
+ def __init__(self, steps: Steps) -> None:
+ self._steps = steps
+
+ self.retrieve = to_streamed_response_wrapper(
+ steps.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ steps.list,
+ )
+
+
+class AsyncStepsWithStreamingResponse:
+ def __init__(self, steps: AsyncSteps) -> None:
+ self._steps = steps
+
+ self.retrieve = async_to_streamed_response_wrapper(
+ steps.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ steps.list,
+ )
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/threads.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/threads.py
new file mode 100644
index 00000000..d88559bd
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/threads.py
@@ -0,0 +1,1875 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from functools import partial
+from typing_extensions import Literal, overload
+
+import httpx
+
+from .... import _legacy_response
+from .messages import (
+ Messages,
+ AsyncMessages,
+ MessagesWithRawResponse,
+ AsyncMessagesWithRawResponse,
+ MessagesWithStreamingResponse,
+ AsyncMessagesWithStreamingResponse,
+)
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+ required_args,
+ maybe_transform,
+ async_maybe_transform,
+)
+from .runs.runs import (
+ Runs,
+ AsyncRuns,
+ RunsWithRawResponse,
+ AsyncRunsWithRawResponse,
+ RunsWithStreamingResponse,
+ AsyncRunsWithStreamingResponse,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._streaming import Stream, AsyncStream
+from ....types.beta import (
+ thread_create_params,
+ thread_update_params,
+ thread_create_and_run_params,
+)
+from ...._base_client import make_request_options
+from ....lib.streaming import (
+ AssistantEventHandler,
+ AssistantEventHandlerT,
+ AssistantStreamManager,
+ AsyncAssistantEventHandler,
+ AsyncAssistantEventHandlerT,
+ AsyncAssistantStreamManager,
+)
+from ....types.beta.thread import Thread
+from ....types.beta.threads.run import Run
+from ....types.shared.chat_model import ChatModel
+from ....types.beta.thread_deleted import ThreadDeleted
+from ....types.shared_params.metadata import Metadata
+from ....types.beta.assistant_stream_event import AssistantStreamEvent
+from ....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
+from ....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
+
+__all__ = ["Threads", "AsyncThreads"]
+
+
+class Threads(SyncAPIResource):
+ @cached_property
+ def runs(self) -> Runs:
+ return Runs(self._client)
+
+ @cached_property
+ def messages(self) -> Messages:
+ return Messages(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> ThreadsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return ThreadsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> ThreadsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return ThreadsWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Thread:
+ """
+ Create a thread.
+
+ Args:
+ messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
+ start the thread with.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ tool_resources: A set of resources that are made available to the assistant's tools in this
+ thread. The resources are specific to the type of tool. For example, the
+ `code_interpreter` tool requires a list of file IDs, while the `file_search`
+ tool requires a list of vector store IDs.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ "/threads",
+ body=maybe_transform(
+ {
+ "messages": messages,
+ "metadata": metadata,
+ "tool_resources": tool_resources,
+ },
+ thread_create_params.ThreadCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Thread,
+ )
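
A minimal sketch of creating a thread seeded with one message; the metadata key/value is a hypothetical example:

    from openai import OpenAI

    client = OpenAI()

    thread = client.beta.threads.create(
        messages=[{"role": "user", "content": "Summarize my uploaded report."}],
        metadata={"user_ref": "user-42"},  # hypothetical key/value
    )
    print(thread.id)
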
+
+ def retrieve(
+ self,
+ thread_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Thread:
+ """
+ Retrieves a thread.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get(
+ f"/threads/{thread_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Thread,
+ )
+
+ def update(
+ self,
+ thread_id: str,
+ *,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Thread:
+ """
+ Modifies a thread.
+
+ Args:
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ tool_resources: A set of resources that are made available to the assistant's tools in this
+ thread. The resources are specific to the type of tool. For example, the
+ `code_interpreter` tool requires a list of file IDs, while the `file_search`
+ tool requires a list of vector store IDs.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ f"/threads/{thread_id}",
+ body=maybe_transform(
+ {
+ "metadata": metadata,
+ "tool_resources": tool_resources,
+ },
+ thread_update_params.ThreadUpdateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Thread,
+ )
+
+ def delete(
+ self,
+ thread_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ThreadDeleted:
+ """
+ Delete a thread.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._delete(
+ f"/threads/{thread_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ThreadDeleted,
+ )
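
update and delete follow the same shape; a sketch with a placeholder thread ID and a hypothetical metadata key:

    from openai import OpenAI

    client = OpenAI()

    thread = client.beta.threads.update(
        "thread_abc123",                  # hypothetical
        metadata={"status": "archived"},  # hypothetical key/value
    )

    deleted = client.beta.threads.delete("thread_abc123")
    assert deleted.deleted
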
+
+ @overload
+ def create_and_run(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Create a thread and run it in one request.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ instructions: Override the default system message of the assistant. This is useful for
+ modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ thread: Options to create a new thread. If no thread is provided when running a request,
+ an empty thread will be created.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tool_resources: A set of resources that are used by the assistant's tools. The resources are
+ specific to the type of tool. For example, the `code_interpreter` tool requires
+ a list of file IDs, while the `file_search` tool requires a list of vector store
+ IDs.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create_and_run(
+ self,
+ *,
+ assistant_id: str,
+ stream: Literal[True],
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Stream[AssistantStreamEvent]:
+ """
+ Create a thread and run it in one request.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ instructions: Override the default system message of the assistant. This is useful for
+ modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ thread: Options to create a new thread. If no thread is provided when running a request,
+ an empty thread will be created.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tool_resources: A set of resources that are used by the assistant's tools. The resources are
+ specific to the type of tool. For example, the `code_interpreter` tool requires
+ a list of file IDs, while the `file_search` tool requires a list of vector store
+ IDs.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create_and_run(
+ self,
+ *,
+ assistant_id: str,
+ stream: bool,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | Stream[AssistantStreamEvent]:
+ """
+ Create a thread and run it in one request.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ instructions: Override the default system message of the assistant. This is useful for
+ modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ thread: Options to create a new thread. If no thread is provided when running a request,
+ an empty thread will be created.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tool_resources: A set of resources that are used by the assistant's tools. The resources are
+ specific to the type of tool. For example, the `code_interpreter` tool requires
+ a list of file IDs, while the `file_search` tool requires a list of vector store
+ IDs.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["assistant_id"], ["assistant_id", "stream"])
+ def create_and_run(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | Stream[AssistantStreamEvent]:
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ "/threads/runs",
+ body=maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "parallel_tool_calls": parallel_tool_calls,
+ "response_format": response_format,
+ "stream": stream,
+ "temperature": temperature,
+ "thread": thread,
+ "tool_choice": tool_choice,
+ "tool_resources": tool_resources,
+ "tools": tools,
+ "top_p": top_p,
+ "truncation_strategy": truncation_strategy,
+ },
+ thread_create_and_run_params.ThreadCreateAndRunParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=stream or False,
+ stream_cls=Stream[AssistantStreamEvent],
+ )
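
At runtime the overloads above resolve as follows: omitting stream (or passing False) returns a Run, while stream=True returns a Stream[AssistantStreamEvent]. A sketch with hypothetical IDs:

    from openai import OpenAI

    client = OpenAI()

    run = client.beta.threads.create_and_run(
        assistant_id="asst_abc123",  # hypothetical
        thread={"messages": [{"role": "user", "content": "Draft a summary."}]},
    )
    print(run.status)

    # The same request as server-sent events:
    events = client.beta.threads.create_and_run(
        assistant_id="asst_abc123",  # hypothetical
        stream=True,
        thread={"messages": [{"role": "user", "content": "Draft a summary."}]},
    )
    for event in events:
        print(event.event)  # e.g. "thread.run.created"
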
+
+ def create_and_run_poll(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ poll_interval_ms: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ A helper to create a thread, start a run and then poll for a terminal state.
+ More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ run = self.create_and_run(
+ assistant_id=assistant_id,
+ instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
+ metadata=metadata,
+ model=model,
+ parallel_tool_calls=parallel_tool_calls,
+ response_format=response_format,
+ temperature=temperature,
+ stream=False,
+ thread=thread,
+ tool_resources=tool_resources,
+ tool_choice=tool_choice,
+ truncation_strategy=truncation_strategy,
+ top_p=top_p,
+ tools=tools,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ )
+ return self.runs.poll(run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms)
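+
+    # Illustrative sketch of the polling helper, reusing the hypothetical
+    # `client` and assistant ID from the example above:
+    #
+    #     run = client.beta.threads.create_and_run_poll(
+    #         assistant_id="asst_123",
+    #         thread={"messages": [{"role": "user", "content": "Summarize the thread."}]},
+    #         poll_interval_ms=1000,
+    #     )
+    #     print(run.status)  # a terminal state such as "completed"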
+
+ @overload
+ def create_and_run_stream(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandler]:
+ """Create a thread and stream the run back"""
+ ...
+
+ @overload
+ def create_and_run_stream(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ event_handler: AssistantEventHandlerT,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandlerT]:
+ """Create a thread and stream the run back"""
+ ...
+
+ def create_and_run_stream(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ event_handler: AssistantEventHandlerT | None = None,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+ """Create a thread and stream the run back"""
+ extra_headers = {
+ "OpenAI-Beta": "assistants=v2",
+ "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
+ "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+ **(extra_headers or {}),
+ }
+ make_request = partial(
+ self._post,
+ "/threads/runs",
+ body=maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "parallel_tool_calls": parallel_tool_calls,
+ "response_format": response_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "stream": True,
+ "thread": thread,
+ "tools": tools,
+ "tool_resources": tool_resources,
+ "truncation_strategy": truncation_strategy,
+ "top_p": top_p,
+ },
+ thread_create_and_run_params.ThreadCreateAndRunParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=True,
+ stream_cls=Stream[AssistantStreamEvent],
+ )
+ return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
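+
+    # Illustrative sketch: the returned manager is a context manager that
+    # yields an iterable event handler (hypothetical `client` as above):
+    #
+    #     with client.beta.threads.create_and_run_stream(
+    #         assistant_id="asst_123",
+    #         thread={"messages": [{"role": "user", "content": "Hello"}]},
+    #     ) as stream:
+    #         for event in stream:
+    #             print(event.event)  # e.g. "thread.message.delta"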
+
+
+class AsyncThreads(AsyncAPIResource):
+ @cached_property
+ def runs(self) -> AsyncRuns:
+ return AsyncRuns(self._client)
+
+ @cached_property
+ def messages(self) -> AsyncMessages:
+ return AsyncMessages(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncThreadsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncThreadsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncThreadsWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Thread:
+ """
+ Create a thread.
+
+ Args:
+ messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
+ start the thread with.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ tool_resources: A set of resources that are made available to the assistant's tools in this
+ thread. The resources are specific to the type of tool. For example, the
+ `code_interpreter` tool requires a list of file IDs, while the `file_search`
+ tool requires a list of vector store IDs.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ "/threads",
+ body=await async_maybe_transform(
+ {
+ "messages": messages,
+ "metadata": metadata,
+ "tool_resources": tool_resources,
+ },
+ thread_create_params.ThreadCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Thread,
+ )
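+
+    # Illustrative async sketch, assuming `client = AsyncOpenAI()` (hypothetical):
+    #
+    #     import asyncio
+    #
+    #     async def main() -> None:
+    #         thread = await client.beta.threads.create(
+    #             messages=[{"role": "user", "content": "Hello"}],
+    #         )
+    #         print(thread.id)
+    #
+    #     asyncio.run(main())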
+
+ async def retrieve(
+ self,
+ thread_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Thread:
+ """
+ Retrieves a thread.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._get(
+ f"/threads/{thread_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Thread,
+ )
+
+ async def update(
+ self,
+ thread_id: str,
+ *,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Thread:
+ """
+ Modifies a thread.
+
+ Args:
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ tool_resources: A set of resources that are made available to the assistant's tools in this
+ thread. The resources are specific to the type of tool. For example, the
+ `code_interpreter` tool requires a list of file IDs, while the `file_search`
+ tool requires a list of vector store IDs.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ f"/threads/{thread_id}",
+ body=await async_maybe_transform(
+ {
+ "metadata": metadata,
+ "tool_resources": tool_resources,
+ },
+ thread_update_params.ThreadUpdateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Thread,
+ )
+
+ async def delete(
+ self,
+ thread_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ThreadDeleted:
+ """
+ Delete a thread.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._delete(
+ f"/threads/{thread_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ThreadDeleted,
+ )
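+
+    # Illustrative retrieve/update/delete round trip with the hypothetical
+    # async `client` from above ("thread_abc123" is a placeholder ID):
+    #
+    #     thread = await client.beta.threads.retrieve("thread_abc123")
+    #     thread = await client.beta.threads.update(thread.id, metadata={"user": "abc"})
+    #     deleted = await client.beta.threads.delete(thread.id)
+    #     assert deleted.deleted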
+
+ @overload
+ async def create_and_run(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Create a thread and run it in one request.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ instructions: Override the default system message of the assistant. This is useful for
+ modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ thread: Options to create a new thread. If no thread is provided when running a request,
+ an empty thread will be created.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tool_resources: A set of resources that are used by the assistant's tools. The resources are
+ specific to the type of tool. For example, the `code_interpreter` tool requires
+ a list of file IDs, while the `file_search` tool requires a list of vector store
+ IDs.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers only the tokens comprising the top_p probability mass. So 0.1
+              means only the tokens in the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create_and_run(
+ self,
+ *,
+ assistant_id: str,
+ stream: Literal[True],
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncStream[AssistantStreamEvent]:
+ """
+ Create a thread and run it in one request.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ instructions: Override the default system message of the assistant. This is useful for
+ modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ thread: Options to create a new thread. If no thread is provided when running a request,
+ an empty thread will be created.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tool_resources: A set of resources that are used by the assistant's tools. The resources are
+ specific to the type of tool. For example, the `code_interpreter` tool requires
+ a list of file IDs, while the `file_search` tool requires a list of vector store
+ IDs.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers only the tokens comprising the top_p probability mass. So 0.1
+              means only the tokens in the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create_and_run(
+ self,
+ *,
+ assistant_id: str,
+ stream: bool,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | AsyncStream[AssistantStreamEvent]:
+ """
+ Create a thread and run it in one request.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ instructions: Override the default system message of the assistant. This is useful for
+ modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ thread: Options to create a new thread. If no thread is provided when running a request,
+ an empty thread will be created.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tool_resources: A set of resources that are used by the assistant's tools. The resources are
+ specific to the type of tool. For example, the `code_interpreter` tool requires
+ a list of file IDs, while the `file_search` tool requires a list of vector store
+ IDs.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers only the tokens comprising the top_p probability mass. So 0.1
+              means only the tokens in the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["assistant_id"], ["assistant_id", "stream"])
+ async def create_and_run(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | AsyncStream[AssistantStreamEvent]:
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ "/threads/runs",
+ body=await async_maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "parallel_tool_calls": parallel_tool_calls,
+ "response_format": response_format,
+ "stream": stream,
+ "temperature": temperature,
+ "thread": thread,
+ "tool_choice": tool_choice,
+ "tool_resources": tool_resources,
+ "tools": tools,
+ "top_p": top_p,
+ "truncation_strategy": truncation_strategy,
+ },
+ thread_create_and_run_params.ThreadCreateAndRunParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=stream or False,
+ stream_cls=AsyncStream[AssistantStreamEvent],
+ )
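+
+    # Illustrative sketch: with `stream=True` this returns an
+    # `AsyncStream[AssistantStreamEvent]` (hypothetical `client` and ID):
+    #
+    #     events = await client.beta.threads.create_and_run(
+    #         assistant_id="asst_123",
+    #         stream=True,
+    #     )
+    #     async for event in events:
+    #         print(event.event)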
+
+ async def create_and_run_poll(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ poll_interval_ms: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ A helper to create a thread, start a run and then poll for a terminal state.
+ More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ run = await self.create_and_run(
+ assistant_id=assistant_id,
+ instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
+ metadata=metadata,
+ model=model,
+ parallel_tool_calls=parallel_tool_calls,
+ response_format=response_format,
+ temperature=temperature,
+ stream=False,
+ thread=thread,
+ tool_resources=tool_resources,
+ tool_choice=tool_choice,
+ truncation_strategy=truncation_strategy,
+ top_p=top_p,
+ tools=tools,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ )
+ return await self.runs.poll(
+ run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms
+ )
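+
+    # Illustrative sketch, same polling semantics as the sync helper:
+    #
+    #     run = await client.beta.threads.create_and_run_poll(assistant_id="asst_123")
+    #     print(run.status)  # a terminal state such as "completed"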
+
+ @overload
+ def create_and_run_stream(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+ """Create a thread and stream the run back"""
+ ...
+
+ @overload
+ def create_and_run_stream(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ event_handler: AsyncAssistantEventHandlerT,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+ """Create a thread and stream the run back"""
+ ...
+
+ def create_and_run_stream(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ event_handler: AsyncAssistantEventHandlerT | None = None,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> (
+ AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+ | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+ ):
+ """Create a thread and stream the run back"""
+ extra_headers = {
+ "OpenAI-Beta": "assistants=v2",
+ "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
+ "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+ **(extra_headers or {}),
+ }
+ request = self._post(
+ "/threads/runs",
+ body=maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "parallel_tool_calls": parallel_tool_calls,
+ "response_format": response_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "stream": True,
+ "thread": thread,
+ "tools": tools,
+ "tool_resources": tool_resources,
+ "truncation_strategy": truncation_strategy,
+ "top_p": top_p,
+ },
+ thread_create_and_run_params.ThreadCreateAndRunParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=True,
+ stream_cls=AsyncStream[AssistantStreamEvent],
+ )
+ return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
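+
+    # Illustrative sketch: the async manager supports `async with`; `MyHandler`
+    # is a hypothetical `AsyncAssistantEventHandler` subclass:
+    #
+    #     async with client.beta.threads.create_and_run_stream(
+    #         assistant_id="asst_123",
+    #         event_handler=MyHandler(),
+    #     ) as stream:
+    #         await stream.until_done()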
+
+
+class ThreadsWithRawResponse:
+ def __init__(self, threads: Threads) -> None:
+ self._threads = threads
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ threads.create,
+ )
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ threads.retrieve,
+ )
+ self.update = _legacy_response.to_raw_response_wrapper(
+ threads.update,
+ )
+ self.delete = _legacy_response.to_raw_response_wrapper(
+ threads.delete,
+ )
+ self.create_and_run = _legacy_response.to_raw_response_wrapper(
+ threads.create_and_run,
+ )
+
+ @cached_property
+ def runs(self) -> RunsWithRawResponse:
+ return RunsWithRawResponse(self._threads.runs)
+
+ @cached_property
+ def messages(self) -> MessagesWithRawResponse:
+ return MessagesWithRawResponse(self._threads.messages)
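+
+# Illustrative sketch: these wrappers are reached via `.with_raw_response`,
+# e.g. with a hypothetical sync `client`:
+#
+#     response = client.beta.threads.with_raw_response.create()
+#     print(response.headers.get("x-request-id"))
+#     thread = response.parse()  # the parsed `Thread` model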
+
+
+class AsyncThreadsWithRawResponse:
+ def __init__(self, threads: AsyncThreads) -> None:
+ self._threads = threads
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ threads.create,
+ )
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ threads.retrieve,
+ )
+ self.update = _legacy_response.async_to_raw_response_wrapper(
+ threads.update,
+ )
+ self.delete = _legacy_response.async_to_raw_response_wrapper(
+ threads.delete,
+ )
+ self.create_and_run = _legacy_response.async_to_raw_response_wrapper(
+ threads.create_and_run,
+ )
+
+ @cached_property
+ def runs(self) -> AsyncRunsWithRawResponse:
+ return AsyncRunsWithRawResponse(self._threads.runs)
+
+ @cached_property
+ def messages(self) -> AsyncMessagesWithRawResponse:
+ return AsyncMessagesWithRawResponse(self._threads.messages)
+
+
+class ThreadsWithStreamingResponse:
+ def __init__(self, threads: Threads) -> None:
+ self._threads = threads
+
+ self.create = to_streamed_response_wrapper(
+ threads.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ threads.retrieve,
+ )
+ self.update = to_streamed_response_wrapper(
+ threads.update,
+ )
+ self.delete = to_streamed_response_wrapper(
+ threads.delete,
+ )
+ self.create_and_run = to_streamed_response_wrapper(
+ threads.create_and_run,
+ )
+
+ @cached_property
+ def runs(self) -> RunsWithStreamingResponse:
+ return RunsWithStreamingResponse(self._threads.runs)
+
+ @cached_property
+ def messages(self) -> MessagesWithStreamingResponse:
+ return MessagesWithStreamingResponse(self._threads.messages)
+
+
+class AsyncThreadsWithStreamingResponse:
+ def __init__(self, threads: AsyncThreads) -> None:
+ self._threads = threads
+
+ self.create = async_to_streamed_response_wrapper(
+ threads.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ threads.retrieve,
+ )
+ self.update = async_to_streamed_response_wrapper(
+ threads.update,
+ )
+ self.delete = async_to_streamed_response_wrapper(
+ threads.delete,
+ )
+ self.create_and_run = async_to_streamed_response_wrapper(
+ threads.create_and_run,
+ )
+
+ @cached_property
+ def runs(self) -> AsyncRunsWithStreamingResponse:
+ return AsyncRunsWithStreamingResponse(self._threads.runs)
+
+ @cached_property
+ def messages(self) -> AsyncMessagesWithStreamingResponse:
+ return AsyncMessagesWithStreamingResponse(self._threads.messages)