Diffstat (limited to '.venv/lib/python3.12/site-packages/openai/resources/beta/threads')

 .venv/lib/python3.12/site-packages/openai/resources/beta/threads/__init__.py      |   47 +
 .venv/lib/python3.12/site-packages/openai/resources/beta/threads/messages.py      |  670 +
 .venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/__init__.py |   33 +
 .venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/runs.py     | 2989 +
 .venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/steps.py    |  381 +
 .venv/lib/python3.12/site-packages/openai/resources/beta/threads/threads.py       | 1875 +
 6 files changed, 5995 insertions(+), 0 deletions(-)
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/__init__.py
new file mode 100644
index 00000000..a66e445b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/__init__.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .runs import (
+ Runs,
+ AsyncRuns,
+ RunsWithRawResponse,
+ AsyncRunsWithRawResponse,
+ RunsWithStreamingResponse,
+ AsyncRunsWithStreamingResponse,
+)
+from .threads import (
+ Threads,
+ AsyncThreads,
+ ThreadsWithRawResponse,
+ AsyncThreadsWithRawResponse,
+ ThreadsWithStreamingResponse,
+ AsyncThreadsWithStreamingResponse,
+)
+from .messages import (
+ Messages,
+ AsyncMessages,
+ MessagesWithRawResponse,
+ AsyncMessagesWithRawResponse,
+ MessagesWithStreamingResponse,
+ AsyncMessagesWithStreamingResponse,
+)
+
+__all__ = [
+ "Runs",
+ "AsyncRuns",
+ "RunsWithRawResponse",
+ "AsyncRunsWithRawResponse",
+ "RunsWithStreamingResponse",
+ "AsyncRunsWithStreamingResponse",
+ "Messages",
+ "AsyncMessages",
+ "MessagesWithRawResponse",
+ "AsyncMessagesWithRawResponse",
+ "MessagesWithStreamingResponse",
+ "AsyncMessagesWithStreamingResponse",
+ "Threads",
+ "AsyncThreads",
+ "ThreadsWithRawResponse",
+ "AsyncThreadsWithRawResponse",
+ "ThreadsWithStreamingResponse",
+ "AsyncThreadsWithStreamingResponse",
+]
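
The re-exports above define the public surface of this subpackage; in practice these resources are reached through the client object rather than imported directly. A minimal usage sketch (the `OPENAI_API_KEY` environment variable and the thread/message contents are assumptions, not part of this diff):

    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment

    # The Threads and Messages classes exported above back these accessors.
    thread = client.beta.threads.create()
    message = client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content="Hello!",
    )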
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/messages.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/messages.py
new file mode 100644
index 00000000..e3374aba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/messages.py
@@ -0,0 +1,670 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import (
+ AsyncPaginator,
+ make_request_options,
+)
+from ....types.beta.threads import message_list_params, message_create_params, message_update_params
+from ....types.beta.threads.message import Message
+from ....types.shared_params.metadata import Metadata
+from ....types.beta.threads.message_deleted import MessageDeleted
+from ....types.beta.threads.message_content_part_param import MessageContentPartParam
+
+__all__ = ["Messages", "AsyncMessages"]
+
+
+class Messages(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> MessagesWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return MessagesWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> MessagesWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return MessagesWithStreamingResponse(self)
+
+ def create(
+ self,
+ thread_id: str,
+ *,
+ content: Union[str, Iterable[MessageContentPartParam]],
+ role: Literal["user", "assistant"],
+ attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Message:
+ """
+ Create a message.
+
+ Args:
+ content: The text contents of the message.
+
+ role:
+ The role of the entity that is creating the message. Allowed values include:
+
+ - `user`: Indicates the message is sent by an actual user and should be used in
+ most cases to represent user-generated messages.
+ - `assistant`: Indicates the message is generated by the assistant. Use this
+ value to insert messages from the assistant into the conversation.
+
+ attachments: A list of files attached to the message, and the tools they should be added to.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ f"/threads/{thread_id}/messages",
+ body=maybe_transform(
+ {
+ "content": content,
+ "role": role,
+ "attachments": attachments,
+ "metadata": metadata,
+ },
+ message_create_params.MessageCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Message,
+ )
+
+ def retrieve(
+ self,
+ message_id: str,
+ *,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Message:
+ """
+ Retrieve a message.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not message_id:
+ raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get(
+ f"/threads/{thread_id}/messages/{message_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Message,
+ )
+
+ def update(
+ self,
+ message_id: str,
+ *,
+ thread_id: str,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Message:
+ """
+ Modifies a message.
+
+ Args:
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not message_id:
+ raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ f"/threads/{thread_id}/messages/{message_id}",
+ body=maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Message,
+ )
+
+ def list(
+ self,
+ thread_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ before: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ run_id: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[Message]:
+ """
+ Returns a list of messages for a given thread.
+
+ Args:
+ after: A cursor for use in pagination. `after` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ ending with obj_foo, your subsequent call can include after=obj_foo in order to
+ fetch the next page of the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ starting with obj_foo, your subsequent call can include before=obj_foo in order
+ to fetch the previous page of the list.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ run_id: Filter messages by the run ID that generated them.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get_api_list(
+ f"/threads/{thread_id}/messages",
+ page=SyncCursorPage[Message],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "limit": limit,
+ "order": order,
+ "run_id": run_id,
+ },
+ message_list_params.MessageListParams,
+ ),
+ ),
+ model=Message,
+ )
+
+ def delete(
+ self,
+ message_id: str,
+ *,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> MessageDeleted:
+ """
+ Deletes a message.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not message_id:
+ raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._delete(
+ f"/threads/{thread_id}/messages/{message_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=MessageDeleted,
+ )
+
+
+class AsyncMessages(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncMessagesWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncMessagesWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncMessagesWithStreamingResponse(self)
+
+ async def create(
+ self,
+ thread_id: str,
+ *,
+ content: Union[str, Iterable[MessageContentPartParam]],
+ role: Literal["user", "assistant"],
+ attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Message:
+ """
+ Create a message.
+
+ Args:
+ content: The text contents of the message.
+
+ role:
+ The role of the entity that is creating the message. Allowed values include:
+
+ - `user`: Indicates the message is sent by an actual user and should be used in
+ most cases to represent user-generated messages.
+ - `assistant`: Indicates the message is generated by the assistant. Use this
+ value to insert messages from the assistant into the conversation.
+
+ attachments: A list of files attached to the message, and the tools they should be added to.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ f"/threads/{thread_id}/messages",
+ body=await async_maybe_transform(
+ {
+ "content": content,
+ "role": role,
+ "attachments": attachments,
+ "metadata": metadata,
+ },
+ message_create_params.MessageCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Message,
+ )
+
+ async def retrieve(
+ self,
+ message_id: str,
+ *,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Message:
+ """
+ Retrieve a message.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not message_id:
+ raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._get(
+ f"/threads/{thread_id}/messages/{message_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Message,
+ )
+
+ async def update(
+ self,
+ message_id: str,
+ *,
+ thread_id: str,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Message:
+ """
+ Modifies a message.
+
+ Args:
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not message_id:
+ raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ f"/threads/{thread_id}/messages/{message_id}",
+ body=await async_maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Message,
+ )
+
+ def list(
+ self,
+ thread_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ before: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ run_id: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[Message, AsyncCursorPage[Message]]:
+ """
+ Returns a list of messages for a given thread.
+
+ Args:
+ after: A cursor for use in pagination. `after` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ ending with obj_foo, your subsequent call can include after=obj_foo in order to
+ fetch the next page of the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ starting with obj_foo, your subsequent call can include before=obj_foo in order
+ to fetch the previous page of the list.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ run_id: Filter messages by the run ID that generated them.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get_api_list(
+ f"/threads/{thread_id}/messages",
+ page=AsyncCursorPage[Message],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "limit": limit,
+ "order": order,
+ "run_id": run_id,
+ },
+ message_list_params.MessageListParams,
+ ),
+ ),
+ model=Message,
+ )
+
+ async def delete(
+ self,
+ message_id: str,
+ *,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> MessageDeleted:
+ """
+ Deletes a message.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not message_id:
+ raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._delete(
+ f"/threads/{thread_id}/messages/{message_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=MessageDeleted,
+ )
+
+
+class MessagesWithRawResponse:
+ def __init__(self, messages: Messages) -> None:
+ self._messages = messages
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ messages.create,
+ )
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ messages.retrieve,
+ )
+ self.update = _legacy_response.to_raw_response_wrapper(
+ messages.update,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ messages.list,
+ )
+ self.delete = _legacy_response.to_raw_response_wrapper(
+ messages.delete,
+ )
+
+
+class AsyncMessagesWithRawResponse:
+ def __init__(self, messages: AsyncMessages) -> None:
+ self._messages = messages
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ messages.create,
+ )
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ messages.retrieve,
+ )
+ self.update = _legacy_response.async_to_raw_response_wrapper(
+ messages.update,
+ )
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ messages.list,
+ )
+ self.delete = _legacy_response.async_to_raw_response_wrapper(
+ messages.delete,
+ )
+
+
+class MessagesWithStreamingResponse:
+ def __init__(self, messages: Messages) -> None:
+ self._messages = messages
+
+ self.create = to_streamed_response_wrapper(
+ messages.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ messages.retrieve,
+ )
+ self.update = to_streamed_response_wrapper(
+ messages.update,
+ )
+ self.list = to_streamed_response_wrapper(
+ messages.list,
+ )
+ self.delete = to_streamed_response_wrapper(
+ messages.delete,
+ )
+
+
+class AsyncMessagesWithStreamingResponse:
+ def __init__(self, messages: AsyncMessages) -> None:
+ self._messages = messages
+
+ self.create = async_to_streamed_response_wrapper(
+ messages.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ messages.retrieve,
+ )
+ self.update = async_to_streamed_response_wrapper(
+ messages.update,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ messages.list,
+ )
+ self.delete = async_to_streamed_response_wrapper(
+ messages.delete,
+ )
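
`Messages.list` above returns a `SyncCursorPage[Message]` (the async variant returns an `AsyncPaginator`), and iterating the page follows the `after` cursor automatically. A short sketch under that assumption, with a placeholder thread ID, also showing the raw-response wrapper installed by `MessagesWithRawResponse`:

    from openai import OpenAI

    client = OpenAI()

    # Iterating the cursor page transparently requests follow-up pages,
    # so this visits every message in the thread, oldest first.
    for message in client.beta.threads.messages.list(
        thread_id="thread_abc123",  # placeholder ID
        order="asc",
    ):
        print(message.id, message.role)

    # The raw-response variant keeps HTTP headers available and parses on demand.
    response = client.beta.threads.messages.with_raw_response.list(
        thread_id="thread_abc123",  # placeholder ID
    )
    page = response.parse()  # recovers the SyncCursorPage[Message]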
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/__init__.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/__init__.py
new file mode 100644
index 00000000..50aa9fae
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .runs import (
+ Runs,
+ AsyncRuns,
+ RunsWithRawResponse,
+ AsyncRunsWithRawResponse,
+ RunsWithStreamingResponse,
+ AsyncRunsWithStreamingResponse,
+)
+from .steps import (
+ Steps,
+ AsyncSteps,
+ StepsWithRawResponse,
+ AsyncStepsWithRawResponse,
+ StepsWithStreamingResponse,
+ AsyncStepsWithStreamingResponse,
+)
+
+__all__ = [
+ "Steps",
+ "AsyncSteps",
+ "StepsWithRawResponse",
+ "AsyncStepsWithRawResponse",
+ "StepsWithStreamingResponse",
+ "AsyncStepsWithStreamingResponse",
+ "Runs",
+ "AsyncRuns",
+ "RunsWithRawResponse",
+ "AsyncRunsWithRawResponse",
+ "RunsWithStreamingResponse",
+ "AsyncRunsWithStreamingResponse",
+]
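
The `Runs` resource re-exported here is defined in `runs.py` below; its `create` method is overloaded on `stream`, so `stream=True` returns a `Stream[AssistantStreamEvent]` instead of a `Run`. A hedged sketch with placeholder IDs:

    from openai import OpenAI

    client = OpenAI()

    # stream=True selects the overload that returns Stream[AssistantStreamEvent].
    events = client.beta.threads.runs.create(
        thread_id="thread_abc123",   # placeholder ID
        assistant_id="asst_abc123",  # placeholder ID
        stream=True,
    )
    for event in events:
        print(event.event)  # e.g. "thread.run.created", "thread.message.delta"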
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/runs.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/runs.py
new file mode 100644
index 00000000..acb1c9b2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/runs.py
@@ -0,0 +1,2989 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import typing_extensions
+from typing import List, Union, Iterable, Optional
+from functools import partial
+from typing_extensions import Literal, overload
+
+import httpx
+
+from ..... import _legacy_response
+from .steps import (
+ Steps,
+ AsyncSteps,
+ StepsWithRawResponse,
+ AsyncStepsWithRawResponse,
+ StepsWithStreamingResponse,
+ AsyncStepsWithStreamingResponse,
+)
+from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ....._utils import (
+ is_given,
+ required_args,
+ maybe_transform,
+ async_maybe_transform,
+)
+from ....._compat import cached_property
+from ....._resource import SyncAPIResource, AsyncAPIResource
+from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....._streaming import Stream, AsyncStream
+from .....pagination import SyncCursorPage, AsyncCursorPage
+from ....._base_client import AsyncPaginator, make_request_options
+from .....lib.streaming import (
+ AssistantEventHandler,
+ AssistantEventHandlerT,
+ AssistantStreamManager,
+ AsyncAssistantEventHandler,
+ AsyncAssistantEventHandlerT,
+ AsyncAssistantStreamManager,
+)
+from .....types.beta.threads import (
+ run_list_params,
+ run_create_params,
+ run_update_params,
+ run_submit_tool_outputs_params,
+)
+from .....types.beta.threads.run import Run
+from .....types.shared.chat_model import ChatModel
+from .....types.shared_params.metadata import Metadata
+from .....types.shared.reasoning_effort import ReasoningEffort
+from .....types.beta.assistant_tool_param import AssistantToolParam
+from .....types.beta.assistant_stream_event import AssistantStreamEvent
+from .....types.beta.threads.runs.run_step_include import RunStepInclude
+from .....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
+from .....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
+
+__all__ = ["Runs", "AsyncRuns"]
+
+
+class Runs(SyncAPIResource):
+ @cached_property
+ def steps(self) -> Steps:
+ return Steps(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> RunsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return RunsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> RunsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return RunsWithStreamingResponse(self)
+
+ @overload
+ def create(
+ self,
+ thread_id: str,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Create a run.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+ is useful for modifying the behavior on a per-run basis without overriding other
+ instructions.
+
+ additional_messages: Adds additional messages to the thread before creating the run.
+
+ instructions: Overrides the
+ [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+ of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ reasoning_effort: **o-series models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+ control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create(
+ self,
+ thread_id: str,
+ *,
+ assistant_id: str,
+ stream: Literal[True],
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Stream[AssistantStreamEvent]:
+ """
+ Create a run.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+ is useful for modifying the behavior on a per-run basis without overriding other
+ instructions.
+
+ additional_messages: Adds additional messages to the thread before creating the run.
+
+ instructions: Overrides the
+ [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+ of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ reasoning_effort: **o-series models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+ control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create(
+ self,
+ thread_id: str,
+ *,
+ assistant_id: str,
+ stream: bool,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | Stream[AssistantStreamEvent]:
+ """
+ Create a run.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+ is useful for modifying the behavior on a per-run basis without overriding other
+ instructions.
+
+ additional_messages: Adds additional messages to the thread before creating the run.
+
+ instructions: Overrides the
+ [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+ of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ reasoning_effort: **o-series models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+ control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["assistant_id"], ["assistant_id", "stream"])
+ def create(
+ self,
+ thread_id: str,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | Stream[AssistantStreamEvent]:
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ f"/threads/{thread_id}/runs",
+ body=maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "additional_instructions": additional_instructions,
+ "additional_messages": additional_messages,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "parallel_tool_calls": parallel_tool_calls,
+ "reasoning_effort": reasoning_effort,
+ "response_format": response_format,
+ "stream": stream,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "top_p": top_p,
+ "truncation_strategy": truncation_strategy,
+ },
+ run_create_params.RunCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"include": include}, run_create_params.RunCreateParams),
+ ),
+ cast_to=Run,
+ stream=stream or False,
+ stream_cls=Stream[AssistantStreamEvent],
+ )
+
+ def retrieve(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Retrieves a run.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get(
+ f"/threads/{thread_id}/runs/{run_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ )
+
+ def update(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Modifies a run.
+
+ Args:
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ f"/threads/{thread_id}/runs/{run_id}",
+ body=maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ )
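+
+ # Usage sketch (illustrative; IDs are placeholders): `metadata` holds up to 16
+ # short string key/value pairs, e.g.
+ #   run = client.beta.threads.runs.update(
+ #       run_id="run_abc123",
+ #       thread_id="thread_abc123",
+ #       metadata={"user": "user_123456"},
+ #   )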
+
+ def list(
+ self,
+ thread_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ before: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[Run]:
+ """
+ Returns a list of runs belonging to a thread.
+
+ Args:
+ after: A cursor for use in pagination. `after` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ ending with obj_foo, your subsequent call can include after=obj_foo in order to
+ fetch the next page of the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ starting with obj_foo, your subsequent call can include before=obj_foo in order
+ to fetch the previous page of the list.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get_api_list(
+ f"/threads/{thread_id}/runs",
+ page=SyncCursorPage[Run],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "limit": limit,
+ "order": order,
+ },
+ run_list_params.RunListParams,
+ ),
+ ),
+ model=Run,
+ )
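+
+ # Usage sketch (illustrative): the returned `SyncCursorPage[Run]` is directly
+ # iterable and fetches subsequent pages on demand, e.g.
+ #   for run in client.beta.threads.runs.list(thread_id="thread_abc123", limit=20):
+ #       print(run.id, run.status)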
+
+ def cancel(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Cancels a run that is `in_progress`.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ f"/threads/{thread_id}/runs/{run_id}/cancel",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ )
+
+ def create_and_poll(
+ self,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ poll_interval_ms: int | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ A helper to create a run and poll for a terminal state. More information on Run
+ lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ run = self.create(
+ thread_id=thread_id,
+ assistant_id=assistant_id,
+ include=include,
+ additional_instructions=additional_instructions,
+ additional_messages=additional_messages,
+ instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
+ metadata=metadata,
+ model=model,
+ response_format=response_format,
+ temperature=temperature,
+ tool_choice=tool_choice,
+ parallel_tool_calls=parallel_tool_calls,
+ reasoning_effort=reasoning_effort,
+ # We assume we are not streaming when polling
+ stream=False,
+ tools=tools,
+ truncation_strategy=truncation_strategy,
+ top_p=top_p,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ )
+ return self.poll(
+ run.id,
+ thread_id=thread_id,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ poll_interval_ms=poll_interval_ms,
+ timeout=timeout,
+ )
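+
+ # Usage sketch (illustrative; IDs are placeholders): create a run, block until
+ # it reaches a terminal state, then branch on the final status:
+ #   run = client.beta.threads.runs.create_and_poll(
+ #       thread_id="thread_abc123",
+ #       assistant_id="asst_abc123",
+ #       poll_interval_ms=500,
+ #   )
+ #   if run.status == "completed":
+ #       ...  # read the new messages from the thread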
+
+ @overload
+ @typing_extensions.deprecated("use `stream` instead")
+ def create_and_stream(
+ self,
+ *,
+ assistant_id: str,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandler]:
+ """Create a Run stream"""
+ ...
+
+ @overload
+ @typing_extensions.deprecated("use `stream` instead")
+ def create_and_stream(
+ self,
+ *,
+ assistant_id: str,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ event_handler: AssistantEventHandlerT,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandlerT]:
+ """Create a Run stream"""
+ ...
+
+ @typing_extensions.deprecated("use `stream` instead")
+ def create_and_stream(
+ self,
+ *,
+ assistant_id: str,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ event_handler: AssistantEventHandlerT | None = None,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+ """Create a Run stream"""
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+ extra_headers = {
+ "OpenAI-Beta": "assistants=v2",
+ "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+ "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+ **(extra_headers or {}),
+ }
+ make_request = partial(
+ self._post,
+ f"/threads/{thread_id}/runs",
+ body=maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "additional_instructions": additional_instructions,
+ "additional_messages": additional_messages,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "response_format": response_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "stream": True,
+ "tools": tools,
+ "truncation_strategy": truncation_strategy,
+ "parallel_tool_calls": parallel_tool_calls,
+ "reasoning_effort": reasoning_effort,
+ "top_p": top_p,
+ },
+ run_create_params.RunCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=True,
+ stream_cls=Stream[AssistantStreamEvent],
+ )
+ return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
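+
+ # Migration sketch (illustrative): `create_and_stream` is deprecated; `stream`
+ # takes the same arguments (plus an optional `include` list) and is used the
+ # same way:
+ #   with client.beta.threads.runs.stream(
+ #       thread_id="thread_abc123",
+ #       assistant_id="asst_abc123",
+ #   ) as stream:
+ #       for event in stream:
+ #           ...  # each event is an AssistantStreamEvent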
+
+ def poll(
+ self,
+ run_id: str,
+ thread_id: str,
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ poll_interval_ms: int | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ A helper to poll a run status until it reaches a terminal state. More
+ information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})}
+
+ if is_given(poll_interval_ms):
+ extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+ terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"}
+ while True:
+ response = self.with_raw_response.retrieve(
+ thread_id=thread_id,
+ run_id=run_id,
+ extra_headers=extra_headers,
+ extra_body=extra_body,
+ extra_query=extra_query,
+ timeout=timeout,
+ )
+
+ run = response.parse()
+ # Return if we reached a terminal state
+ if run.status in terminal_states:
+ return run
+
+ if not is_given(poll_interval_ms):
+ from_header = response.headers.get("openai-poll-after-ms")
+ if from_header is not None:
+ poll_interval_ms = int(from_header)
+ else:
+ poll_interval_ms = 1000
+
+ self._sleep(poll_interval_ms / 1000)
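+
+ # Design note (editorial): the effective poll interval is resolved in priority
+ # order: an explicit `poll_interval_ms` argument, then the server's
+ # `openai-poll-after-ms` response header, then a 1000 ms fallback. A caller
+ # therefore only passes `poll_interval_ms` to override server pacing, e.g.
+ #   run = client.beta.threads.runs.poll(
+ #       "run_abc123", thread_id="thread_abc123", poll_interval_ms=250
+ #   )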
+
+ @overload
+ def stream(
+ self,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandler]:
+ """Create a Run stream"""
+ ...
+
+ @overload
+ def stream(
+ self,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ event_handler: AssistantEventHandlerT,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandlerT]:
+ """Create a Run stream"""
+ ...
+
+ def stream(
+ self,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ event_handler: AssistantEventHandlerT | None = None,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+ """Create a Run stream"""
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+ extra_headers = {
+ "OpenAI-Beta": "assistants=v2",
+ "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+ "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+ **(extra_headers or {}),
+ }
+ make_request = partial(
+ self._post,
+ f"/threads/{thread_id}/runs",
+ body=maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "additional_instructions": additional_instructions,
+ "additional_messages": additional_messages,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "response_format": response_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "stream": True,
+ "tools": tools,
+ "parallel_tool_calls": parallel_tool_calls,
+ "reasoning_effort": reasoning_effort,
+ "truncation_strategy": truncation_strategy,
+ "top_p": top_p,
+ },
+ run_create_params.RunCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"include": include}, run_create_params.RunCreateParams),
+ ),
+ cast_to=Run,
+ stream=True,
+ stream_cls=Stream[AssistantStreamEvent],
+ )
+ return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
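+
+ # Usage sketch (illustrative): a custom handler subclasses
+ # `AssistantEventHandler` and overrides only the callbacks it needs, e.g.
+ #   class Handler(AssistantEventHandler):
+ #       def on_text_delta(self, delta, snapshot):
+ #           print(delta.value, end="")
+ #
+ #   with client.beta.threads.runs.stream(
+ #       thread_id="thread_abc123",
+ #       assistant_id="asst_abc123",
+ #       event_handler=Handler(),
+ #   ) as stream:
+ #       stream.until_done()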
+
+ @overload
+ def submit_tool_outputs(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ When a run has the `status: "requires_action"` and `required_action.type` is
+ `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+ tool calls once they're all completed. All outputs must be submitted in a single
+ request.
+
+ Args:
+ tool_outputs: A list of tools for which the outputs are being submitted.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def submit_tool_outputs(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ stream: Literal[True],
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Stream[AssistantStreamEvent]:
+ """
+ When a run has the `status: "requires_action"` and `required_action.type` is
+ `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+ tool calls once they're all completed. All outputs must be submitted in a single
+ request.
+
+ Args:
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ tool_outputs: A list of tools for which the outputs are being submitted.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def submit_tool_outputs(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ stream: bool,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | Stream[AssistantStreamEvent]:
+ """
+ When a run has the `status: "requires_action"` and `required_action.type` is
+ `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+ tool calls once they're all completed. All outputs must be submitted in a single
+ request.
+
+ Args:
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ tool_outputs: A list of tools for which the outputs are being submitted.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"])
+ def submit_tool_outputs(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | Stream[AssistantStreamEvent]:
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+ body=maybe_transform(
+ {
+ "tool_outputs": tool_outputs,
+ "stream": stream,
+ },
+ run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=stream or False,
+ stream_cls=Stream[AssistantStreamEvent],
+ )
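+
+ # Usage sketch (illustrative; IDs are placeholders): when a run reports
+ # `status == "requires_action"`, every pending tool call is answered by ID in
+ # a single request:
+ #   run = client.beta.threads.runs.submit_tool_outputs(
+ #       run_id="run_abc123",
+ #       thread_id="thread_abc123",
+ #       tool_outputs=[{"tool_call_id": "call_abc123", "output": "22 degrees C"}],
+ #   )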
+
+ def submit_tool_outputs_and_poll(
+ self,
+ *,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ run_id: str,
+ thread_id: str,
+ poll_interval_ms: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ A helper to submit a tool output to a run and poll for a terminal run state.
+ More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ run = self.submit_tool_outputs(
+ run_id=run_id,
+ thread_id=thread_id,
+ tool_outputs=tool_outputs,
+ stream=False,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ )
+ return self.poll(
+ run_id=run.id,
+ thread_id=thread_id,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ poll_interval_ms=poll_interval_ms,
+ )
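+
+ # Usage sketch (illustrative): a convenience that combines the two steps
+ # above, submitting the outputs and then polling the same run to a terminal
+ # state:
+ #   run = client.beta.threads.runs.submit_tool_outputs_and_poll(
+ #       run_id="run_abc123",
+ #       thread_id="thread_abc123",
+ #       tool_outputs=[{"tool_call_id": "call_abc123", "output": "22 degrees C"}],
+ #   )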
+
+ @overload
+ def submit_tool_outputs_stream(
+ self,
+ *,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ run_id: str,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandler]:
+ """
+ Submit the tool outputs from a previous run and stream the run to a terminal
+ state. More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ ...
+
+ @overload
+ def submit_tool_outputs_stream(
+ self,
+ *,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ run_id: str,
+ thread_id: str,
+ event_handler: AssistantEventHandlerT,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandlerT]:
+ """
+ Submit the tool outputs from a previous run and stream the run to a terminal
+ state. More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ ...
+
+ def submit_tool_outputs_stream(
+ self,
+ *,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ run_id: str,
+ thread_id: str,
+ event_handler: AssistantEventHandlerT | None = None,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+ """
+ Submit the tool outputs from a previous run and stream the run to a terminal
+ state. More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+ extra_headers = {
+ "OpenAI-Beta": "assistants=v2",
+ "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream",
+ "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+ **(extra_headers or {}),
+ }
+ request = partial(
+ self._post,
+ f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+ body=maybe_transform(
+ {
+ "tool_outputs": tool_outputs,
+ "stream": True,
+ },
+ run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=True,
+ stream_cls=Stream[AssistantStreamEvent],
+ )
+ return AssistantStreamManager(request, event_handler=event_handler or AssistantEventHandler())
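+
+ # Usage sketch (illustrative): streams the remainder of the run after the
+ # outputs are submitted, reusing the same event-handler pattern as `stream`:
+ #   with client.beta.threads.runs.submit_tool_outputs_stream(
+ #       run_id="run_abc123",
+ #       thread_id="thread_abc123",
+ #       tool_outputs=[{"tool_call_id": "call_abc123", "output": "22 degrees C"}],
+ #   ) as stream:
+ #       stream.until_done()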
+
+
+class AsyncRuns(AsyncAPIResource):
+ @cached_property
+ def steps(self) -> AsyncSteps:
+ return AsyncSteps(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncRunsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncRunsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncRunsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncRunsWithStreamingResponse(self)
+
+ @overload
+ async def create(
+ self,
+ thread_id: str,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Create a run.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+ is useful for modifying the behavior on a per-run basis without overriding other
+ instructions.
+
+ additional_messages: Adds additional messages to the thread before creating the run.
+
+ instructions: Overrides the
+ [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+ of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ reasoning_effort: **o-series models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+ control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create(
+ self,
+ thread_id: str,
+ *,
+ assistant_id: str,
+ stream: Literal[True],
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncStream[AssistantStreamEvent]:
+ """
+ Create a run.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+ is useful for modifying the behavior on a per-run basis without overriding other
+ instructions.
+
+ additional_messages: Adds additional messages to the thread before creating the run.
+
+ instructions: Overrides the
+ [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+ of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ reasoning_effort: **o-series models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+ control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create(
+ self,
+ thread_id: str,
+ *,
+ assistant_id: str,
+ stream: bool,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | AsyncStream[AssistantStreamEvent]:
+ """
+ Create a run.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+ is useful for modifying the behavior on a per-run basis without overriding other
+ instructions.
+
+ additional_messages: Adds additional messages to the thread before creating the run.
+
+ instructions: Overrides the
+ [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+ of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ reasoning_effort: **o-series models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+ control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
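+
+ Example (a minimal sketch; assumes an initialized ``AsyncOpenAI`` client
+ named ``client`` and placeholder thread/assistant IDs; note the
+ instructions ask for JSON, as JSON mode requires):
+
+ run = await client.beta.threads.runs.create(
+ thread_id="thread_abc123",
+ assistant_id="asst_abc123",
+ instructions="Reply with a JSON object.",
+ response_format={"type": "json_object"},
+ )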
+ """
+ ...
+
+ @required_args(["assistant_id"], ["assistant_id", "stream"])
+ async def create(
+ self,
+ thread_id: str,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | AsyncStream[AssistantStreamEvent]:
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ f"/threads/{thread_id}/runs",
+ body=await async_maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "additional_instructions": additional_instructions,
+ "additional_messages": additional_messages,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "parallel_tool_calls": parallel_tool_calls,
+ "reasoning_effort": reasoning_effort,
+ "response_format": response_format,
+ "stream": stream,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "top_p": top_p,
+ "truncation_strategy": truncation_strategy,
+ },
+ run_create_params.RunCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform({"include": include}, run_create_params.RunCreateParams),
+ ),
+ cast_to=Run,
+ stream=stream or False,
+ stream_cls=AsyncStream[AssistantStreamEvent],
+ )
+
+ async def retrieve(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Retrieves a run.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._get(
+ f"/threads/{thread_id}/runs/{run_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ )
+
+ async def update(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Modifies a run.
+
+ Args:
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ f"/threads/{thread_id}/runs/{run_id}",
+ body=await async_maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ )
+
+ def list(
+ self,
+ thread_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ before: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[Run, AsyncCursorPage[Run]]:
+ """
+ Returns a list of runs belonging to a thread.
+
+ Args:
+ after: A cursor for use in pagination. `after` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ ending with obj_foo, your subsequent call can include after=obj_foo in order to
+ fetch the next page of the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ starting with obj_foo, your subsequent call can include before=obj_foo in order
+ to fetch the previous page of the list.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
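+
+ Example (a minimal sketch; assumes an initialized ``AsyncOpenAI`` client
+ named ``client``; the paginator fetches further pages automatically):
+
+ async for run in client.beta.threads.runs.list(thread_id="thread_abc123"):
+ print(run.id, run.status)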
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get_api_list(
+ f"/threads/{thread_id}/runs",
+ page=AsyncCursorPage[Run],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "limit": limit,
+ "order": order,
+ },
+ run_list_params.RunListParams,
+ ),
+ ),
+ model=Run,
+ )
+
+ async def cancel(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Cancels a run that is `in_progress`.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ f"/threads/{thread_id}/runs/{run_id}/cancel",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ )
+
+ async def create_and_poll(
+ self,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ poll_interval_ms: int | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ A helper to create a run and poll for a terminal state. More information on Run
+ lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
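+
+ Example (a minimal sketch; assumes an initialized ``AsyncOpenAI`` client
+ named ``client`` and placeholder IDs):
+
+ run = await client.beta.threads.runs.create_and_poll(
+ thread_id="thread_abc123",
+ assistant_id="asst_abc123",
+ )
+ # `run.status` is now terminal, e.g. "completed" or "requires_action"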
+ """
+ run = await self.create(
+ thread_id=thread_id,
+ assistant_id=assistant_id,
+ include=include,
+ additional_instructions=additional_instructions,
+ additional_messages=additional_messages,
+ instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
+ metadata=metadata,
+ model=model,
+ response_format=response_format,
+ temperature=temperature,
+ tool_choice=tool_choice,
+ parallel_tool_calls=parallel_tool_calls,
+ reasoning_effort=reasoning_effort,
+ # We assume we are not streaming when polling
+ stream=False,
+ tools=tools,
+ truncation_strategy=truncation_strategy,
+ top_p=top_p,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ )
+ return await self.poll(
+ run.id,
+ thread_id=thread_id,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ poll_interval_ms=poll_interval_ms,
+ timeout=timeout,
+ )
+
+ @overload
+ @typing_extensions.deprecated("use `stream` instead")
+ def create_and_stream(
+ self,
+ *,
+ assistant_id: str,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+ """Create a Run stream"""
+ ...
+
+ @overload
+ @typing_extensions.deprecated("use `stream` instead")
+ def create_and_stream(
+ self,
+ *,
+ assistant_id: str,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ event_handler: AsyncAssistantEventHandlerT,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+ """Create a Run stream"""
+ ...
+
+ @typing_extensions.deprecated("use `stream` instead")
+ def create_and_stream(
+ self,
+ *,
+ assistant_id: str,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ event_handler: AsyncAssistantEventHandlerT | None = None,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> (
+ AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+ | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+ ):
+ """Create a Run stream"""
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+ extra_headers = {
+ "OpenAI-Beta": "assistants=v2",
+ "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+ "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+ **(extra_headers or {}),
+ }
+ request = self._post(
+ f"/threads/{thread_id}/runs",
+ body=maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "additional_instructions": additional_instructions,
+ "additional_messages": additional_messages,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "response_format": response_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "stream": True,
+ "tools": tools,
+ "truncation_strategy": truncation_strategy,
+ "top_p": top_p,
+ "parallel_tool_calls": parallel_tool_calls,
+ },
+ run_create_params.RunCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=True,
+ stream_cls=AsyncStream[AssistantStreamEvent],
+ )
+ return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
+
+ async def poll(
+ self,
+ run_id: str,
+ thread_id: str,
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ poll_interval_ms: int | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ A helper to poll a run status until it reaches a terminal state. More
+ information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
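+
+ Example (a minimal sketch; placeholder IDs; ``poll_interval_ms`` is
+ optional and otherwise taken from the `openai-poll-after-ms` response
+ header, falling back to 1000ms):
+
+ run = await client.beta.threads.runs.poll(
+ "run_abc123",
+ thread_id="thread_abc123",
+ poll_interval_ms=500,
+ )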
+ """
+ extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})}
+
+ if is_given(poll_interval_ms):
+ extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+ terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"}
+ while True:
+ response = await self.with_raw_response.retrieve(
+ thread_id=thread_id,
+ run_id=run_id,
+ extra_headers=extra_headers,
+ extra_body=extra_body,
+ extra_query=extra_query,
+ timeout=timeout,
+ )
+
+ run = response.parse()
+ # Return if we reached a terminal state
+ if run.status in terminal_states:
+ return run
+
+ if not is_given(poll_interval_ms):
+ from_header = response.headers.get("openai-poll-after-ms")
+ if from_header is not None:
+ poll_interval_ms = int(from_header)
+ else:
+ poll_interval_ms = 1000
+
+ await self._sleep(poll_interval_ms / 1000)
+
+ @overload
+ def stream(
+ self,
+ *,
+ assistant_id: str,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+ """Create a Run stream"""
+ ...
+
+ @overload
+ def stream(
+ self,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ event_handler: AsyncAssistantEventHandlerT,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+ """Create a Run stream"""
+ ...
+
+ def stream(
+ self,
+ *,
+ assistant_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ thread_id: str,
+ event_handler: AsyncAssistantEventHandlerT | None = None,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> (
+ AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+ | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+ ):
+ """Create a Run stream"""
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+ extra_headers = {
+ "OpenAI-Beta": "assistants=v2",
+ "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+ "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+ **(extra_headers or {}),
+ }
+ request = self._post(
+ f"/threads/{thread_id}/runs",
+ body=maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "additional_instructions": additional_instructions,
+ "additional_messages": additional_messages,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "response_format": response_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "stream": True,
+ "tools": tools,
+ "parallel_tool_calls": parallel_tool_calls,
+ "reasoning_effort": reasoning_effort,
+ "truncation_strategy": truncation_strategy,
+ "top_p": top_p,
+ },
+ run_create_params.RunCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"include": include}, run_create_params.RunCreateParams),
+ ),
+ cast_to=Run,
+ stream=True,
+ stream_cls=AsyncStream[AssistantStreamEvent],
+ )
+ return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
+
+ @overload
+ async def submit_tool_outputs(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ When a run has the `status: "requires_action"` and `required_action.type` is
+ `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+ tool calls once they're all completed. All outputs must be submitted in a single
+ request.
+
+ Args:
+ tool_outputs: A list of tools for which the outputs are being submitted.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
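+
+ Example (a minimal sketch; assumes a run in the ``requires_action`` state,
+ with placeholder run/thread/tool-call IDs):
+
+ run = await client.beta.threads.runs.submit_tool_outputs(
+ run_id="run_abc123",
+ thread_id="thread_abc123",
+ tool_outputs=[{"tool_call_id": "call_abc123", "output": "70 degrees"}],
+ )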
+ """
+ ...
+
+ @overload
+ async def submit_tool_outputs(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ stream: Literal[True],
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncStream[AssistantStreamEvent]:
+ """
+ When a run has the `status: "requires_action"` and `required_action.type` is
+ `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+ tool calls once they're all completed. All outputs must be submitted in a single
+ request.
+
+ Args:
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ tool_outputs: A list of tools for which the outputs are being submitted.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def submit_tool_outputs(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ stream: bool,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | AsyncStream[AssistantStreamEvent]:
+ """
+ When a run has the `status: "requires_action"` and `required_action.type` is
+ `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+ tool calls once they're all completed. All outputs must be submitted in a single
+ request.
+
+ Args:
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ tool_outputs: A list of tools for which the outputs are being submitted.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"])
+ async def submit_tool_outputs(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | AsyncStream[AssistantStreamEvent]:
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+ body=await async_maybe_transform(
+ {
+ "tool_outputs": tool_outputs,
+ "stream": stream,
+ },
+ run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=stream or False,
+ stream_cls=AsyncStream[AssistantStreamEvent],
+ )
+
+ async def submit_tool_outputs_and_poll(
+ self,
+ *,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ run_id: str,
+ thread_id: str,
+ poll_interval_ms: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ A helper to submit tool outputs to a run and poll for a terminal run state.
+ More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
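+
+ Example (a minimal sketch; assumes ``run`` is an existing Run in the
+ ``requires_action`` state):
+
+ tool_outputs = [
+ {"tool_call_id": tool_call.id, "output": "70 degrees"}
+ for tool_call in run.required_action.submit_tool_outputs.tool_calls
+ ]
+ run = await client.beta.threads.runs.submit_tool_outputs_and_poll(
+ run_id=run.id,
+ thread_id=run.thread_id,
+ tool_outputs=tool_outputs,
+ )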
+ """
+ run = await self.submit_tool_outputs(
+ run_id=run_id,
+ thread_id=thread_id,
+ tool_outputs=tool_outputs,
+ stream=False,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ )
+ return await self.poll(
+ run_id=run.id,
+ thread_id=thread_id,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ poll_interval_ms=poll_interval_ms,
+ )
+
+ @overload
+ def submit_tool_outputs_stream(
+ self,
+ *,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ run_id: str,
+ thread_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+ """
+ Submit the tool outputs from a previous run and stream the run to a terminal
+ state. More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ ...
+
+ @overload
+ def submit_tool_outputs_stream(
+ self,
+ *,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ run_id: str,
+ thread_id: str,
+ event_handler: AsyncAssistantEventHandlerT,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+ """
+ Submit the tool outputs from a previous run and stream the run to a terminal
+ state. More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ ...
+
+ def submit_tool_outputs_stream(
+ self,
+ *,
+ tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+ run_id: str,
+ thread_id: str,
+ event_handler: AsyncAssistantEventHandlerT | None = None,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> (
+ AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+ | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+ ):
+ """
+ Submit the tool outputs from a previous run and stream the run to a terminal
+ state. More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
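+
+ Example (a minimal sketch; placeholder IDs, default event handler):
+
+ async with client.beta.threads.runs.submit_tool_outputs_stream(
+ run_id="run_abc123",
+ thread_id="thread_abc123",
+ tool_outputs=[{"tool_call_id": "call_abc123", "output": "70 degrees"}],
+ ) as stream:
+ async for event in stream:
+ ... # handle each AssistantStreamEvent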
+ """
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+ extra_headers = {
+ "OpenAI-Beta": "assistants=v2",
+ "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream",
+ "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+ **(extra_headers or {}),
+ }
+ request = self._post(
+ f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+ body=maybe_transform(
+ {
+ "tool_outputs": tool_outputs,
+ "stream": True,
+ },
+ run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=True,
+ stream_cls=AsyncStream[AssistantStreamEvent],
+ )
+ return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
+
+
+class RunsWithRawResponse:
+ def __init__(self, runs: Runs) -> None:
+ self._runs = runs
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ runs.create,
+ )
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ runs.retrieve,
+ )
+ self.update = _legacy_response.to_raw_response_wrapper(
+ runs.update,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ runs.list,
+ )
+ self.cancel = _legacy_response.to_raw_response_wrapper(
+ runs.cancel,
+ )
+ self.submit_tool_outputs = _legacy_response.to_raw_response_wrapper(
+ runs.submit_tool_outputs,
+ )
+
+ @cached_property
+ def steps(self) -> StepsWithRawResponse:
+ return StepsWithRawResponse(self._runs.steps)
+
+
+class AsyncRunsWithRawResponse:
+ def __init__(self, runs: AsyncRuns) -> None:
+ self._runs = runs
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ runs.create,
+ )
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ runs.retrieve,
+ )
+ self.update = _legacy_response.async_to_raw_response_wrapper(
+ runs.update,
+ )
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ runs.list,
+ )
+ self.cancel = _legacy_response.async_to_raw_response_wrapper(
+ runs.cancel,
+ )
+ self.submit_tool_outputs = _legacy_response.async_to_raw_response_wrapper(
+ runs.submit_tool_outputs,
+ )
+
+ @cached_property
+ def steps(self) -> AsyncStepsWithRawResponse:
+ return AsyncStepsWithRawResponse(self._runs.steps)
+
+
+class RunsWithStreamingResponse:
+ def __init__(self, runs: Runs) -> None:
+ self._runs = runs
+
+ self.create = to_streamed_response_wrapper(
+ runs.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ runs.retrieve,
+ )
+ self.update = to_streamed_response_wrapper(
+ runs.update,
+ )
+ self.list = to_streamed_response_wrapper(
+ runs.list,
+ )
+ self.cancel = to_streamed_response_wrapper(
+ runs.cancel,
+ )
+ self.submit_tool_outputs = to_streamed_response_wrapper(
+ runs.submit_tool_outputs,
+ )
+
+ @cached_property
+ def steps(self) -> StepsWithStreamingResponse:
+ return StepsWithStreamingResponse(self._runs.steps)
+
+
+class AsyncRunsWithStreamingResponse:
+ def __init__(self, runs: AsyncRuns) -> None:
+ self._runs = runs
+
+ self.create = async_to_streamed_response_wrapper(
+ runs.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ runs.retrieve,
+ )
+ self.update = async_to_streamed_response_wrapper(
+ runs.update,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ runs.list,
+ )
+ self.cancel = async_to_streamed_response_wrapper(
+ runs.cancel,
+ )
+ self.submit_tool_outputs = async_to_streamed_response_wrapper(
+ runs.submit_tool_outputs,
+ )
+
+ @cached_property
+ def steps(self) -> AsyncStepsWithStreamingResponse:
+ return AsyncStepsWithStreamingResponse(self._runs.steps)
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/steps.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/steps.py
new file mode 100644
index 00000000..709c729d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/runs/steps.py
@@ -0,0 +1,381 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Literal
+
+import httpx
+
+from ..... import _legacy_response
+from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ....._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ....._compat import cached_property
+from ....._resource import SyncAPIResource, AsyncAPIResource
+from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .....pagination import SyncCursorPage, AsyncCursorPage
+from ....._base_client import AsyncPaginator, make_request_options
+from .....types.beta.threads.runs import step_list_params, step_retrieve_params
+from .....types.beta.threads.runs.run_step import RunStep
+from .....types.beta.threads.runs.run_step_include import RunStepInclude
+
+__all__ = ["Steps", "AsyncSteps"]
+
+
+class Steps(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> StepsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return StepsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> StepsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return StepsWithStreamingResponse(self)
+
+ def retrieve(
+ self,
+ step_id: str,
+ *,
+ thread_id: str,
+ run_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunStep:
+ """
+ Retrieves a run step.
+
+ Args:
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
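+
+ Example (a minimal sketch; assumes an initialized ``OpenAI`` client named
+ ``client`` and placeholder IDs, using the documented `include` value):
+
+ run_step = client.beta.threads.runs.steps.retrieve(
+ "step_abc123",
+ thread_id="thread_abc123",
+ run_id="run_abc123",
+ include=["step_details.tool_calls[*].file_search.results[*].content"],
+ )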
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ if not step_id:
+ raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get(
+ f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams),
+ ),
+ cast_to=RunStep,
+ )
+
+ def list(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ after: str | NotGiven = NOT_GIVEN,
+ before: str | NotGiven = NOT_GIVEN,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[RunStep]:
+ """
+ Returns a list of run steps belonging to a run.
+
+ Args:
+ after: A cursor for use in pagination. `after` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ ending with obj_foo, your subsequent call can include after=obj_foo in order to
+ fetch the next page of the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ starting with obj_foo, your subsequent call can include before=obj_foo in order
+ to fetch the previous page of the list.
+
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
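+
+ Example (a minimal sketch; placeholder IDs; iterating the returned page
+ fetches further pages automatically):
+
+ for run_step in client.beta.threads.runs.steps.list(
+ "run_abc123",
+ thread_id="thread_abc123",
+ order="asc",
+ ):
+ print(run_step.id)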
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get_api_list(
+ f"/threads/{thread_id}/runs/{run_id}/steps",
+ page=SyncCursorPage[RunStep],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "include": include,
+ "limit": limit,
+ "order": order,
+ },
+ step_list_params.StepListParams,
+ ),
+ ),
+ model=RunStep,
+ )
+
+
+class AsyncSteps(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncStepsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncStepsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncStepsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncStepsWithStreamingResponse(self)
+
+ async def retrieve(
+ self,
+ step_id: str,
+ *,
+ thread_id: str,
+ run_id: str,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunStep:
+ """
+ Retrieves a run step.
+
+ Args:
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ if not step_id:
+ raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._get(
+ f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams),
+ ),
+ cast_to=RunStep,
+ )
+
+ def list(
+ self,
+ run_id: str,
+ *,
+ thread_id: str,
+ after: str | NotGiven = NOT_GIVEN,
+ before: str | NotGiven = NOT_GIVEN,
+ include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[RunStep, AsyncCursorPage[RunStep]]:
+ """
+ Returns a list of run steps belonging to a run.
+
+ Args:
+ after: A cursor for use in pagination. `after` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ ending with obj_foo, your subsequent call can include after=obj_foo in order to
+ fetch the next page of the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ starting with obj_foo, your subsequent call can include before=obj_foo in order
+ to fetch the previous page of the list.
+
+ include: A list of additional fields to include in the response. Currently the only
+ supported value is `step_details.tool_calls[*].file_search.results[*].content`
+ to fetch the file search result content.
+
+ See the
+ [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+ for more information.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get_api_list(
+ f"/threads/{thread_id}/runs/{run_id}/steps",
+ page=AsyncCursorPage[RunStep],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "include": include,
+ "limit": limit,
+ "order": order,
+ },
+ step_list_params.StepListParams,
+ ),
+ ),
+ model=RunStep,
+ )
+
+
+class StepsWithRawResponse:
+ def __init__(self, steps: Steps) -> None:
+ self._steps = steps
+
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ steps.retrieve,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ steps.list,
+ )
+
+
+class AsyncStepsWithRawResponse:
+ def __init__(self, steps: AsyncSteps) -> None:
+ self._steps = steps
+
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ steps.retrieve,
+ )
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ steps.list,
+ )
+
+
+class StepsWithStreamingResponse:
+ def __init__(self, steps: Steps) -> None:
+ self._steps = steps
+
+ self.retrieve = to_streamed_response_wrapper(
+ steps.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ steps.list,
+ )
+
+
+class AsyncStepsWithStreamingResponse:
+ def __init__(self, steps: AsyncSteps) -> None:
+ self._steps = steps
+
+ self.retrieve = async_to_streamed_response_wrapper(
+ steps.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ steps.list,
+ )
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/threads.py b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/threads.py
new file mode 100644
index 00000000..d88559bd
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openai/resources/beta/threads/threads.py
@@ -0,0 +1,1875 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from functools import partial
+from typing_extensions import Literal, overload
+
+import httpx
+
+from .... import _legacy_response
+from .messages import (
+ Messages,
+ AsyncMessages,
+ MessagesWithRawResponse,
+ AsyncMessagesWithRawResponse,
+ MessagesWithStreamingResponse,
+ AsyncMessagesWithStreamingResponse,
+)
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+ required_args,
+ maybe_transform,
+ async_maybe_transform,
+)
+from .runs.runs import (
+ Runs,
+ AsyncRuns,
+ RunsWithRawResponse,
+ AsyncRunsWithRawResponse,
+ RunsWithStreamingResponse,
+ AsyncRunsWithStreamingResponse,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._streaming import Stream, AsyncStream
+from ....types.beta import (
+ thread_create_params,
+ thread_update_params,
+ thread_create_and_run_params,
+)
+from ...._base_client import make_request_options
+from ....lib.streaming import (
+ AssistantEventHandler,
+ AssistantEventHandlerT,
+ AssistantStreamManager,
+ AsyncAssistantEventHandler,
+ AsyncAssistantEventHandlerT,
+ AsyncAssistantStreamManager,
+)
+from ....types.beta.thread import Thread
+from ....types.beta.threads.run import Run
+from ....types.shared.chat_model import ChatModel
+from ....types.beta.thread_deleted import ThreadDeleted
+from ....types.shared_params.metadata import Metadata
+from ....types.beta.assistant_stream_event import AssistantStreamEvent
+from ....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
+from ....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
+
+__all__ = ["Threads", "AsyncThreads"]
+
+
+class Threads(SyncAPIResource):
+ @cached_property
+ def runs(self) -> Runs:
+ return Runs(self._client)
+
+ @cached_property
+ def messages(self) -> Messages:
+ return Messages(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> ThreadsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return ThreadsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> ThreadsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return ThreadsWithStreamingResponse(self)
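+
+ # Usage sketch (illustrative only): both prefixes wrap the same methods, e.g.
+ # with a configured `OpenAI` client:
+ #
+ #     raw = client.beta.threads.with_raw_response.create()
+ #     thread = raw.parse()  # typed Thread, plus access to the raw headers
+ #
+ #     with client.beta.threads.with_streaming_response.create() as response:
+ #         print(response.headers.get("content-type"))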
+
+ def create(
+ self,
+ *,
+ messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Thread:
+ """
+ Create a thread.
+
+ Args:
+ messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
+ start the thread with.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ tool_resources: A set of resources that are made available to the assistant's tools in this
+ thread. The resources are specific to the type of tool. For example, the
+ `code_interpreter` tool requires a list of file IDs, while the `file_search`
+ tool requires a list of vector store IDs.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ "/threads",
+ body=maybe_transform(
+ {
+ "messages": messages,
+ "metadata": metadata,
+ "tool_resources": tool_resources,
+ },
+ thread_create_params.ThreadCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Thread,
+ )
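+
+ # Example (illustrative sketch): creating a thread seeded with one user
+ # message; the message content and metadata values are placeholders.
+ #
+ #     thread = client.beta.threads.create(
+ #         messages=[{"role": "user", "content": "Hello!"}],
+ #         metadata={"purpose": "demo"},
+ #     )
+ #     print(thread.id)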
+
+ def retrieve(
+ self,
+ thread_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Thread:
+ """
+ Retrieves a thread.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._get(
+ f"/threads/{thread_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Thread,
+ )
+
+ def update(
+ self,
+ thread_id: str,
+ *,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Thread:
+ """
+ Modifies a thread.
+
+ Args:
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ tool_resources: A set of resources that are made available to the assistant's tools in this
+ thread. The resources are specific to the type of tool. For example, the
+ `code_interpreter` tool requires a list of file IDs, while the `file_search`
+ tool requires a list of vector store IDs.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ f"/threads/{thread_id}",
+ body=maybe_transform(
+ {
+ "metadata": metadata,
+ "tool_resources": tool_resources,
+ },
+ thread_update_params.ThreadUpdateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Thread,
+ )
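+
+ # Example (illustrative sketch): attaching metadata within the documented
+ # limits (at most 16 pairs, keys up to 64 chars, values up to 512 chars);
+ # the thread ID and metadata values are placeholders.
+ #
+ #     thread = client.beta.threads.update(
+ #         "thread_abc123",
+ #         metadata={"customer_id": "cus_123"},
+ #     )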
+
+ def delete(
+ self,
+ thread_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ThreadDeleted:
+ """
+ Delete a thread.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._delete(
+ f"/threads/{thread_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ThreadDeleted,
+ )
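+
+ # Example (illustrative sketch): fetching and then deleting a thread; the ID
+ # is a placeholder.
+ #
+ #     thread = client.beta.threads.retrieve("thread_abc123")
+ #     deleted = client.beta.threads.delete(thread.id)
+ #     assert deleted.deleted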
+
+ @overload
+ def create_and_run(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Create a thread and run it in one request.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ instructions: Override the default system message of the assistant. This is useful for
+ modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ thread: Options to create a new thread. If no thread is provided when running a request,
+ an empty thread will be created.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tool_resources: A set of resources that are used by the assistant's tools. The resources are
+ specific to the type of tool. For example, the `code_interpreter` tool requires
+ a list of file IDs, while the `file_search` tool requires a list of vector store
+ IDs.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+ control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create_and_run(
+ self,
+ *,
+ assistant_id: str,
+ stream: Literal[True],
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Stream[AssistantStreamEvent]:
+ """
+ Create a thread and run it in one request.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ instructions: Override the default system message of the assistant. This is useful for
+ modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ thread: Options to create a new thread. If no thread is provided when running a request,
+ an empty thread will be created.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tool_resources: A set of resources that are used by the assistant's tools. The resources are
+ specific to the type of tool. For example, the `code_interpreter` tool requires
+ a list of file IDs, while the `file_search` tool requires a list of vector store
+ IDs.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+ control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create_and_run(
+ self,
+ *,
+ assistant_id: str,
+ stream: bool,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | Stream[AssistantStreamEvent]:
+ """
+ Create a thread and run it in one request.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ instructions: Override the default system message of the assistant. This is useful for
+ modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ thread: Options to create a new thread. If no thread is provided when running a request,
+ an empty thread will be created.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tool_resources: A set of resources that are used by the assistant's tools. The resources are
+ specific to the type of tool. For example, the `code_interpreter` tool requires
+ a list of file IDs, while the `file_search` tool requires a list of vector store
+ IDs.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+ control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["assistant_id"], ["assistant_id", "stream"])
+ def create_and_run(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | Stream[AssistantStreamEvent]:
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ "/threads/runs",
+ body=maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "parallel_tool_calls": parallel_tool_calls,
+ "response_format": response_format,
+ "stream": stream,
+ "temperature": temperature,
+ "thread": thread,
+ "tool_choice": tool_choice,
+ "tool_resources": tool_resources,
+ "tools": tools,
+ "top_p": top_p,
+ "truncation_strategy": truncation_strategy,
+ },
+ thread_create_and_run_params.ThreadCreateAndRunParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=stream or False,
+ stream_cls=Stream[AssistantStreamEvent],
+ )
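+
+ # Example (illustrative sketch): the return type depends on `stream`. With
+ # `stream` omitted or False a `Run` is returned; with `stream=True` a
+ # `Stream[AssistantStreamEvent]` is returned instead. IDs are placeholders.
+ #
+ #     run = client.beta.threads.create_and_run(
+ #         assistant_id="asst_abc123",
+ #         thread={"messages": [{"role": "user", "content": "Hi"}]},
+ #     )
+ #
+ #     events = client.beta.threads.create_and_run(
+ #         assistant_id="asst_abc123", stream=True,
+ #     )
+ #     for event in events:
+ #         print(event.event)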
+
+ def create_and_run_poll(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ poll_interval_ms: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ A helper to create a thread, start a run and then poll for a terminal state.
+ More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ run = self.create_and_run(
+ assistant_id=assistant_id,
+ instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
+ metadata=metadata,
+ model=model,
+ parallel_tool_calls=parallel_tool_calls,
+ response_format=response_format,
+ temperature=temperature,
+ stream=False,
+ thread=thread,
+ tool_resources=tool_resources,
+ tool_choice=tool_choice,
+ truncation_strategy=truncation_strategy,
+ top_p=top_p,
+ tools=tools,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ )
+ return self.runs.poll(run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms)
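+
+ # Example (illustrative sketch): `create_and_run_poll` blocks until the run
+ # reaches a terminal state; `poll_interval_ms` tunes how often the run status
+ # is re-fetched. The assistant ID is a placeholder.
+ #
+ #     run = client.beta.threads.create_and_run_poll(
+ #         assistant_id="asst_abc123",
+ #         poll_interval_ms=500,
+ #     )
+ #     print(run.status)  # e.g. "completed", "requires_action", "failed"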
+
+ @overload
+ def create_and_run_stream(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandler]:
+ """Create a thread and stream the run back"""
+ ...
+
+ @overload
+ def create_and_run_stream(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ event_handler: AssistantEventHandlerT,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandlerT]:
+ """Create a thread and stream the run back"""
+ ...
+
+ def create_and_run_stream(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ event_handler: AssistantEventHandlerT | None = None,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+ """Create a thread and stream the run back"""
+ extra_headers = {
+ "OpenAI-Beta": "assistants=v2",
+ "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
+ "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+ **(extra_headers or {}),
+ }
+ make_request = partial(
+ self._post,
+ "/threads/runs",
+ body=maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "parallel_tool_calls": parallel_tool_calls,
+ "response_format": response_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "stream": True,
+ "thread": thread,
+ "tools": tools,
+ "tool_resources": tool_resources,
+ "truncation_strategy": truncation_strategy,
+ "top_p": top_p,
+ },
+ thread_create_and_run_params.ThreadCreateAndRunParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=True,
+ stream_cls=Stream[AssistantStreamEvent],
+ )
+ return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
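+
+ # Example (illustrative sketch): the returned manager is used as a context
+ # manager; events can be iterated directly, or handled via a subclassed
+ # `AssistantEventHandler` passed as `event_handler`. The ID is a placeholder.
+ #
+ #     with client.beta.threads.create_and_run_stream(
+ #         assistant_id="asst_abc123",
+ #     ) as stream:
+ #         for event in stream:
+ #             print(event.event)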
+
+
+class AsyncThreads(AsyncAPIResource):
+ @cached_property
+ def runs(self) -> AsyncRuns:
+ return AsyncRuns(self._client)
+
+ @cached_property
+ def messages(self) -> AsyncMessages:
+ return AsyncMessages(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncThreadsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncThreadsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncThreadsWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Thread:
+ """
+ Create a thread.
+
+ Args:
+ messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
+ start the thread with.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ tool_resources: A set of resources that are made available to the assistant's tools in this
+ thread. The resources are specific to the type of tool. For example, the
+ `code_interpreter` tool requires a list of file IDs, while the `file_search`
+ tool requires a list of vector store IDs.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ "/threads",
+ body=await async_maybe_transform(
+ {
+ "messages": messages,
+ "metadata": metadata,
+ "tool_resources": tool_resources,
+ },
+ thread_create_params.ThreadCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Thread,
+ )
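+
+ # Example (illustrative sketch): identical call shape to the sync resource,
+ # awaited on an `AsyncOpenAI` client:
+ #
+ #     client = AsyncOpenAI()
+ #     thread = await client.beta.threads.create(
+ #         messages=[{"role": "user", "content": "Hello!"}],
+ #     )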
+
+ async def retrieve(
+ self,
+ thread_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Thread:
+ """
+ Retrieves a thread.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._get(
+ f"/threads/{thread_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Thread,
+ )
+
+ async def update(
+ self,
+ thread_id: str,
+ *,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Thread:
+ """
+ Modifies a thread.
+
+ Args:
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ tool_resources: A set of resources that are made available to the assistant's tools in this
+ thread. The resources are specific to the type of tool. For example, the
+ `code_interpreter` tool requires a list of file IDs, while the `file_search`
+ tool requires a list of vector store IDs.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ f"/threads/{thread_id}",
+ body=await async_maybe_transform(
+ {
+ "metadata": metadata,
+ "tool_resources": tool_resources,
+ },
+ thread_update_params.ThreadUpdateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Thread,
+ )
+
+ async def delete(
+ self,
+ thread_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ThreadDeleted:
+ """
+ Delete a thread.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not thread_id:
+ raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._delete(
+ f"/threads/{thread_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ThreadDeleted,
+ )
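+
+ # Example (illustrative sketch): the async retrieve/update/delete methods
+ # mirror the sync ones; only the `await` differs. The ID is a placeholder.
+ #
+ #     deleted = await client.beta.threads.delete("thread_abc123")
+ #     assert deleted.deleted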
+
+ @overload
+ async def create_and_run(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ Create a thread and run it in one request.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ instructions: Override the default system message of the assistant. This is useful for
+ modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ thread: Options to create a new thread. If no thread is provided when running a request,
+ an empty thread will be created.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tool_resources: A set of resources that are used by the assistant's tools. The resources are
+ specific to the type of tool. For example, the `code_interpreter` tool requires
+ a list of file IDs, while the `file_search` tool requires a list of vector store
+ IDs.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+ control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create_and_run(
+ self,
+ *,
+ assistant_id: str,
+ stream: Literal[True],
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncStream[AssistantStreamEvent]:
+ """
+ Create a thread and run it in one request.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ instructions: Override the default system message of the assistant. This is useful for
+ modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ thread: Options to create a new thread. If no thread is provided when running a request,
+ an empty thread will be created.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tool_resources: A set of resources that are used by the assistant's tools. The resources are
+ specific to the type of tool. For example, the `code_interpreter` tool requires
+ a list of file IDs, while the `file_search` tool requires a list of vector store
+ IDs.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+ control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
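+
+ # A hedged usage sketch for this streaming overload (not part of the
+ # generated client; the assistant ID and message below are placeholders).
+ # With `stream=True`, the awaited call resolves to an async iterator of
+ # `AssistantStreamEvent`s rather than a `Run`:
+ #
+ #     from openai import AsyncOpenAI
+ #
+ #     client = AsyncOpenAI()
+ #     stream = await client.beta.threads.create_and_run(
+ #         assistant_id="asst_abc123",
+ #         thread={"messages": [{"role": "user", "content": "Hello!"}]},
+ #         stream=True,
+ #     )
+ #     async for event in stream:
+ #         print(event.event)  # e.g. "thread.run.created", "thread.message.delta"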
+
+ @overload
+ async def create_and_run(
+ self,
+ *,
+ assistant_id: str,
+ stream: bool,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | AsyncStream[AssistantStreamEvent]:
+ """
+ Create a thread and run it in one request.
+
+ Args:
+ assistant_id: The ID of the
+ [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+ execute this run.
+
+ stream: If `true`, returns a stream of events that happen during the Run as server-sent
+ events, terminating when the Run enters a terminal state with a `data: [DONE]`
+ message.
+
+ instructions: Override the default system message of the assistant. This is useful for
+ modifying the behavior on a per-run basis.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+ be used to execute this run. If a value is provided here, it will override the
+ model associated with the assistant. If not, the model associated with the
+ assistant will be used.
+
+ parallel_tool_calls: Whether to enable
+ [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+ during tool use.
+
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+ [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+ and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+ Outputs which ensures the model will match your supplied JSON schema. Learn more
+ in the
+ [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic.
+
+ thread: Options to create a new thread. If no thread is provided when running a request,
+ an empty thread will be created.
+
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling one or more
+ tools. `required` means the model must call one or more tools before responding
+ to the user. Specifying a particular tool like `{"type": "file_search"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
+ tool_resources: A set of resources that are used by the assistant's tools. The resources are
+ specific to the type of tool. For example, the `code_interpreter` tool requires
+ a list of file IDs, while the `file_search` tool requires a list of vector store
+ IDs.
+
+ tools: Override the tools the assistant can use for this run. This is useful for
+ modifying the behavior on a per-run basis.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or temperature but not both.
+
+ truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+ control the initial context window of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["assistant_id"], ["assistant_id", "stream"])
+ async def create_and_run(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run | AsyncStream[AssistantStreamEvent]:
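+ # The Assistants API is beta; every request must opt in via this header.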
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ "/threads/runs",
+ body=await async_maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "parallel_tool_calls": parallel_tool_calls,
+ "response_format": response_format,
+ "stream": stream,
+ "temperature": temperature,
+ "thread": thread,
+ "tool_choice": tool_choice,
+ "tool_resources": tool_resources,
+ "tools": tools,
+ "top_p": top_p,
+ "truncation_strategy": truncation_strategy,
+ },
+ thread_create_and_run_params.ThreadCreateAndRunParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=stream or False,
+ stream_cls=AsyncStream[AssistantStreamEvent],
+ )
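+
+ # A hedged usage sketch (IDs and messages are placeholders): without
+ # `stream=True`, the awaited call resolves to the newly created `Run`,
+ # which is typically still `queued` or `in_progress` when returned:
+ #
+ #     run = await client.beta.threads.create_and_run(
+ #         assistant_id="asst_abc123",
+ #         instructions="Answer in one sentence.",
+ #         thread={"messages": [{"role": "user", "content": "What is an API?"}]},
+ #     )
+ #     print(run.id, run.status)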
+
+ async def create_and_run_poll(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ poll_interval_ms: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Run:
+ """
+ A helper to create a thread, start a run and then poll for a terminal state.
+ More information on Run lifecycles can be found here:
+ https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+ """
+ run = await self.create_and_run(
+ assistant_id=assistant_id,
+ instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
+ metadata=metadata,
+ model=model,
+ parallel_tool_calls=parallel_tool_calls,
+ response_format=response_format,
+ temperature=temperature,
+ stream=False,
+ thread=thread,
+ tool_resources=tool_resources,
+ tool_choice=tool_choice,
+ truncation_strategy=truncation_strategy,
+ top_p=top_p,
+ tools=tools,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ )
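+ # `runs.poll` re-fetches the run until it leaves the `queued`,
+ # `in_progress`, and `cancelling` states, then returns it.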
+ return await self.runs.poll(
+ run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms
+ )
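+
+ # A hedged usage sketch (IDs are placeholders): the poll helper only
+ # returns once the run has settled, so the result can be checked for a
+ # terminal status directly:
+ #
+ #     run = await client.beta.threads.create_and_run_poll(
+ #         assistant_id="asst_abc123",
+ #         thread={"messages": [{"role": "user", "content": "Summarize this."}]},
+ #         poll_interval_ms=500,
+ #     )
+ #     if run.status == "completed":
+ #         messages = await client.beta.threads.messages.list(run.thread_id)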
+
+ @overload
+ def create_and_run_stream(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+ """Create a thread and stream the run back"""
+ ...
+
+ @overload
+ def create_and_run_stream(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ event_handler: AsyncAssistantEventHandlerT,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+ """Create a thread and stream the run back"""
+ ...
+
+ def create_and_run_stream(
+ self,
+ *,
+ assistant_id: str,
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+ tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+ event_handler: AsyncAssistantEventHandlerT | None = None,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> (
+ AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+ | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+ ):
+ """Create a thread and stream the run back"""
+ extra_headers = {
+ "OpenAI-Beta": "assistants=v2",
+ "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
+ "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+ **(extra_headers or {}),
+ }
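+ # `stream` is pinned to True in the body below; the returned manager
+ # feeds the resulting server-sent events into the supplied (or default)
+ # event handler.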
+ request = self._post(
+ "/threads/runs",
+ body=maybe_transform(
+ {
+ "assistant_id": assistant_id,
+ "instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
+ "metadata": metadata,
+ "model": model,
+ "parallel_tool_calls": parallel_tool_calls,
+ "response_format": response_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "stream": True,
+ "thread": thread,
+ "tools": tools,
+ "tool_resources": tool_resources,
+ "truncation_strategy": truncation_strategy,
+ "top_p": top_p,
+ },
+ thread_create_and_run_params.ThreadCreateAndRunParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Run,
+ stream=True,
+ stream_cls=AsyncStream[AssistantStreamEvent],
+ )
+ return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
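+
+ # A hedged usage sketch (placeholders throughout): entering the manager
+ # with `async with` sends the request and yields the event handler, whose
+ # convenience iterators expose the streamed output:
+ #
+ #     async with client.beta.threads.create_and_run_stream(
+ #         assistant_id="asst_abc123",
+ #         thread={"messages": [{"role": "user", "content": "Tell me a joke."}]},
+ #     ) as stream:
+ #         async for text in stream.text_deltas:
+ #             print(text, end="", flush=True)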
+
+
+class ThreadsWithRawResponse:
+ def __init__(self, threads: Threads) -> None:
+ self._threads = threads
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ threads.create,
+ )
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ threads.retrieve,
+ )
+ self.update = _legacy_response.to_raw_response_wrapper(
+ threads.update,
+ )
+ self.delete = _legacy_response.to_raw_response_wrapper(
+ threads.delete,
+ )
+ self.create_and_run = _legacy_response.to_raw_response_wrapper(
+ threads.create_and_run,
+ )
+
+ @cached_property
+ def runs(self) -> RunsWithRawResponse:
+ return RunsWithRawResponse(self._threads.runs)
+
+ @cached_property
+ def messages(self) -> MessagesWithRawResponse:
+ return MessagesWithRawResponse(self._threads.messages)
+
+
+class AsyncThreadsWithRawResponse:
+ def __init__(self, threads: AsyncThreads) -> None:
+ self._threads = threads
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ threads.create,
+ )
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ threads.retrieve,
+ )
+ self.update = _legacy_response.async_to_raw_response_wrapper(
+ threads.update,
+ )
+ self.delete = _legacy_response.async_to_raw_response_wrapper(
+ threads.delete,
+ )
+ self.create_and_run = _legacy_response.async_to_raw_response_wrapper(
+ threads.create_and_run,
+ )
+
+ @cached_property
+ def runs(self) -> AsyncRunsWithRawResponse:
+ return AsyncRunsWithRawResponse(self._threads.runs)
+
+ @cached_property
+ def messages(self) -> AsyncMessagesWithRawResponse:
+ return AsyncMessagesWithRawResponse(self._threads.messages)
+
+
+class ThreadsWithStreamingResponse:
+ def __init__(self, threads: Threads) -> None:
+ self._threads = threads
+
+ self.create = to_streamed_response_wrapper(
+ threads.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ threads.retrieve,
+ )
+ self.update = to_streamed_response_wrapper(
+ threads.update,
+ )
+ self.delete = to_streamed_response_wrapper(
+ threads.delete,
+ )
+ self.create_and_run = to_streamed_response_wrapper(
+ threads.create_and_run,
+ )
+
+ @cached_property
+ def runs(self) -> RunsWithStreamingResponse:
+ return RunsWithStreamingResponse(self._threads.runs)
+
+ @cached_property
+ def messages(self) -> MessagesWithStreamingResponse:
+ return MessagesWithStreamingResponse(self._threads.messages)
+
+
+class AsyncThreadsWithStreamingResponse:
+ def __init__(self, threads: AsyncThreads) -> None:
+ self._threads = threads
+
+ self.create = async_to_streamed_response_wrapper(
+ threads.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ threads.retrieve,
+ )
+ self.update = async_to_streamed_response_wrapper(
+ threads.update,
+ )
+ self.delete = async_to_streamed_response_wrapper(
+ threads.delete,
+ )
+ self.create_and_run = async_to_streamed_response_wrapper(
+ threads.create_and_run,
+ )
+
+ @cached_property
+ def runs(self) -> AsyncRunsWithStreamingResponse:
+ return AsyncRunsWithStreamingResponse(self._threads.runs)
+
+ @cached_property
+ def messages(self) -> AsyncMessagesWithStreamingResponse:
+ return AsyncMessagesWithStreamingResponse(self._threads.messages)
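+
+ # A hedged usage sketch for the wrapper variants above (the thread ID is a
+ # placeholder): `.with_raw_response` exposes response headers alongside the
+ # parsed object, while `.with_streaming_response` defers reading the body:
+ #
+ #     raw = await client.beta.threads.with_raw_response.create()
+ #     print(raw.headers.get("x-request-id"))
+ #     thread = raw.parse()  # the parsed `Thread` object
+ #
+ #     async with client.beta.threads.with_streaming_response.retrieve(
+ #         "thread_abc123",
+ #     ) as response:
+ #         thread = await response.parse()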