diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/openai/resources/embeddings.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/openai/resources/embeddings.py | 290 |
1 files changed, 290 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/openai/resources/embeddings.py b/.venv/lib/python3.12/site-packages/openai/resources/embeddings.py new file mode 100644 index 00000000..a392d5eb --- /dev/null +++ b/.venv/lib/python3.12/site-packages/openai/resources/embeddings.py @@ -0,0 +1,290 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import array +import base64 +from typing import List, Union, Iterable, cast +from typing_extensions import Literal + +import httpx + +from .. import _legacy_response +from ..types import embedding_create_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import is_given, maybe_transform +from .._compat import cached_property +from .._extras import numpy as np, has_numpy +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .._base_client import make_request_options +from ..types.embedding_model import EmbeddingModel +from ..types.create_embedding_response import CreateEmbeddingResponse + +__all__ = ["Embeddings", "AsyncEmbeddings"] + + +class Embeddings(SyncAPIResource): + @cached_property + def with_raw_response(self) -> EmbeddingsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return EmbeddingsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> EmbeddingsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return EmbeddingsWithStreamingResponse(self) + + def create( + self, + *, + input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + model: Union[str, EmbeddingModel], + dimensions: int | NotGiven = NOT_GIVEN, + encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CreateEmbeddingResponse: + """ + Creates an embedding vector representing the input text. + + Args: + input: Input text to embed, encoded as a string or array of tokens. To embed multiple + inputs in a single request, pass an array of strings or array of token arrays. + The input must not exceed the max input tokens for the model (8192 tokens for + `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 + dimensions or less. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. Some models may also impose a limit on total number of + tokens summed across inputs. + + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + dimensions: The number of dimensions the resulting output embeddings should have. Only + supported in `text-embedding-3` and later models. + + encoding_format: The format to return the embeddings in. Can be either `float` or + [`base64`](https://pypi.org/project/pybase64/). + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + params = { + "input": input, + "model": model, + "user": user, + "dimensions": dimensions, + "encoding_format": encoding_format, + } + if not is_given(encoding_format): + params["encoding_format"] = "base64" + + def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: + if is_given(encoding_format): + # don't modify the response object if a user explicitly asked for a format + return obj + + for embedding in obj.data: + data = cast(object, embedding.embedding) + if not isinstance(data, str): + continue + if not has_numpy(): + # use array for base64 optimisation + embedding.embedding = array.array("f", base64.b64decode(data)).tolist() + else: + embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] + base64.b64decode(data), dtype="float32" + ).tolist() + + return obj + + return self._post( + "/embeddings", + body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=parser, + ), + cast_to=CreateEmbeddingResponse, + ) + + +class AsyncEmbeddings(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncEmbeddingsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncEmbeddingsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncEmbeddingsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncEmbeddingsWithStreamingResponse(self) + + async def create( + self, + *, + input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + model: Union[str, EmbeddingModel], + dimensions: int | NotGiven = NOT_GIVEN, + encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CreateEmbeddingResponse: + """ + Creates an embedding vector representing the input text. + + Args: + input: Input text to embed, encoded as a string or array of tokens. To embed multiple + inputs in a single request, pass an array of strings or array of token arrays. + The input must not exceed the max input tokens for the model (8192 tokens for + `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 + dimensions or less. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. Some models may also impose a limit on total number of + tokens summed across inputs. + + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + dimensions: The number of dimensions the resulting output embeddings should have. Only + supported in `text-embedding-3` and later models. + + encoding_format: The format to return the embeddings in. Can be either `float` or + [`base64`](https://pypi.org/project/pybase64/). + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + params = { + "input": input, + "model": model, + "user": user, + "dimensions": dimensions, + "encoding_format": encoding_format, + } + if not is_given(encoding_format): + params["encoding_format"] = "base64" + + def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: + if is_given(encoding_format): + # don't modify the response object if a user explicitly asked for a format + return obj + + for embedding in obj.data: + data = cast(object, embedding.embedding) + if not isinstance(data, str): + continue + if not has_numpy(): + # use array for base64 optimisation + embedding.embedding = array.array("f", base64.b64decode(data)).tolist() + else: + embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] + base64.b64decode(data), dtype="float32" + ).tolist() + + return obj + + return await self._post( + "/embeddings", + body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=parser, + ), + cast_to=CreateEmbeddingResponse, + ) + + +class EmbeddingsWithRawResponse: + def __init__(self, embeddings: Embeddings) -> None: + self._embeddings = embeddings + + self.create = _legacy_response.to_raw_response_wrapper( + embeddings.create, + ) + + +class AsyncEmbeddingsWithRawResponse: + def __init__(self, embeddings: AsyncEmbeddings) -> None: + self._embeddings = embeddings + + self.create = _legacy_response.async_to_raw_response_wrapper( + embeddings.create, + ) + + +class EmbeddingsWithStreamingResponse: + def __init__(self, embeddings: Embeddings) -> None: + self._embeddings = embeddings + + self.create = to_streamed_response_wrapper( + embeddings.create, + ) + + +class AsyncEmbeddingsWithStreamingResponse: + def __init__(self, embeddings: AsyncEmbeddings) -> None: + self._embeddings = embeddings + + self.create = async_to_streamed_response_wrapper( + embeddings.create, + ) |