|  |  |  |
|---|---|---|
| author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
| committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
| commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) |  |
| tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/litellm/responses |  |
| parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) |  |
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/responses')
3 files changed, 615 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/litellm/responses/main.py b/.venv/lib/python3.12/site-packages/litellm/responses/main.py
new file mode 100644
index 00000000..aec2f8fe
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/responses/main.py
@@ -0,0 +1,248 @@
+import asyncio
+import contextvars
+from functools import partial
+from typing import Any, Dict, Iterable, List, Literal, Optional, Union
+
+import httpx
+
+import litellm
+from litellm.constants import request_timeout
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
+from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
+from litellm.responses.utils import ResponsesAPIRequestUtils
+from litellm.types.llms.openai import (
+    Reasoning,
+    ResponseIncludable,
+    ResponseInputParam,
+    ResponsesAPIOptionalRequestParams,
+    ResponsesAPIResponse,
+    ResponseTextConfigParam,
+    ToolChoice,
+    ToolParam,
+)
+from litellm.types.router import GenericLiteLLMParams
+from litellm.utils import ProviderConfigManager, client
+
+from .streaming_iterator import BaseResponsesAPIStreamingIterator
+
+####### ENVIRONMENT VARIABLES ###################
+# Initialize any necessary instances or variables here
+base_llm_http_handler = BaseLLMHTTPHandler()
+#################################################
+
+
+@client
+async def aresponses(
+    input: Union[str, ResponseInputParam],
+    model: str,
+    include: Optional[List[ResponseIncludable]] = None,
+    instructions: Optional[str] = None,
+    max_output_tokens: Optional[int] = None,
+    metadata: Optional[Dict[str, Any]] = None,
+    parallel_tool_calls: Optional[bool] = None,
+    previous_response_id: Optional[str] = None,
+    reasoning: Optional[Reasoning] = None,
+    store: Optional[bool] = None,
+    stream: Optional[bool] = None,
+    temperature: Optional[float] = None,
+    text: Optional[ResponseTextConfigParam] = None,
+    tool_choice: Optional[ToolChoice] = None,
+    tools: Optional[Iterable[ToolParam]] = None,
+    top_p: Optional[float] = None,
+    truncation: Optional[Literal["auto", "disabled"]] = None,
+    user: Optional[str] = None,
+    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+    # The extra values given here take precedence over values defined on the client or passed to this method.
+    extra_headers: Optional[Dict[str, Any]] = None,
+    extra_query: Optional[Dict[str, Any]] = None,
+    extra_body: Optional[Dict[str, Any]] = None,
+    timeout: Optional[Union[float, httpx.Timeout]] = None,
+    # LiteLLM specific params,
+    custom_llm_provider: Optional[str] = None,
+    **kwargs,
+) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
+    """
+    Async: Handles responses API requests by reusing the synchronous function
+    """
+    local_vars = locals()
+    try:
+        loop = asyncio.get_event_loop()
+        kwargs["aresponses"] = True
+
+        # get custom llm provider so we can use this for mapping exceptions
+        if custom_llm_provider is None:
+            _, custom_llm_provider, _, _ = litellm.get_llm_provider(
+                model=model, api_base=local_vars.get("base_url", None)
+            )
+
+        func = partial(
+            responses,
+            input=input,
+            model=model,
+            include=include,
+            instructions=instructions,
+            max_output_tokens=max_output_tokens,
+            metadata=metadata,
+            parallel_tool_calls=parallel_tool_calls,
+            previous_response_id=previous_response_id,
+            reasoning=reasoning,
+            store=store,
+            stream=stream,
+            temperature=temperature,
+            text=text,
+            tool_choice=tool_choice,
+            tools=tools,
+            top_p=top_p,
+            truncation=truncation,
+            user=user,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+            custom_llm_provider=custom_llm_provider,
+            **kwargs,
+        )
+
+        ctx = contextvars.copy_context()
+        func_with_context = partial(ctx.run, func)
+        init_response = await loop.run_in_executor(None, func_with_context)
+
+        if asyncio.iscoroutine(init_response):
+            response = await init_response
+        else:
+            response = init_response
+        return response
+    except Exception as e:
+        raise litellm.exception_type(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            original_exception=e,
+            completion_kwargs=local_vars,
+            extra_kwargs=kwargs,
+        )
+
+
+@client
+def responses(
+    input: Union[str, ResponseInputParam],
+    model: str,
+    include: Optional[List[ResponseIncludable]] = None,
+    instructions: Optional[str] = None,
+    max_output_tokens: Optional[int] = None,
+    metadata: Optional[Dict[str, Any]] = None,
+    parallel_tool_calls: Optional[bool] = None,
+    previous_response_id: Optional[str] = None,
+    reasoning: Optional[Reasoning] = None,
+    store: Optional[bool] = None,
+    stream: Optional[bool] = None,
+    temperature: Optional[float] = None,
+    text: Optional[ResponseTextConfigParam] = None,
+    tool_choice: Optional[ToolChoice] = None,
+    tools: Optional[Iterable[ToolParam]] = None,
+    top_p: Optional[float] = None,
+    truncation: Optional[Literal["auto", "disabled"]] = None,
+    user: Optional[str] = None,
+    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+    # The extra values given here take precedence over values defined on the client or passed to this method.
+    extra_headers: Optional[Dict[str, Any]] = None,
+    extra_query: Optional[Dict[str, Any]] = None,
+    extra_body: Optional[Dict[str, Any]] = None,
+    timeout: Optional[Union[float, httpx.Timeout]] = None,
+    # LiteLLM specific params,
+    custom_llm_provider: Optional[str] = None,
+    **kwargs,
+):
+    """
+    Synchronous version of the Responses API.
+    Uses the synchronous HTTP handler to make requests.
+ """ + local_vars = locals() + try: + litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj") # type: ignore + litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None) + _is_async = kwargs.pop("aresponses", False) is True + + # get llm provider logic + litellm_params = GenericLiteLLMParams(**kwargs) + model, custom_llm_provider, dynamic_api_key, dynamic_api_base = ( + litellm.get_llm_provider( + model=model, + custom_llm_provider=custom_llm_provider, + api_base=litellm_params.api_base, + api_key=litellm_params.api_key, + ) + ) + + # get provider config + responses_api_provider_config: Optional[BaseResponsesAPIConfig] = ( + ProviderConfigManager.get_provider_responses_api_config( + model=model, + provider=litellm.LlmProviders(custom_llm_provider), + ) + ) + + if responses_api_provider_config is None: + raise litellm.BadRequestError( + model=model, + llm_provider=custom_llm_provider, + message=f"Responses API not available for custom_llm_provider={custom_llm_provider}, model: {model}", + ) + + local_vars.update(kwargs) + # Get ResponsesAPIOptionalRequestParams with only valid parameters + response_api_optional_params: ResponsesAPIOptionalRequestParams = ( + ResponsesAPIRequestUtils.get_requested_response_api_optional_param( + local_vars + ) + ) + + # Get optional parameters for the responses API + responses_api_request_params: Dict = ( + ResponsesAPIRequestUtils.get_optional_params_responses_api( + model=model, + responses_api_provider_config=responses_api_provider_config, + response_api_optional_params=response_api_optional_params, + ) + ) + + # Pre Call logging + litellm_logging_obj.update_environment_variables( + model=model, + user=user, + optional_params=dict(responses_api_request_params), + litellm_params={ + "litellm_call_id": litellm_call_id, + **responses_api_request_params, + }, + custom_llm_provider=custom_llm_provider, + ) + + # Call the handler with _is_async flag instead of directly calling the async handler + response = base_llm_http_handler.response_api_handler( + model=model, + input=input, + responses_api_provider_config=responses_api_provider_config, + response_api_optional_request_params=responses_api_request_params, + custom_llm_provider=custom_llm_provider, + litellm_params=litellm_params, + logging_obj=litellm_logging_obj, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout or request_timeout, + _is_async=_is_async, + client=kwargs.get("client"), + fake_stream=responses_api_provider_config.should_fake_stream( + model=model, stream=stream, custom_llm_provider=custom_llm_provider + ), + ) + + return response + except Exception as e: + raise litellm.exception_type( + model=model, + custom_llm_provider=custom_llm_provider, + original_exception=e, + completion_kwargs=local_vars, + extra_kwargs=kwargs, + ) diff --git a/.venv/lib/python3.12/site-packages/litellm/responses/streaming_iterator.py b/.venv/lib/python3.12/site-packages/litellm/responses/streaming_iterator.py new file mode 100644 index 00000000..3039efb9 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/responses/streaming_iterator.py @@ -0,0 +1,270 @@ +import asyncio +import json +from datetime import datetime +from typing import Optional + +import httpx + +from litellm.constants import STREAM_SSE_DONE_STRING +from litellm.litellm_core_utils.asyncify import run_async_function +from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj +from litellm.litellm_core_utils.thread_pool_executor import executor +from 
diff --git a/.venv/lib/python3.12/site-packages/litellm/responses/streaming_iterator.py b/.venv/lib/python3.12/site-packages/litellm/responses/streaming_iterator.py
new file mode 100644
index 00000000..3039efb9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/responses/streaming_iterator.py
@@ -0,0 +1,270 @@
+import asyncio
+import json
+from datetime import datetime
+from typing import Optional
+
+import httpx
+
+from litellm.constants import STREAM_SSE_DONE_STRING
+from litellm.litellm_core_utils.asyncify import run_async_function
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.litellm_core_utils.thread_pool_executor import executor
+from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
+from litellm.types.llms.openai import (
+    ResponseCompletedEvent,
+    ResponsesAPIStreamEvents,
+    ResponsesAPIStreamingResponse,
+)
+from litellm.utils import CustomStreamWrapper
+
+
+class BaseResponsesAPIStreamingIterator:
+    """
+    Base class for streaming iterators that process responses from the Responses API.
+
+    This class contains shared logic for both synchronous and asynchronous iterators.
+    """
+
+    def __init__(
+        self,
+        response: httpx.Response,
+        model: str,
+        responses_api_provider_config: BaseResponsesAPIConfig,
+        logging_obj: LiteLLMLoggingObj,
+    ):
+        self.response = response
+        self.model = model
+        self.logging_obj = logging_obj
+        self.finished = False
+        self.responses_api_provider_config = responses_api_provider_config
+        self.completed_response: Optional[ResponsesAPIStreamingResponse] = None
+        self.start_time = datetime.now()
+
+    def _process_chunk(self, chunk):
+        """Process a single chunk of data from the stream"""
+        if not chunk:
+            return None
+
+        # Handle SSE format (data: {...})
+        chunk = CustomStreamWrapper._strip_sse_data_from_chunk(chunk)
+        if chunk is None:
+            return None
+
+        # Handle "[DONE]" marker
+        if chunk == STREAM_SSE_DONE_STRING:
+            self.finished = True
+            return None
+
+        try:
+            # Parse the JSON chunk
+            parsed_chunk = json.loads(chunk)
+
+            # Format as ResponsesAPIStreamingResponse
+            if isinstance(parsed_chunk, dict):
+                openai_responses_api_chunk = (
+                    self.responses_api_provider_config.transform_streaming_response(
+                        model=self.model,
+                        parsed_chunk=parsed_chunk,
+                        logging_obj=self.logging_obj,
+                    )
+                )
+                # Store the completed response
+                if (
+                    openai_responses_api_chunk
+                    and openai_responses_api_chunk.type
+                    == ResponsesAPIStreamEvents.RESPONSE_COMPLETED
+                ):
+                    self.completed_response = openai_responses_api_chunk
+                    self._handle_logging_completed_response()
+
+                return openai_responses_api_chunk
+
+            return None
+        except json.JSONDecodeError:
+            # If we can't parse the chunk, continue
+            return None
+
+    def _handle_logging_completed_response(self):
+        """Base implementation - should be overridden by subclasses"""
+        pass
+
+
+class ResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
+    """
+    Async iterator for processing streaming responses from the Responses API.
+ """ + + def __init__( + self, + response: httpx.Response, + model: str, + responses_api_provider_config: BaseResponsesAPIConfig, + logging_obj: LiteLLMLoggingObj, + ): + super().__init__(response, model, responses_api_provider_config, logging_obj) + self.stream_iterator = response.aiter_lines() + + def __aiter__(self): + return self + + async def __anext__(self) -> ResponsesAPIStreamingResponse: + try: + while True: + # Get the next chunk from the stream + try: + chunk = await self.stream_iterator.__anext__() + except StopAsyncIteration: + self.finished = True + raise StopAsyncIteration + + result = self._process_chunk(chunk) + + if self.finished: + raise StopAsyncIteration + elif result is not None: + return result + # If result is None, continue the loop to get the next chunk + + except httpx.HTTPError as e: + # Handle HTTP errors + self.finished = True + raise e + + def _handle_logging_completed_response(self): + """Handle logging for completed responses in async context""" + asyncio.create_task( + self.logging_obj.async_success_handler( + result=self.completed_response, + start_time=self.start_time, + end_time=datetime.now(), + cache_hit=None, + ) + ) + + executor.submit( + self.logging_obj.success_handler, + result=self.completed_response, + cache_hit=None, + start_time=self.start_time, + end_time=datetime.now(), + ) + + +class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator): + """ + Synchronous iterator for processing streaming responses from the Responses API. + """ + + def __init__( + self, + response: httpx.Response, + model: str, + responses_api_provider_config: BaseResponsesAPIConfig, + logging_obj: LiteLLMLoggingObj, + ): + super().__init__(response, model, responses_api_provider_config, logging_obj) + self.stream_iterator = response.iter_lines() + + def __iter__(self): + return self + + def __next__(self): + try: + while True: + # Get the next chunk from the stream + try: + chunk = next(self.stream_iterator) + except StopIteration: + self.finished = True + raise StopIteration + + result = self._process_chunk(chunk) + + if self.finished: + raise StopIteration + elif result is not None: + return result + # If result is None, continue the loop to get the next chunk + + except httpx.HTTPError as e: + # Handle HTTP errors + self.finished = True + raise e + + def _handle_logging_completed_response(self): + """Handle logging for completed responses in sync context""" + run_async_function( + async_function=self.logging_obj.async_success_handler, + result=self.completed_response, + start_time=self.start_time, + end_time=datetime.now(), + cache_hit=None, + ) + + executor.submit( + self.logging_obj.success_handler, + result=self.completed_response, + cache_hit=None, + start_time=self.start_time, + end_time=datetime.now(), + ) + + +class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator): + """ + mock iterator - some models like o1-pro do not support streaming, we need to fake a stream + """ + + def __init__( + self, + response: httpx.Response, + model: str, + responses_api_provider_config: BaseResponsesAPIConfig, + logging_obj: LiteLLMLoggingObj, + ): + self.raw_http_response = response + super().__init__( + response=response, + model=model, + responses_api_provider_config=responses_api_provider_config, + logging_obj=logging_obj, + ) + self.is_done = False + + def __aiter__(self): + return self + + async def __anext__(self) -> ResponsesAPIStreamingResponse: + if self.is_done: + raise StopAsyncIteration + self.is_done = True + transformed_response 
+        transformed_response = (
+            self.responses_api_provider_config.transform_response_api_response(
+                model=self.model,
+                raw_response=self.raw_http_response,
+                logging_obj=self.logging_obj,
+            )
+        )
+        return ResponseCompletedEvent(
+            type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
+            response=transformed_response,
+        )
+
+    def __iter__(self):
+        return self
+
+    def __next__(self) -> ResponsesAPIStreamingResponse:
+        if self.is_done:
+            raise StopIteration
+        self.is_done = True
+        transformed_response = (
+            self.responses_api_provider_config.transform_response_api_response(
+                model=self.model,
+                raw_response=self.raw_http_response,
+                logging_obj=self.logging_obj,
+            )
+        )
+        return ResponseCompletedEvent(
+            type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
+            response=transformed_response,
+        )
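As an aside (not part of the commit): every iterator in streaming_iterator.py funnels raw server-sent-event lines through _process_chunk(), which strips the "data:" prefix, watches for the "[DONE]" sentinel, and JSON-decodes everything else. The standalone sketch below illustrates that framing with plain json; all names are local to the example rather than litellm APIs, and the sample event type is only an assumed payload shape.

```python
# Minimal SSE line-parsing sketch, mirroring the shape of _process_chunk() above.
import json
from typing import Optional

DONE_SENTINEL = "[DONE]"  # assumed equivalent of STREAM_SSE_DONE_STRING


def parse_sse_line(line: str) -> Optional[dict]:
    """Decode one SSE line into an event dict, or return None to skip it."""
    if not line:
        return None  # blank keep-alive lines between events
    if line.startswith("data:"):
        line = line[len("data:"):].strip()
    if line == DONE_SENTINEL:
        return None  # end-of-stream marker; the real iterator sets finished=True here
    try:
        return json.loads(line)
    except json.JSONDecodeError:
        return None  # malformed chunk: skip it and keep reading, as the iterator does


if __name__ == "__main__":
    sample = [
        'data: {"type": "response.output_text.delta", "delta": "Hello"}',
        "data: [DONE]",
    ]
    for raw in sample:
        print(parse_sse_line(raw))
```

The MockResponsesAPIStreamingIterator follows the same contract but yields exactly one RESPONSE_COMPLETED event, which is how non-streaming models such as o1-pro are made to look like a stream.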
diff --git a/.venv/lib/python3.12/site-packages/litellm/responses/utils.py b/.venv/lib/python3.12/site-packages/litellm/responses/utils.py
new file mode 100644
index 00000000..49d850ec
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/responses/utils.py
@@ -0,0 +1,97 @@
+from typing import Any, Dict, cast, get_type_hints
+
+import litellm
+from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
+from litellm.types.llms.openai import (
+    ResponseAPIUsage,
+    ResponsesAPIOptionalRequestParams,
+)
+from litellm.types.utils import Usage
+
+
+class ResponsesAPIRequestUtils:
+    """Helper utils for constructing ResponseAPI requests"""
+
+    @staticmethod
+    def get_optional_params_responses_api(
+        model: str,
+        responses_api_provider_config: BaseResponsesAPIConfig,
+        response_api_optional_params: ResponsesAPIOptionalRequestParams,
+    ) -> Dict:
+        """
+        Get optional parameters for the responses API.
+
+        Args:
+            params: Dictionary of all parameters
+            model: The model name
+            responses_api_provider_config: The provider configuration for responses API
+
+        Returns:
+            A dictionary of supported parameters for the responses API
+        """
+        # Remove None values and internal parameters
+
+        # Get supported parameters for the model
+        supported_params = responses_api_provider_config.get_supported_openai_params(
+            model
+        )
+
+        # Check for unsupported parameters
+        unsupported_params = [
+            param
+            for param in response_api_optional_params
+            if param not in supported_params
+        ]
+
+        if unsupported_params:
+            raise litellm.UnsupportedParamsError(
+                model=model,
+                message=f"The following parameters are not supported for model {model}: {', '.join(unsupported_params)}",
+            )
+
+        # Map parameters to provider-specific format
+        mapped_params = responses_api_provider_config.map_openai_params(
+            response_api_optional_params=response_api_optional_params,
+            model=model,
+            drop_params=litellm.drop_params,
+        )
+
+        return mapped_params
+
+    @staticmethod
+    def get_requested_response_api_optional_param(
+        params: Dict[str, Any]
+    ) -> ResponsesAPIOptionalRequestParams:
+        """
+        Filter parameters to only include those defined in ResponsesAPIOptionalRequestParams.
+
+        Args:
+            params: Dictionary of parameters to filter
+
+        Returns:
+            ResponsesAPIOptionalRequestParams instance with only the valid parameters
+        """
+        valid_keys = get_type_hints(ResponsesAPIOptionalRequestParams).keys()
+        filtered_params = {k: v for k, v in params.items() if k in valid_keys}
+        return cast(ResponsesAPIOptionalRequestParams, filtered_params)
+
+
+class ResponseAPILoggingUtils:
+    @staticmethod
+    def _is_response_api_usage(usage: dict) -> bool:
+        """returns True if usage is from OpenAI Response API"""
+        if "input_tokens" in usage and "output_tokens" in usage:
+            return True
+        return False
+
+    @staticmethod
+    def _transform_response_api_usage_to_chat_usage(usage: dict) -> Usage:
+        """Transforms the ResponseAPIUsage object to a Usage object"""
+        response_api_usage: ResponseAPIUsage = ResponseAPIUsage(**usage)
+        prompt_tokens: int = response_api_usage.input_tokens or 0
+        completion_tokens: int = response_api_usage.output_tokens or 0
+        return Usage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=prompt_tokens + completion_tokens,
+        )
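One more illustrative note (not part of the commit): ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage() maps the Responses API's input_tokens/output_tokens counters onto chat-style prompt/completion/total accounting. Below is a minimal standalone sketch of that mapping using a local dataclass instead of litellm's Usage type, so the arithmetic is visible without any litellm imports.

```python
# Standalone sketch of the usage mapping implemented in utils.py above.
from dataclasses import dataclass


@dataclass
class ChatUsage:
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


def to_chat_usage(response_api_usage: dict) -> ChatUsage:
    # The Responses API reports input/output tokens; chat accounting expects
    # prompt/completion tokens plus their sum.
    prompt_tokens = response_api_usage.get("input_tokens") or 0
    completion_tokens = response_api_usage.get("output_tokens") or 0
    return ChatUsage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )


if __name__ == "__main__":
    print(to_chat_usage({"input_tokens": 12, "output_tokens": 34}))
    # ChatUsage(prompt_tokens=12, completion_tokens=34, total_tokens=46)
```

The same file's get_requested_response_api_optional_param() uses get_type_hints on the ResponsesAPIOptionalRequestParams TypedDict to whitelist keys, which is why responses() can simply pass its locals() and have everything irrelevant filtered out.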