diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/utils')
4 files changed, 319 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/utils/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/utils/__init__.py new file mode 100644 index 00000000..f65f5505 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/utils/__init__.py @@ -0,0 +1,10 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +"""AzureML Retrieval Augmented Generation (RAG) utilities.""" + +from ._models import build_model_protocol +from ._open_ai_utils import build_open_ai_protocol, build_connection_id +from ._pipeline_decorator import pipeline + +__all__ = ["build_model_protocol", "build_open_ai_protocol", "build_connection_id", "pipeline"] diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/utils/_models.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/utils/_models.py new file mode 100644 index 00000000..d3e8c952 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/utils/_models.py @@ -0,0 +1,25 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +"""DataIndex embedding model helpers.""" +import re +from typing import Optional + +OPEN_AI_PROTOCOL_TEMPLATE = "azure_open_ai://deployment/{}/model/{}" +OPEN_AI_PROTOCOL_REGEX_PATTERN = OPEN_AI_PROTOCOL_TEMPLATE.format(".*", ".*") +OPEN_AI_SHORT_FORM_PROTOCOL_TEMPLATE = "azure_open_ai://deployments?/{}" +OPEN_AI_PROTOCOL_REGEX_PATTERN = OPEN_AI_SHORT_FORM_PROTOCOL_TEMPLATE.format(".*") + +HUGGINGFACE_PROTOCOL_TEMPLATE = "hugging_face://model/{}" +HUGGINGFACE_PROTOCOL_REGEX_PATTERN = HUGGINGFACE_PROTOCOL_TEMPLATE.format(".*") + + +def build_model_protocol(model: Optional[str] = None): + if not model or re.match(OPEN_AI_PROTOCOL_REGEX_PATTERN, model, re.IGNORECASE): + return model + if re.match(OPEN_AI_SHORT_FORM_PROTOCOL_TEMPLATE, model, re.IGNORECASE): + return model + if re.match(HUGGINGFACE_PROTOCOL_REGEX_PATTERN, model, re.IGNORECASE): + return model + + return OPEN_AI_PROTOCOL_TEMPLATE.format(model, model) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/utils/_open_ai_utils.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/utils/_open_ai_utils.py new file mode 100644 index 00000000..d38a447f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/utils/_open_ai_utils.py @@ -0,0 +1,36 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from typing import Optional + +from azure.ai.ml._utils._arm_id_utils import is_ARM_id_for_resource +from azure.ai.ml._scope_dependent_operations import OperationScope + +OPEN_AI_PROTOCOL_TEMPLATE = "azure_open_ai://deployment/{}/model/{}" + + +def build_open_ai_protocol( + model: Optional[str] = None, + deployment: Optional[str] = None, +): + if not deployment or not model: + return None + return OPEN_AI_PROTOCOL_TEMPLATE.format(deployment, model) + + +def build_connection_id(id: Optional[str], scope: OperationScope): + if not id or not scope.subscription_id or not scope.resource_group_name or not scope.workspace_name: + return id + + if is_ARM_id_for_resource(id, "connections", True): + return id + + # pylint: disable=line-too-long + template = "/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}/providers/Microsoft.MachineLearningServices/workspaces/{workspace_name}/connections/{id}" + return template.format( + subscription_id=scope.subscription_id, + resource_group_name=scope.resource_group_name, + workspace_name=scope.workspace_name, + id=id, + ) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/utils/_pipeline_decorator.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/utils/_pipeline_decorator.py new file mode 100644 index 00000000..e70f97f2 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/utils/_pipeline_decorator.py @@ -0,0 +1,248 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +import inspect +import logging +from functools import wraps +from pathlib import Path +from typing import Any, Callable, Dict, Optional, TypeVar, Union, overload + +from typing_extensions import ParamSpec + +from azure.ai.ml.entities import Data, Model, PipelineJob, PipelineJobSettings +from azure.ai.ml.entities._builders.pipeline import Pipeline +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job.pipeline._io import NodeOutput, PipelineInput, _GroupAttrDict +from azure.ai.ml.entities._job.pipeline._pipeline_expression import PipelineExpression +from azure.ai.ml.exceptions import UserErrorException + +from azure.ai.ml.dsl._pipeline_component_builder import PipelineComponentBuilder, _is_inside_dsl_pipeline_func +from azure.ai.ml.dsl._pipeline_decorator import _validate_args +from azure.ai.ml.dsl._settings import _dsl_settings_stack +from azure.ai.ml.dsl._utils import _resolve_source_file + +SUPPORTED_INPUT_TYPES = ( + PipelineInput, + NodeOutput, + Input, + Model, + Data, # For the case use a Data object as an input, we will convert it to Input object + Pipeline, # For the case use a pipeline node as the input, we use its only one output as the real input. + str, + bool, + int, + float, + PipelineExpression, + _GroupAttrDict, +) +module_logger = logging.getLogger(__name__) + +T = TypeVar("T") +P = ParamSpec("P") + + +# Overload the returns a decorator when func is None +@overload +def pipeline( + func: None, + *, + name: Optional[str] = None, + version: Optional[str] = None, + display_name: Optional[str] = None, + description: Optional[str] = None, + experiment_name: Optional[str] = None, + tags: Optional[Dict[str, str]] = None, + **kwargs: Any, +) -> Callable[[Callable[P, T]], Callable[P, PipelineJob]]: ... + + +# Overload the returns a decorated function when func isn't None +@overload +def pipeline( + func: Callable[P, T], + *, + name: Optional[str] = None, + version: Optional[str] = None, + display_name: Optional[str] = None, + description: Optional[str] = None, + experiment_name: Optional[str] = None, + tags: Optional[Dict[str, str]] = None, + **kwargs: Any, +) -> Callable[P, PipelineJob]: ... + + +def pipeline( + func: Optional[Callable[P, T]] = None, + *, + name: Optional[str] = None, + version: Optional[str] = None, + display_name: Optional[str] = None, + description: Optional[str] = None, + experiment_name: Optional[str] = None, + tags: Optional[Union[Dict[str, str], str]] = None, + **kwargs: Any, +) -> Union[Callable[[Callable[P, T]], Callable[P, PipelineJob]], Callable[P, PipelineJob]]: + """Build a pipeline which contains all component nodes defined in this function. + + :param func: The user pipeline function to be decorated. + :type func: types.FunctionType + :keyword name: The name of pipeline component, defaults to function name. + :paramtype name: str + :keyword version: The version of pipeline component, defaults to "1". + :paramtype version: str + :keyword display_name: The display name of pipeline component, defaults to function name. + :paramtype display_name: str + :keyword description: The description of the built pipeline. + :paramtype description: str + :keyword experiment_name: Name of the experiment the job will be created under, \ + if None is provided, experiment will be set to current directory. + :paramtype experiment_name: str + :keyword tags: The tags of pipeline component. + :paramtype tags: dict[str, str] + :return: Either + * A decorator, if `func` is None + * The decorated `func` + + :rtype: Union[ + Callable[[Callable], Callable[..., ~azure.ai.ml.entities.PipelineJob]], + Callable[P, ~azure.ai.ml.entities.PipelineJob] + + ] + + .. admonition:: Example: + + .. literalinclude:: ../../../../samples/ml_samples_pipeline_job_configurations.py + :start-after: [START configure_pipeline] + :end-before: [END configure_pipeline] + :language: python + :dedent: 8 + :caption: Shows how to create a pipeline using this decorator. + """ + + # get_component force pipeline to return Pipeline instead of PipelineJob so we can set optional argument + # need to remove get_component and rely on azure.ai.ml.dsl.pipeline + get_component = kwargs.get("get_component", False) + + def pipeline_decorator(func: Callable[P, T]) -> Callable: + if not isinstance(func, Callable): # type: ignore + raise UserErrorException(f"Dsl pipeline decorator accept only function type, got {type(func)}.") + + non_pipeline_inputs = kwargs.get("non_pipeline_inputs", []) or kwargs.get("non_pipeline_parameters", []) + # compute variable names changed from default_compute_targe -> compute -> default_compute -> none + # to support legacy usage, we support them with priority. + compute = kwargs.get("compute", None) + default_compute_target = kwargs.get("default_compute_target", None) + default_compute_target = kwargs.get("default_compute", None) or default_compute_target + continue_on_step_failure = kwargs.get("continue_on_step_failure", None) + on_init = kwargs.get("on_init", None) + on_finalize = kwargs.get("on_finalize", None) + + default_datastore = kwargs.get("default_datastore", None) + force_rerun = kwargs.get("force_rerun", None) + job_settings = { + "default_datastore": default_datastore, + "continue_on_step_failure": continue_on_step_failure, + "force_rerun": force_rerun, + "default_compute": default_compute_target, + "on_init": on_init, + "on_finalize": on_finalize, + } + func_entry_path = _resolve_source_file() + if not func_entry_path: + func_path = Path(inspect.getfile(func)) + # in notebook, func_path may be a fake path and will raise error when trying to resolve this fake path + if func_path.exists(): + func_entry_path = func_path.resolve().absolute() + + job_settings = {k: v for k, v in job_settings.items() if v is not None} + pipeline_builder = PipelineComponentBuilder( + func=func, + name=name, + version=version, + display_name=display_name, + description=description, + default_datastore=default_datastore, + tags=tags, + source_path=str(func_entry_path), + non_pipeline_inputs=non_pipeline_inputs, + ) + + @wraps(func) + def wrapper(*args: P.args, **kwargs: P.kwargs) -> Union[Pipeline, PipelineJob]: + # Default args will be added here. + # Node: push/pop stack here instead of put it inside build() + # Because we only want to enable dsl settings on top level pipeline + _dsl_settings_stack.push() # use this stack to track on_init/on_finalize settings + try: + # Convert args to kwargs + provided_positional_kwargs = _validate_args(func, args, kwargs, non_pipeline_inputs) + + # When pipeline supports variable params, update pipeline component to support the inputs in **kwargs. + pipeline_parameters = { + k: v for k, v in provided_positional_kwargs.items() if k not in non_pipeline_inputs + } + pipeline_builder._update_inputs(pipeline_parameters) + + non_pipeline_params_dict = { + k: v for k, v in provided_positional_kwargs.items() if k in non_pipeline_inputs + } + + # TODO: cache built pipeline component + pipeline_component = pipeline_builder.build( + user_provided_kwargs=provided_positional_kwargs, + non_pipeline_inputs_dict=non_pipeline_params_dict, + non_pipeline_inputs=non_pipeline_inputs, + ) + finally: + # use `finally` to ensure pop operation from the stack + dsl_settings = _dsl_settings_stack.pop() + + # update on_init/on_finalize settings if init/finalize job is set + if dsl_settings.init_job_set: + job_settings["on_init"] = dsl_settings.init_job_name(pipeline_component.jobs) + if dsl_settings.finalize_job_set: + job_settings["on_finalize"] = dsl_settings.finalize_job_name(pipeline_component.jobs) + + # TODO: pass compute & default_compute separately? + common_init_args: Any = { + "experiment_name": experiment_name, + "component": pipeline_component, + "inputs": pipeline_parameters, + "tags": tags, + } + built_pipeline: Any = None + if _is_inside_dsl_pipeline_func() or get_component: + # on_init/on_finalize is not supported for pipeline component + if job_settings.get("on_init") is not None or job_settings.get("on_finalize") is not None: + raise UserErrorException("On_init/on_finalize is not supported for pipeline component.") + # Build pipeline node instead of pipeline job if inside dsl. + built_pipeline = Pipeline(_from_component_func=True, **common_init_args) + if job_settings: + module_logger.warning( + ("Job settings %s on pipeline function %r are ignored when using inside PipelineJob."), + job_settings, + func.__name__, + ) + else: + built_pipeline = PipelineJob( + jobs=pipeline_component.jobs, + compute=compute, + settings=PipelineJobSettings(**job_settings), + **common_init_args, + ) + + return built_pipeline + + # Bug Item number: 2883169 + wrapper._is_dsl_func = True # type: ignore + wrapper._job_settings = job_settings # type: ignore + wrapper._pipeline_builder = pipeline_builder # type: ignore + return wrapper + + # enable use decorator without "()" if all arguments are default values + if func is not None: + return pipeline_decorator(func) + return pipeline_decorator |
