| author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
|---|---|---|
| committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
| commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 | |
| tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job | |
| parent | cc961e04ba734dd72309fb548a2f97d67d578813 | |
Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job')
108 files changed, 23457 insertions, 0 deletions
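
The diff below vendors the `azure-ai-ml` job-entity sources (input/output conversion helpers, AutoML job classes, and related settings) into the project's `.venv`. As a quick orientation, here is a minimal, illustrative sketch of how the conversion helpers defined in `_input_output_helpers.py` (the first file in the diff) are exercised: SDK `Input`/`Output` objects are translated into REST job inputs/outputs before submission. The datastore path is a placeholder, and calling these private helpers directly is shown only for illustration; this sketch is not part of the committed code.

```python
# Illustrative sketch only -- not part of this commit. Paths are placeholders.
from azure.ai.ml import Input, Output
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.ai.ml.entities._job._input_output_helpers import (
    to_rest_data_outputs,
    to_rest_dataset_literal_inputs,
)

inputs = {
    # Asset input: becomes a RestUriFolderJobInput with READ_ONLY_MOUNT delivery mode.
    "training_data": Input(
        type=AssetTypes.URI_FOLDER,
        path="azureml://datastores/workspaceblobstore/paths/data/",  # placeholder path
        mode=InputOutputModes.RO_MOUNT,
    ),
    # Literal input: wrapped as a LiteralJobInput with its value stringified.
    "learning_rate": 0.01,
}
outputs = {"model_dir": Output(type=AssetTypes.URI_FOLDER)}

# job_type="command" applies the plain key validation; "pipeline" would also allow
# dot-separated keys for parameter groups (see validate_pipeline_input_key_characters).
rest_inputs = to_rest_dataset_literal_inputs(inputs, job_type="command")
rest_outputs = to_rest_data_outputs(outputs)
```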
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/__init__.py new file mode 100644 index 00000000..fdf8caba --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/__init__.py @@ -0,0 +1,5 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/_input_output_helpers.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/_input_output_helpers.py new file mode 100644 index 00000000..1a13ab41 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/_input_output_helpers.py @@ -0,0 +1,427 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +import collections.abc +import re +from typing import Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + CustomModelJobInput as RestCustomModelJobInput, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + CustomModelJobOutput as RestCustomModelJobOutput, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import InputDeliveryMode +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobInput as RestJobInput +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobInputType +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobOutput as RestJobOutput +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobOutputType, LiteralJobInput +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + MLFlowModelJobInput as RestMLFlowModelJobInput, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + MLFlowModelJobOutput as RestMLFlowModelJobOutput, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + MLTableJobInput as RestMLTableJobInput, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + MLTableJobOutput as RestMLTableJobOutput, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import OutputDeliveryMode +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + TritonModelJobInput as RestTritonModelJobInput, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + TritonModelJobOutput as RestTritonModelJobOutput, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + UriFileJobInput as RestUriFileJobInput, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + UriFileJobOutput as RestUriFileJobOutput, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + UriFolderJobInput as RestUriFolderJobInput, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + UriFolderJobOutput as RestUriFolderJobOutput, +) +from azure.ai.ml._utils.utils import is_data_binding_expression +from azure.ai.ml.constants import AssetTypes, InputOutputModes, JobType +from azure.ai.ml.constants._component import IOConstants +from azure.ai.ml.entities._inputs_outputs import Input, Output +from azure.ai.ml.entities._job.input_output_entry import InputOutputEntry +from azure.ai.ml.entities._util import normalize_job_input_output_type +from azure.ai.ml.exceptions import ( 
+ ErrorCategory, + ErrorTarget, + JobException, + ValidationErrorType, + ValidationException, +) + +INPUT_MOUNT_MAPPING_FROM_REST = { + InputDeliveryMode.READ_WRITE_MOUNT: InputOutputModes.RW_MOUNT, + InputDeliveryMode.READ_ONLY_MOUNT: InputOutputModes.RO_MOUNT, + InputDeliveryMode.DOWNLOAD: InputOutputModes.DOWNLOAD, + InputDeliveryMode.DIRECT: InputOutputModes.DIRECT, + InputDeliveryMode.EVAL_MOUNT: InputOutputModes.EVAL_MOUNT, + InputDeliveryMode.EVAL_DOWNLOAD: InputOutputModes.EVAL_DOWNLOAD, +} + +INPUT_MOUNT_MAPPING_TO_REST = { + InputOutputModes.MOUNT: InputDeliveryMode.READ_ONLY_MOUNT, + InputOutputModes.RW_MOUNT: InputDeliveryMode.READ_WRITE_MOUNT, + InputOutputModes.RO_MOUNT: InputDeliveryMode.READ_ONLY_MOUNT, + InputOutputModes.DOWNLOAD: InputDeliveryMode.DOWNLOAD, + InputOutputModes.EVAL_MOUNT: InputDeliveryMode.EVAL_MOUNT, + InputOutputModes.EVAL_DOWNLOAD: InputDeliveryMode.EVAL_DOWNLOAD, + InputOutputModes.DIRECT: InputDeliveryMode.DIRECT, +} + + +OUTPUT_MOUNT_MAPPING_FROM_REST = { + OutputDeliveryMode.READ_WRITE_MOUNT: InputOutputModes.RW_MOUNT, + OutputDeliveryMode.UPLOAD: InputOutputModes.UPLOAD, + OutputDeliveryMode.DIRECT: InputOutputModes.DIRECT, +} + +OUTPUT_MOUNT_MAPPING_TO_REST = { + InputOutputModes.MOUNT: OutputDeliveryMode.READ_WRITE_MOUNT, + InputOutputModes.UPLOAD: OutputDeliveryMode.UPLOAD, + InputOutputModes.RW_MOUNT: OutputDeliveryMode.READ_WRITE_MOUNT, + InputOutputModes.DIRECT: OutputDeliveryMode.DIRECT, +} + + +# TODO: Remove this as both rest type and sdk type are snake case now. +def get_output_type_mapping_from_rest() -> Dict[str, str]: + """Gets the mapping of JobOutputType to AssetType + + :return: Mapping of JobOutputType to AssetType + :rtype: Dict[str, str] + """ + return { + JobOutputType.URI_FILE: AssetTypes.URI_FILE, + JobOutputType.URI_FOLDER: AssetTypes.URI_FOLDER, + JobOutputType.MLTABLE: AssetTypes.MLTABLE, + JobOutputType.MLFLOW_MODEL: AssetTypes.MLFLOW_MODEL, + JobOutputType.CUSTOM_MODEL: AssetTypes.CUSTOM_MODEL, + JobOutputType.TRITON_MODEL: AssetTypes.TRITON_MODEL, + } + + +def get_input_rest_cls_dict() -> Dict[str, RestJobInput]: + """Gets the mapping of AssetType to RestJobInput + + :return: Map of AssetType to RestJobInput + :rtype: Dict[str, RestJobInput] + """ + return { + AssetTypes.URI_FILE: RestUriFileJobInput, + AssetTypes.URI_FOLDER: RestUriFolderJobInput, + AssetTypes.MLTABLE: RestMLTableJobInput, + AssetTypes.MLFLOW_MODEL: RestMLFlowModelJobInput, + AssetTypes.CUSTOM_MODEL: RestCustomModelJobInput, + AssetTypes.TRITON_MODEL: RestTritonModelJobInput, + } + + +def get_output_rest_cls_dict() -> Dict[str, RestJobOutput]: + """Get output rest init cls dict. 
+ + :return: Map of AssetType to RestJobOutput + :rtype: Dict[str, RestJobOutput] + """ + return { + AssetTypes.URI_FILE: RestUriFileJobOutput, + AssetTypes.URI_FOLDER: RestUriFolderJobOutput, + AssetTypes.MLTABLE: RestMLTableJobOutput, + AssetTypes.MLFLOW_MODEL: RestMLFlowModelJobOutput, + AssetTypes.CUSTOM_MODEL: RestCustomModelJobOutput, + AssetTypes.TRITON_MODEL: RestTritonModelJobOutput, + } + + +def build_input_output( + item: Union[InputOutputEntry, Input, Output, str, bool, int, float], + inputs: bool = True, +) -> Union[InputOutputEntry, Input, Output, str, bool, int, float]: + if isinstance(item, (Input, InputOutputEntry, Output)): + # return objects constructed at yaml load or specified in sdk + return item + # parse dictionary into supported class + if isinstance(item, collections.abc.Mapping): + if item.get("data"): + return InputOutputEntry(**item) + # else default to JobInput + return Input(**item) if inputs else Output(**item) + # return literal inputs as-is + return item + + +def _validate_inputs_for(input_consumer_name: str, input_consumer: str, inputs: Optional[Dict]) -> None: + implicit_inputs = re.findall(r"\${{inputs\.([\w\.-]+)}}", input_consumer) + # optional inputs no need to validate whether they're in inputs + optional_inputs = re.findall(r"\[[\w\.\s-]*\${{inputs\.([\w\.-]+)}}]", input_consumer) + for key in implicit_inputs: + if inputs is not None and inputs.get(key, None) is None and key not in optional_inputs: + msg = "Inputs to job does not contain '{}' referenced in " + input_consumer_name + raise ValidationException( + message=msg.format(key), + no_personal_data_message=msg.format("[key]"), + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) + + +def validate_inputs_for_command(command: Optional[str], inputs: Optional[Dict]) -> None: + if command is not None: + _validate_inputs_for("command", command, inputs) + + +def validate_inputs_for_args(args: str, inputs: Optional[Dict[str, Any]]) -> None: + _validate_inputs_for("args", args, inputs) + + +def validate_key_contains_allowed_characters(key: str) -> None: + if re.match(r"^[a-zA-Z_]+[a-zA-Z0-9_]*$", key) is None: + msg = "Key name {} must be composed letters, numbers, and underscore" + raise ValidationException( + message=msg.format(key), + no_personal_data_message=msg.format("[key]"), + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) + + +def validate_pipeline_input_key_characters(key: str) -> None: + # Pipeline input allow '.' to support parameter group in key. + # Note: ([a-zA-Z_]+[a-zA-Z0-9_]*) is a valid single key, + # so a valid pipeline key is: ^{single_key}([.]{single_key})*$ + if re.match(IOConstants.VALID_KEY_PATTERN, key) is None: + msg = ( + "Pipeline input key name {} must be composed letters, numbers, and underscores with optional split by dots." + ) + raise ValidationException( + message=msg.format(key), + no_personal_data_message=msg.format("[key]"), + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + ) + + +def to_rest_dataset_literal_inputs( + inputs: Optional[Dict], + *, + job_type: Optional[str], +) -> Dict[str, RestJobInput]: + """Turns dataset and literal inputs into dictionary of REST JobInput. 
+ + :param inputs: Dictionary of dataset and literal inputs to job + :type inputs: Dict[str, Union[int, str, float, bool, JobInput]] + :return: A dictionary mapping input name to a ComponentJobInput or PipelineInput + :rtype: Dict[str, Union[ComponentJobInput, PipelineInput]] + :keyword job_type: When job_type is pipeline, enable dot('.') in parameter keys to support parameter group. + TODO: Remove this after move name validation to Job's customized validate. + :paramtype job_type: str + """ + rest_inputs = {} + + if inputs is not None: + # Pack up the inputs into REST format + for input_name, input_value in inputs.items(): + if job_type == JobType.PIPELINE: + validate_pipeline_input_key_characters(input_name) + elif job_type: + # We pass job_type=None for pipeline node, and want skip this check for nodes. + validate_key_contains_allowed_characters(input_name) + if isinstance(input_value, Input): + if ( + input_value.path + and isinstance(input_value.path, str) + and is_data_binding_expression(input_value.path) + ): + input_data = LiteralJobInput(value=input_value.path) + # set mode attribute manually for binding job input + if input_value.mode: + input_data.mode = INPUT_MOUNT_MAPPING_TO_REST[input_value.mode] + if getattr(input_value, "path_on_compute", None) is not None: + input_data.pathOnCompute = input_value.path_on_compute + input_data.job_input_type = JobInputType.LITERAL + else: + target_cls_dict = get_input_rest_cls_dict() + + if input_value.type in target_cls_dict: + input_data = target_cls_dict[input_value.type]( + uri=input_value.path, + mode=(INPUT_MOUNT_MAPPING_TO_REST[input_value.mode.lower()] if input_value.mode else None), + ) + else: + msg = f"Job input type {input_value.type} is not supported as job input." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) + elif input_value is None: + # If the input is None, we need to pass the origin None to the REST API + input_data = LiteralJobInput(value=None) + else: + # otherwise, the input is a literal input + if isinstance(input_value, dict): + input_data = LiteralJobInput(value=str(input_value["value"])) + # set mode attribute manually for binding job input + if "mode" in input_value: + input_data.mode = input_value["mode"] + else: + input_data = LiteralJobInput(value=str(input_value)) + input_data.job_input_type = JobInputType.LITERAL + # Pack up inputs into PipelineInputs or ComponentJobInputs depending on caller + rest_inputs[input_name] = input_data + return rest_inputs + + +def from_rest_inputs_to_dataset_literal(inputs: Dict[str, RestJobInput]) -> Dict: + """Turns REST dataset and literal inputs into the SDK format. + + :param inputs: Dictionary mapping input name to ComponentJobInput or PipelineInput + :type inputs: Dict[str, Union[ComponentJobInput, PipelineInput]] + :return: A dictionary mapping input name to a literal value or JobInput + :rtype: Dict[str, Union[int, str, float, bool, JobInput]] + """ + if inputs is None: + return {} + from_rest_inputs = {} + # Unpack the inputs + for input_name, input_value in inputs.items(): + # TODO:Brandon Clarify with PMs if user should be able to define null input objects + if input_value is None: + continue + + # TODO: Remove this as both rest type and sdk type are snake case now. 
+ type_transfer_dict = get_output_type_mapping_from_rest() + # deal with invalid input type submitted by feb api + # todo: backend help convert node level input/output type + normalize_job_input_output_type(input_value) + + if input_value.job_input_type in type_transfer_dict: + if input_value.uri: + path = input_value.uri + if getattr(input_value, "pathOnCompute", None) is not None: + sourcePathOnCompute = input_value.pathOnCompute + else: + sourcePathOnCompute = None + input_data = Input( + type=type_transfer_dict[input_value.job_input_type], + path=path, + mode=(INPUT_MOUNT_MAPPING_FROM_REST[input_value.mode] if input_value.mode else None), + path_on_compute=sourcePathOnCompute, + ) + elif input_value.job_input_type in (JobInputType.LITERAL, JobInputType.LITERAL): + # otherwise, the input is a literal, so just unpack the InputData value field + input_data = input_value.value + else: + msg = f"Job input type {input_value.job_input_type} is not supported as job input." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) + + from_rest_inputs[input_name] = input_data # pylint: disable=possibly-used-before-assignment + return from_rest_inputs + + +def to_rest_data_outputs(outputs: Optional[Dict]) -> Dict[str, RestJobOutput]: + """Turns job outputs into REST format. + + :param outputs: Dictionary of dataset outputs from job + :type outputs: Dict[str, JobOutput] + :return: A dictionary mapping output name to a RestJobOutput + :rtype: Dict[str, RestJobOutput] + """ + rest_outputs = {} + if outputs is not None: + for output_name, output_value in outputs.items(): + validate_key_contains_allowed_characters(output_name) + if output_value is None: + # pipeline output could be none, default to URI folder with None mode + output_cls = RestUriFolderJobOutput + rest_outputs[output_name] = output_cls(mode=None) + else: + target_cls_dict = get_output_rest_cls_dict() + + output_value_type = output_value.type if output_value.type else AssetTypes.URI_FOLDER + if output_value_type in target_cls_dict: + output = target_cls_dict[output_value_type]( + asset_name=output_value.name, + asset_version=output_value.version, + uri=output_value.path, + mode=(OUTPUT_MOUNT_MAPPING_TO_REST[output_value.mode.lower()] if output_value.mode else None), + pathOnCompute=getattr(output_value, "path_on_compute", None), + description=output_value.description, + ) + else: + msg = "unsupported JobOutput type: {}".format(output_value.type) + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) + rest_outputs[output_name] = output + return rest_outputs + + +def from_rest_data_outputs(outputs: Dict[str, RestJobOutput]) -> Dict[str, Output]: + """Turns REST outputs into the SDK format. 
+ + :param outputs: Dictionary of dataset and literal inputs to job + :type outputs: Dict[str, RestJobOutput] + :return: A dictionary mapping input name to a InputOutputEntry + :rtype: Dict[str, JobOutput] + """ + output_type_mapping = get_output_type_mapping_from_rest() + from_rest_outputs = {} + if outputs is None: + return {} + for output_name, output_value in outputs.items(): + if output_value is None: + continue + # deal with invalid output type submitted by feb api + # todo: backend help convert node level input/output type + normalize_job_input_output_type(output_value) + if getattr(output_value, "pathOnCompute", None) is not None: + sourcePathOnCompute = output_value.pathOnCompute + else: + sourcePathOnCompute = None + if output_value.job_output_type in output_type_mapping: + from_rest_outputs[output_name] = Output( + type=output_type_mapping[output_value.job_output_type], + path=output_value.uri, + mode=(OUTPUT_MOUNT_MAPPING_FROM_REST[output_value.mode] if output_value.mode else None), + path_on_compute=sourcePathOnCompute, + description=output_value.description, + name=output_value.asset_name, + version=(output_value.asset_version if hasattr(output_value, "asset_version") else None), + ) + else: + msg = "unsupported JobOutput type: {}".format(output_value.job_output_type) + raise JobException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.JOB, + error_category=ErrorCategory.SYSTEM_ERROR, + ) + + return from_rest_outputs diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/_studio_url_from_job_id.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/_studio_url_from_job_id.py new file mode 100644 index 00000000..63ad6f06 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/_studio_url_from_job_id.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +import re +from typing import Optional + +from azure.ai.ml._azure_environments import _get_aml_resource_id_from_metadata, _get_default_cloud_name + +JOB_ID_RE_PATTERN = re.compile( + ( + r"\/subscriptions\/(?P<subscription>[\w,-]+)\/resourceGroups\/(?P<resource_group>[\w,-]+)\/providers" + r"\/Microsoft\.MachineLearningServices\/workspaces\/(?P<workspace>[\w,-]+)\/jobs\/(?P<run_id>[\w,-]+)" + ) # fmt: skip +) + + +def studio_url_from_job_id(job_id: str) -> Optional[str]: + resource_id = _get_aml_resource_id_from_metadata(_get_default_cloud_name()) + m = JOB_ID_RE_PATTERN.match(job_id) + if m: + return ( + f"{resource_id}/runs/{m.group('run_id')}?wsid=/subscriptions/{m.group('subscription')}" + f"/resourcegroups/{m.group('resource_group')}/workspaces/{m.group('workspace')}" + ) # fmt: skip + return None diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/__init__.py new file mode 100644 index 00000000..e99e9321 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/__init__.py @@ -0,0 +1,16 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +__path__ = __import__("pkgutil").extend_path(__path__, __name__) + +from .search_space import SearchSpace +from .stack_ensemble_settings import StackEnsembleSettings +from .training_settings import ClassificationTrainingSettings, TrainingSettings + +__all__ = [ + "ClassificationTrainingSettings", + "TrainingSettings", + "SearchSpace", + "StackEnsembleSettings", +] diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/automl_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/automl_job.py new file mode 100644 index 00000000..9e1b4d05 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/automl_job.py @@ -0,0 +1,283 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +import logging +from abc import ABC, abstractmethod +from typing import Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2024_01_01_preview.models import ( + JobBase, + MLTableJobInput, + QueueSettings, + ResourceConfiguration, + TaskType, +) +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants import JobType +from azure.ai.ml.constants._common import TYPE, AssetTypes +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import ( + AmlTokenConfiguration, + ManagedIdentityConfiguration, + UserIdentityConfiguration, +) +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job.job import Job +from azure.ai.ml.entities._job.job_io_mixin import JobIOMixin +from azure.ai.ml.entities._job.pipeline._io import AutoMLNodeIOMixin +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + +module_logger = logging.getLogger(__name__) + + +class AutoMLJob(Job, JobIOMixin, AutoMLNodeIOMixin, ABC): + """Initialize an AutoML job entity. + + Constructor for an AutoMLJob. + + :keyword resources: Resource configuration for the AutoML job, defaults to None + :paramtype resources: typing.Optional[ResourceConfiguration] + :keyword identity: Identity that training job will use while running on compute, defaults to None + :paramtype identity: typing.Optional[ typing.Union[ManagedIdentityConfiguration, AmlTokenConfiguration + , UserIdentityConfiguration] ] + :keyword environment_id: The environment id for the AutoML job, defaults to None + :paramtype environment_id: typing.Optional[str] + :keyword environment_variables: The environment variables for the AutoML job, defaults to None + :paramtype environment_variables: typing.Optional[Dict[str, str]] + :keyword outputs: The outputs for the AutoML job, defaults to None + :paramtype outputs: typing.Optional[Dict[str, str]] + :keyword queue_settings: The queue settings for the AutoML job, defaults to None + :paramtype queue_settings: typing.Optional[QueueSettings] + :raises ValidationException: task type validation error + :raises NotImplementedError: Raises NotImplementedError + :return: An AutoML Job + :rtype: AutoMLJob + """ + + def __init__( + self, + *, + resources: Optional[ResourceConfiguration] = None, + identity: Optional[ + Union[ManagedIdentityConfiguration, AmlTokenConfiguration, UserIdentityConfiguration] + ] = None, + queue_settings: Optional[QueueSettings] = None, + **kwargs: Any, + ) -> None: + """Initialize an AutoML job entity. 
+ + Constructor for an AutoMLJob. + + :keyword resources: Resource configuration for the AutoML job, defaults to None + :paramtype resources: typing.Optional[ResourceConfiguration] + :keyword identity: Identity that training job will use while running on compute, defaults to None + :paramtype identity: typing.Optional[ typing.Union[ManagedIdentityConfiguration, AmlTokenConfiguration + , UserIdentityConfiguration] ] + :keyword environment_id: The environment id for the AutoML job, defaults to None + :paramtype environment_id: typing.Optional[str] + :keyword environment_variables: The environment variables for the AutoML job, defaults to None + :paramtype environment_variables: typing.Optional[Dict[str, str]] + :keyword outputs: The outputs for the AutoML job, defaults to None + :paramtype outputs: typing.Optional[Dict[str, str]] + :keyword queue_settings: The queue settings for the AutoML job, defaults to None + :paramtype queue_settings: typing.Optional[QueueSettings] + :raises ValidationException: task type validation error + :raises NotImplementedError: Raises NotImplementedError + """ + kwargs[TYPE] = JobType.AUTOML + self.environment_id = kwargs.pop("environment_id", None) + self.environment_variables = kwargs.pop("environment_variables", None) + self.outputs = kwargs.pop("outputs", None) + + super().__init__(**kwargs) + + self.resources = resources + self.identity = identity + self.queue_settings = queue_settings + + @property + @abstractmethod + def training_data(self) -> Input: + """The training data for the AutoML job. + + :raises NotImplementedError: Raises NotImplementedError + :return: Returns the training data for the AutoML job. + :rtype: Input + """ + raise NotImplementedError() + + @training_data.setter + def training_data(self, value: Any) -> None: + self.training_data = value + + @property + @abstractmethod + def validation_data(self) -> Input: + """The validation data for the AutoML job. + + :raises NotImplementedError: Raises NotImplementedError + :return: Returns the validation data for the AutoML job. + :rtype: Input + """ + raise NotImplementedError() + + @validation_data.setter + def validation_data(self, value: Any) -> None: + self.validation_data = value + + @property + @abstractmethod + def test_data(self) -> Input: + """The test data for the AutoML job. + + :raises NotImplementedError: Raises NotImplementedError + :return: Returns the test data for the AutoML job. + :rtype: Input + """ + raise NotImplementedError() + + @test_data.setter + def test_data(self, value: Any) -> None: + self.test_data = value + + @classmethod + def _load_from_rest(cls, obj: JobBase) -> "AutoMLJob": + """Loads the rest object to a dict containing items to init the AutoMLJob objects. + + :param obj: Azure Resource Manager resource envelope. 
+ :type obj: JobBase + :raises ValidationException: task type validation error + :return: An AutoML Job + :rtype: AutoMLJob + """ + task_type = ( + camel_to_snake(obj.properties.task_details.task_type) if obj.properties.task_details.task_type else None + ) + class_type = cls._get_task_mapping().get(task_type, None) + if class_type: + res: AutoMLJob = class_type._from_rest_object(obj) + return res + msg = f"Unsupported task type: {obj.properties.task_details.task_type}" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.SYSTEM_ERROR, + ) + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "AutoMLJob": + """Loads the dictionary objects to an AutoMLJob object. + + :param data: A data dictionary. + :type data: typing.Dict + :param context: A context dictionary. + :type context: typing.Dict + :param additional_message: An additional message to be logged in the ValidationException. + :type additional_message: str + + :raises ValidationException: task type validation error + :return: An AutoML Job + :rtype: AutoMLJob + """ + task_type = data.get(AutoMLConstants.TASK_TYPE_YAML) + class_type = cls._get_task_mapping().get(task_type, None) + if class_type: + res: AutoMLJob = class_type._load_from_dict( + data, + context, + additional_message, + **kwargs, + ) + return res + msg = f"Unsupported task type: {task_type}" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "AutoMLJob": + """Create an automl job instance from schema parsed dict. + + :param loaded_data: A loaded_data dictionary. + :type loaded_data: typing.Dict + :raises ValidationException: task type validation error + :return: An AutoML Job + :rtype: AutoMLJob + """ + task_type = loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML) + class_type = cls._get_task_mapping().get(task_type, None) + if class_type: + res: AutoMLJob = class_type._create_instance_from_schema_dict(loaded_data=loaded_data) + return res + msg = f"Unsupported task type: {task_type}" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + @classmethod + def _get_task_mapping(cls) -> Dict: + """Create a mapping of task type to job class. + + :return: An AutoMLVertical object containing the task type to job class mapping. 
+ :rtype: AutoMLVertical + """ + from .image import ( + ImageClassificationJob, + ImageClassificationMultilabelJob, + ImageInstanceSegmentationJob, + ImageObjectDetectionJob, + ) + from .nlp import TextClassificationJob, TextClassificationMultilabelJob, TextNerJob + from .tabular import ClassificationJob, ForecastingJob, RegressionJob + + # create a mapping of task type to job class + return { + camel_to_snake(TaskType.CLASSIFICATION): ClassificationJob, + camel_to_snake(TaskType.REGRESSION): RegressionJob, + camel_to_snake(TaskType.FORECASTING): ForecastingJob, + camel_to_snake(TaskType.IMAGE_CLASSIFICATION): ImageClassificationJob, + camel_to_snake(TaskType.IMAGE_CLASSIFICATION_MULTILABEL): ImageClassificationMultilabelJob, + camel_to_snake(TaskType.IMAGE_OBJECT_DETECTION): ImageObjectDetectionJob, + camel_to_snake(TaskType.IMAGE_INSTANCE_SEGMENTATION): ImageInstanceSegmentationJob, + camel_to_snake(TaskType.TEXT_NER): TextNerJob, + camel_to_snake(TaskType.TEXT_CLASSIFICATION): TextClassificationJob, + camel_to_snake(TaskType.TEXT_CLASSIFICATION_MULTILABEL): TextClassificationMultilabelJob, + } + + def _resolve_data_inputs(self, rest_job: "AutoMLJob") -> None: + """Resolve JobInputs to MLTableJobInputs within data_settings. + + :param rest_job: The rest job object. + :type rest_job: AutoMLJob + """ + if isinstance(rest_job.training_data, Input): + rest_job.training_data = MLTableJobInput(uri=rest_job.training_data.path) + if isinstance(rest_job.validation_data, Input): + rest_job.validation_data = MLTableJobInput(uri=rest_job.validation_data.path) + if hasattr(rest_job, "test_data") and isinstance(rest_job.test_data, Input): + rest_job.test_data = MLTableJobInput(uri=rest_job.test_data.path) + + def _restore_data_inputs(self) -> None: + """Restore MLTableJobInputs to JobInputs within data_settings.""" + if isinstance(self.training_data, MLTableJobInput): + self.training_data = Input(type=AssetTypes.MLTABLE, path=self.training_data.uri) + if isinstance(self.validation_data, MLTableJobInput): + self.validation_data = Input(type=AssetTypes.MLTABLE, path=self.validation_data.uri) + if hasattr(self, "test_data") and isinstance(self.test_data, MLTableJobInput): + self.test_data = Input(type=AssetTypes.MLTABLE, path=self.test_data.uri) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/automl_vertical.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/automl_vertical.py new file mode 100644 index 00000000..f11be81c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/automl_vertical.py @@ -0,0 +1,134 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from abc import abstractmethod +from typing import Any, Optional + +from azure.ai.ml import Input + +from .automl_job import AutoMLJob + + +class AutoMLVertical(AutoMLJob): + """Abstract class for AutoML verticals. + + :param task_type: The type of task to run. Possible values include: "classification", "regression", "forecasting". + :type task_type: str + :param training_data: Training data input + :type training_data: Input + :param validation_data: Validation data input + :type validation_data: Input + :param test_data: Test data input, defaults to None + :type test_data: typing.Optional[Input] + :raises ValueError: If task_type is not one of "classification", "regression", "forecasting". 
+ :raises ValueError: If training_data is not of type Input. + :raises ValueError: If validation_data is not of type Input. + :raises ValueError: If test_data is not of type Input. + """ + + @abstractmethod + def __init__( + self, + task_type: str, + training_data: Input, + validation_data: Input, + test_data: Optional[Input] = None, + **kwargs: Any + ) -> None: + """Initialize AutoMLVertical. + + Constructor for AutoMLVertical. + + :param task_type: The type of task to run. Possible values include: "classification", "regression" + , "forecasting". + :type task_type: str + :param training_data: Training data input + :type training_data: Input + :param validation_data: Validation data input + :type validation_data: Input + :param test_data: Test data input, defaults to None + :type test_data: typing.Optional[Input] + :raises ValueError: If task_type is not one of "classification", "regression", "forecasting". + :raises ValueError: If training_data is not of type Input. + :raises ValueError: If validation_data is not of type Input. + :raises ValueError: If test_data is not of type Input. + """ + self._task_type = task_type + self.training_data = training_data + self.validation_data = validation_data + self.test_data = test_data # type: ignore + super().__init__(**kwargs) + + @property + def task_type(self) -> str: + """Get task type. + + :return: The type of task to run. Possible values include: "classification", "regression", "forecasting". + :rtype: str + """ + return self._task_type + + @task_type.setter + def task_type(self, task_type: str) -> None: + """Set task type. + + :param task_type: The type of task to run. Possible values include: "classification", "regression" + , "forecasting". + :type task_type: str + """ + self._task_type = task_type + + @property + def training_data(self) -> Input: + """Get training data. + + :return: Training data input + :rtype: Input + """ + return self._training_data + + @training_data.setter + def training_data(self, training_data: Input) -> None: + """Set training data. + + :param training_data: Training data input + :type training_data: Input + """ + self._training_data = training_data + + @property + def validation_data(self) -> Input: + """Get validation data. + + :return: Validation data input + :rtype: Input + """ + return self._validation_data + + @validation_data.setter + def validation_data(self, validation_data: Input) -> None: + """Set validation data. + + :param validation_data: Validation data input + :type validation_data: Input + """ + self._validation_data = validation_data + + @property + def test_data(self) -> Input: + """Get test data. + + :return: Test data input + :rtype: Input + """ + return self._test_data + + @test_data.setter + def test_data(self, test_data: Input) -> None: + """Set test data. + + :param test_data: Test data input + :type test_data: Input + """ + self._test_data = test_data diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/featurization_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/featurization_settings.py new file mode 100644 index 00000000..c9e73d21 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/featurization_settings.py @@ -0,0 +1,32 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +from typing import Optional + +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class FeaturizationSettings(RestTranslatableMixin): + """Base Featurization settings.""" + + def __init__( + self, + *, + dataset_language: Optional[str] = None, + ): + self.dataset_language = dataset_language + + def __eq__(self, other: object) -> bool: + if not isinstance(other, FeaturizationSettings): + return NotImplemented + + return self.dataset_language == other.dataset_language + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) + + +class FeaturizationSettingsType: + NLP = "nlp" + TABULAR = "tabular" diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/__init__.py new file mode 100644 index 00000000..46964086 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/__init__.py @@ -0,0 +1,35 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from .automl_image import AutoMLImage +from .image_classification_job import ImageClassificationJob +from .image_classification_multilabel_job import ImageClassificationMultilabelJob +from .image_classification_search_space import ImageClassificationSearchSpace +from .image_instance_segmentation_job import ImageInstanceSegmentationJob +from .image_limit_settings import ImageLimitSettings +from .image_model_settings import ( + ImageModelSettingsClassification, + ImageModelSettingsObjectDetection, + LogTrainingMetrics, + LogValidationLoss, +) +from .image_object_detection_job import ImageObjectDetectionJob +from .image_object_detection_search_space import ImageObjectDetectionSearchSpace +from .image_sweep_settings import ImageSweepSettings + +__all__ = [ + "AutoMLImage", + "LogTrainingMetrics", + "LogValidationLoss", + "ImageClassificationJob", + "ImageClassificationMultilabelJob", + "ImageClassificationSearchSpace", + "ImageInstanceSegmentationJob", + "ImageLimitSettings", + "ImageObjectDetectionJob", + "ImageObjectDetectionSearchSpace", + "ImageSweepSettings", + "ImageModelSettingsClassification", + "ImageModelSettingsObjectDetection", +] diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image.py new file mode 100644 index 00000000..a07bba4a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image.py @@ -0,0 +1,244 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +from abc import ABC +from typing import Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import LogVerbosity, SamplingAlgorithmType +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job.automl.automl_vertical import AutoMLVertical +from azure.ai.ml.entities._job.automl.image.image_limit_settings import ImageLimitSettings +from azure.ai.ml.entities._job.automl.image.image_sweep_settings import ImageSweepSettings +from azure.ai.ml.entities._job.sweep.early_termination_policy import ( + BanditPolicy, + MedianStoppingPolicy, + TruncationSelectionPolicy, +) +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +class AutoMLImage(AutoMLVertical, ABC): + """Base class for all AutoML Image jobs. + You should not instantiate this class directly. + Instead you should create classes for specific AutoML Image tasks. + + :keyword task_type: Required. Type of task to run. + Possible values include: "ImageClassification", "ImageClassificationMultilabel", + "ImageObjectDetection", "ImageInstanceSegmentation" + :paramtype task_type: str + :keyword limits: Limit settings for all AutoML Image jobs. Defaults to None. + :paramtype limits: Optional[~azure.ai.ml.automl.ImageLimitSettings] + :keyword sweep: Sweep settings for all AutoML Image jobs. Defaults to None. + :paramtype sweep: Optional[~azure.ai.ml.automl.ImageSweepSettings] + :keyword kwargs: Additional keyword arguments for AutoMLImage. + :paramtype kwargs: Dict[str, Any] + """ + + def __init__( + self, + *, + task_type: str, + limits: Optional[ImageLimitSettings] = None, + sweep: Optional[ImageSweepSettings] = None, + **kwargs: Any, + ) -> None: + self.log_verbosity = kwargs.pop("log_verbosity", LogVerbosity.INFO) + self.target_column_name = kwargs.pop("target_column_name", None) + self.validation_data_size = kwargs.pop("validation_data_size", None) + + super().__init__( + task_type=task_type, + training_data=kwargs.pop("training_data", None), + validation_data=kwargs.pop("validation_data", None), + **kwargs, + ) + + # Set default value for self._limits as it is a required property in rest object. + self._limits = limits or ImageLimitSettings() + self._sweep = sweep + + @property + def log_verbosity(self) -> LogVerbosity: + """Returns the verbosity of the logger. + + :return: The log verbosity. + :rtype: ~azure.ai.ml._restclient.v2023_04_01_preview.models.LogVerbosity + """ + return self._log_verbosity + + @log_verbosity.setter + def log_verbosity(self, value: Union[str, LogVerbosity]) -> None: + """Sets the verbosity of the logger. + + :param value: The value to set the log verbosity to. + Possible values include: "NotSet", "Debug", "Info", "Warning", "Error", "Critical". + :type value: Union[str, ~azure.ai.ml._restclient.v2023_04_01_preview.models.LogVerbosity] + """ + self._log_verbosity = None if value is None else LogVerbosity[camel_to_snake(value).upper()] + + @property + def limits(self) -> ImageLimitSettings: + """Returns the limit settings for all AutoML Image jobs. + + :return: The limit settings. + :rtype: ~azure.ai.ml.automl.ImageLimitSettings + """ + return self._limits + + @limits.setter + def limits(self, value: Union[Dict, ImageLimitSettings]) -> None: + if isinstance(value, ImageLimitSettings): + self._limits = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for limit settings." 
+ raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_limits(**value) + + @property + def sweep(self) -> Optional[ImageSweepSettings]: + """Returns the sweep settings for all AutoML Image jobs. + + :return: The sweep settings. + :rtype: ~azure.ai.ml.automl.ImageSweepSettings + """ + return self._sweep + + @sweep.setter + def sweep(self, value: Union[Dict, ImageSweepSettings]) -> None: + """Sets the sweep settings for all AutoML Image jobs. + + :param value: The value to set the sweep settings to. + :type value: Union[Dict, ~azure.ai.ml.automl.ImageSweepSettings] + :raises ~azure.ai.ml.exceptions.ValidationException: If value is not a dictionary. + :return: None + """ + if isinstance(value, ImageSweepSettings): + self._sweep = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for sweep settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_sweep(**value) + + def set_data( + self, + *, + training_data: Input, + target_column_name: str, + validation_data: Optional[Input] = None, + validation_data_size: Optional[float] = None, + ) -> None: + """Data settings for all AutoML Image jobs. + + :keyword training_data: Required. Training data. + :type training_data: ~azure.ai.ml.entities.Input + :keyword target_column_name: Required. Target column name. + :type target_column_name: str + :keyword validation_data: Optional. Validation data. + :type validation_data: Optional[~azure.ai.ml.entities.Input] + :keyword validation_data_size: Optional. The fraction of training dataset that needs to be set aside for + validation purpose. Values should be in range (0.0 , 1.0). + Applied only when validation dataset is not provided. + :type validation_data_size: Optional[float] + :return: None + """ + self.target_column_name = self.target_column_name if target_column_name is None else target_column_name + self.training_data = self.training_data if training_data is None else training_data + self.validation_data = self.validation_data if validation_data is None else validation_data + self.validation_data_size = self.validation_data_size if validation_data_size is None else validation_data_size + + def set_limits( + self, + *, + max_concurrent_trials: Optional[int] = None, + max_trials: Optional[int] = None, + timeout_minutes: Optional[int] = None, + ) -> None: + """Limit settings for all AutoML Image Jobs. + + :keyword max_concurrent_trials: Maximum number of trials to run concurrently. + :type max_concurrent_trials: Optional[int]. Defaults to None. + :keyword max_trials: Maximum number of trials to run. Defaults to None. + :type max_trials: Optional[int] + :keyword timeout_minutes: AutoML job timeout. 
+ :type timeout_minutes: ~datetime.timedelta + :return: None + """ + self._limits = self._limits or ImageLimitSettings() + self._limits.max_concurrent_trials = ( + max_concurrent_trials if max_concurrent_trials is not None else self._limits.max_concurrent_trials + ) + self._limits.max_trials = max_trials if max_trials is not None else self._limits.max_trials + self._limits.timeout_minutes = timeout_minutes if timeout_minutes is not None else self._limits.timeout_minutes + + def set_sweep( + self, + *, + sampling_algorithm: Union[ + str, SamplingAlgorithmType.RANDOM, SamplingAlgorithmType.GRID, SamplingAlgorithmType.BAYESIAN + ], + early_termination: Optional[Union[BanditPolicy, MedianStoppingPolicy, TruncationSelectionPolicy]] = None, + ) -> None: + """Sweep settings for all AutoML Image jobs. + + :keyword sampling_algorithm: Required. Type of the hyperparameter sampling + algorithms. Possible values include: "Grid", "Random", "Bayesian". + :type sampling_algorithm: Union[str, ~azure.mgmt.machinelearningservices.models.SamplingAlgorithmType.RANDOM, + ~azure.mgmt.machinelearningservices.models.SamplingAlgorithmType.GRID, + ~azure.mgmt.machinelearningservices.models.SamplingAlgorithmType.BAYESIAN] + :keyword early_termination: Type of early termination policy. + :type early_termination: Union[ + ~azure.mgmt.machinelearningservices.models.BanditPolicy, + ~azure.mgmt.machinelearningservices.models.MedianStoppingPolicy, + ~azure.mgmt.machinelearningservices.models.TruncationSelectionPolicy] + :return: None + """ + if self._sweep: + self._sweep.sampling_algorithm = sampling_algorithm + else: + self._sweep = ImageSweepSettings(sampling_algorithm=sampling_algorithm) + + self._sweep.early_termination = early_termination or self._sweep.early_termination + + def __eq__(self, other: object) -> bool: + """Compares two AutoMLImage objects for equality. + + :param other: The other AutoMLImage object to compare to. + :type other: ~azure.ai.ml.automl.AutoMLImage + :return: True if the two AutoMLImage objects are equal; False otherwise. + :rtype: bool + """ + if not isinstance(other, AutoMLImage): + return NotImplemented + + return ( + self.target_column_name == other.target_column_name + and self.training_data == other.training_data + and self.validation_data == other.validation_data + and self.validation_data_size == other.validation_data_size + and self._limits == other._limits + and self._sweep == other._sweep + ) + + def __ne__(self, other: object) -> bool: + """Compares two AutoMLImage objects for inequality. + + :param other: The other AutoMLImage object to compare to. + :type other: ~azure.ai.ml.automl.AutoMLImage + :return: True if the two AutoMLImage objects are not equal; False otherwise. + :rtype: bool + """ + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image_classification_base.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image_classification_base.py new file mode 100644 index 00000000..ef0c8a2d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image_classification_base.py @@ -0,0 +1,439 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Any, Dict, List, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import LearningRateScheduler, StochasticOptimizer +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.entities._job.automl.image.automl_image import AutoMLImage +from azure.ai.ml.entities._job.automl.image.image_classification_search_space import ImageClassificationSearchSpace +from azure.ai.ml.entities._job.automl.image.image_limit_settings import ImageLimitSettings +from azure.ai.ml.entities._job.automl.image.image_model_settings import ImageModelSettingsClassification +from azure.ai.ml.entities._job.automl.image.image_sweep_settings import ImageSweepSettings +from azure.ai.ml.entities._job.automl.search_space import SearchSpace +from azure.ai.ml.entities._job.automl.utils import cast_to_specific_search_space +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +class AutoMLImageClassificationBase(AutoMLImage): + """Base class for AutoML Image Classification and Image Classification Multilabel tasks. + Please do not instantiate this class directly. Instantiate one of the child classes instead. + + :keyword task_type: Type of task to run. + Possible values include: "ImageClassification", "ImageClassificationMultilabel". + :paramtype task_type: str + :keyword limits: Limits for Automl image classification jobs. Defaults to None. + :paramtype limits: Optional[~azure.ai.ml.automl.ImageLimitSettings] + :keyword sweep: Sweep settings for Automl image classification jobs. Defaults to None. + :paramtype sweep: Optional[~azure.ai.ml.automl.ImageSweepSettings] + :keyword training_parameters: Training parameters for Automl image classification jobs. Defaults to None. + :paramtype training_parameters: Optional[~azure.ai.ml.automl.ImageModelSettingsClassification] + :keyword search_space: Search space for Automl image classification jobs. Defaults to None. + :paramtype search_space: Optional[List[~azure.ai.ml.automl.ImageClassificationSearchSpace]] + :keyword kwargs: Other Keyword arguments for AutoMLImageClassificationBase class. + :paramtype kwargs: Dict[str, Any] + """ + + def __init__( + self, + *, + task_type: str, + limits: Optional[ImageLimitSettings] = None, + sweep: Optional[ImageSweepSettings] = None, + training_parameters: Optional[ImageModelSettingsClassification] = None, + search_space: Optional[List[ImageClassificationSearchSpace]] = None, + **kwargs: Any, + ) -> None: + self._training_parameters: Optional[ImageModelSettingsClassification] = None + + super().__init__(task_type=task_type, limits=limits, sweep=sweep, **kwargs) + self.training_parameters = training_parameters # Assigning training_parameters through setter method. + self._search_space = search_space + + @property + def training_parameters(self) -> Optional[ImageModelSettingsClassification]: + """ + :rtype: ~azure.ai.ml.automl.ImageModelSettingsClassification + :return: Training parameters for AutoML Image Classification and Image Classification Multilabel tasks. + """ + return self._training_parameters + + @training_parameters.setter + def training_parameters(self, value: Union[Dict, ImageModelSettingsClassification]) -> None: + """Setting Image training parameters for AutoML Image Classification and Image Classification Multilabel tasks. + + :param value: Training parameters for AutoML Image Classification and Image Classification Multilabel tasks. 
+ :type value: Union[Dict, ~azure.ai.ml.automl.ImageModelSettingsClassification] + :raises ~azure.ml.exceptions.ValidationException if value is not a dictionary or + ImageModelSettingsClassification. + :return: None + """ + if value is None: + self._training_parameters = None + elif isinstance(value, ImageModelSettingsClassification): + self._training_parameters = value + # set_training_parameters convert parameter values from snake case str to enum. + # We need to add any future enum parameters in this call to support snake case str. + self.set_training_parameters( + optimizer=value.optimizer, + learning_rate_scheduler=value.learning_rate_scheduler, + ) + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for model settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_training_parameters(**value) + + @property + def search_space(self) -> Optional[List[ImageClassificationSearchSpace]]: + """ + :rtype: List[~azure.ai.ml.automl.ImageClassificationSearchSpace] + :return: Search space for AutoML Image Classification and Image Classification Multilabel tasks. + """ + return self._search_space + + @search_space.setter + def search_space(self, value: Union[List[Dict], List[SearchSpace]]) -> None: + """Setting Image search space for AutoML Image Classification and Image Classification Multilabel tasks. + + :param value: Search space for AutoML Image Classification and Image Classification Multilabel tasks. + :type value: Union[List[Dict], List[~azure.ai.ml.automl.ImageClassificationSearchSpace]] + :raises ~azure.ml.exceptions.ValidationException if value is not a list of dictionaries or + ImageClassificationSearchSpace. + """ + if not isinstance(value, list): + msg = "Expected a list for search space." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + all_dict_type = all(isinstance(item, dict) for item in value) + all_search_space_type = all(isinstance(item, SearchSpace) for item in value) + + if all_search_space_type or all_dict_type: + self._search_space = [ + cast_to_specific_search_space(item, ImageClassificationSearchSpace, self.task_type) # type: ignore + for item in value + ] + else: + msg = "Expected all items in the list to be either dictionaries or ImageClassificationSearchSpace objects." 
+ raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + # pylint: disable=too-many-locals + def set_training_parameters( + self, + *, + advanced_settings: Optional[str] = None, + ams_gradient: Optional[bool] = None, + beta1: Optional[float] = None, + beta2: Optional[float] = None, + checkpoint_frequency: Optional[int] = None, + checkpoint_run_id: Optional[str] = None, + distributed: Optional[bool] = None, + early_stopping: Optional[bool] = None, + early_stopping_delay: Optional[int] = None, + early_stopping_patience: Optional[int] = None, + enable_onnx_normalization: Optional[bool] = None, + evaluation_frequency: Optional[int] = None, + gradient_accumulation_step: Optional[int] = None, + layers_to_freeze: Optional[int] = None, + learning_rate: Optional[float] = None, + learning_rate_scheduler: Optional[Union[str, LearningRateScheduler]] = None, + model_name: Optional[str] = None, + momentum: Optional[float] = None, + nesterov: Optional[bool] = None, + number_of_epochs: Optional[int] = None, + number_of_workers: Optional[int] = None, + optimizer: Optional[Union[str, StochasticOptimizer]] = None, + random_seed: Optional[int] = None, + step_lr_gamma: Optional[float] = None, + step_lr_step_size: Optional[int] = None, + training_batch_size: Optional[int] = None, + validation_batch_size: Optional[int] = None, + warmup_cosine_lr_cycles: Optional[float] = None, + warmup_cosine_lr_warmup_epochs: Optional[int] = None, + weight_decay: Optional[float] = None, + training_crop_size: Optional[int] = None, + validation_crop_size: Optional[int] = None, + validation_resize_size: Optional[int] = None, + weighted_loss: Optional[int] = None, + ) -> None: + """Setting Image training parameters for AutoML Image Classification and Image Classification Multilabel tasks. + + :keyword advanced_settings: Settings for advanced scenarios. + :paramtype advanced_settings: str + :keyword ams_gradient: Enable AMSGrad when optimizer is 'adam' or 'adamw'. + :paramtype ams_gradient: bool + :keyword beta1: Value of 'beta1' when optimizer is 'adam' or 'adamw'. Must be a float in the + range [0, 1]. + :paramtype beta1: float + :keyword beta2: Value of 'beta2' when optimizer is 'adam' or 'adamw'. Must be a float in the + range [0, 1]. + :paramtype beta2: float + :keyword checkpoint_frequency: Frequency to store model checkpoints. Must be a positive + integer. + :paramtype checkpoint_frequency: int + :keyword checkpoint_run_id: The id of a previous run that has a pretrained checkpoint for + incremental training. + :paramtype checkpoint_run_id: str + :keyword distributed: Whether to use distributed training. + :paramtype distributed: bool + :keyword early_stopping: Enable early stopping logic during training. + :paramtype early_stopping: bool + :keyword early_stopping_delay: Minimum number of epochs or validation evaluations to wait + before primary metric improvement + is tracked for early stopping. Must be a positive integer. + :paramtype early_stopping_delay: int + :keyword early_stopping_patience: Minimum number of epochs or validation evaluations with no + primary metric improvement before + the run is stopped. Must be a positive integer. + :paramtype early_stopping_patience: int + :keyword enable_onnx_normalization: Enable normalization when exporting ONNX model. + :paramtype enable_onnx_normalization: bool + :keyword evaluation_frequency: Frequency to evaluate validation dataset to get metric scores. 
+ Must be a positive integer. + :paramtype evaluation_frequency: int + :keyword gradient_accumulation_step: Gradient accumulation means running a configured number of + "GradAccumulationStep" steps without + updating the model weights while accumulating the gradients of those steps, and then using + the accumulated gradients to compute the weight updates. Must be a positive integer. + :paramtype gradient_accumulation_step: int + :keyword layers_to_freeze: Number of layers to freeze for the model. Must be a positive + integer. + For instance, passing 2 as value for 'seresnext' means + freezing layer0 and layer1. For a full list of models supported and details on layer freeze, + please + see: https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters#model-agnostic-hyperparameters. # pylint: disable=line-too-long + :type layers_to_freeze: int + :keyword learning_rate: Initial learning rate. Must be a float in the range [0, 1]. + :paramtype learning_rate: float + :keyword learning_rate_scheduler: Type of learning rate scheduler. Must be 'warmup_cosine' or + 'step'. Possible values include: "None", "WarmupCosine", "Step". + :type learning_rate_scheduler: str or + ~azure.mgmt.machinelearningservices.models.LearningRateScheduler + :keyword model_name: Name of the model to use for training. + For more information on the available models please visit the official documentation: + https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type model_name: str + :keyword momentum: Value of momentum when optimizer is 'sgd'. Must be a float in the range [0, + 1]. + :paramtype momentum: float + :keyword nesterov: Enable nesterov when optimizer is 'sgd'. + :paramtype nesterov: bool + :keyword number_of_epochs: Number of training epochs. Must be a positive integer. + :paramtype number_of_epochs: int + :keyword number_of_workers: Number of data loader workers. Must be a non-negative integer. + :paramtype number_of_workers: int + :keyword optimizer: Type of optimizer. Possible values include: "None", "Sgd", "Adam", "Adamw". + :type optimizer: str or ~azure.mgmt.machinelearningservices.models.StochasticOptimizer + :keyword random_seed: Random seed to be used when using deterministic training. + :paramtype random_seed: int + :keyword step_lr_gamma: Value of gamma when learning rate scheduler is 'step'. Must be a float + in the range [0, 1]. + :paramtype step_lr_gamma: float + :keyword step_lr_step_size: Value of step size when learning rate scheduler is 'step'. Must be + a positive integer. + :paramtype step_lr_step_size: int + :keyword training_batch_size: Training batch size. Must be a positive integer. + :paramtype training_batch_size: int + :keyword validation_batch_size: Validation batch size. Must be a positive integer. + :paramtype validation_batch_size: int + :keyword warmup_cosine_lr_cycles: Value of cosine cycle when learning rate scheduler is + 'warmup_cosine'. Must be a float in the range [0, 1]. + :paramtype warmup_cosine_lr_cycles: float + :keyword warmup_cosine_lr_warmup_epochs: Value of warmup epochs when learning rate scheduler is + 'warmup_cosine'. Must be a positive integer. + :paramtype warmup_cosine_lr_warmup_epochs: int + :keyword weight_decay: Value of weight decay when optimizer is 'sgd', 'adam', or 'adamw'. Must + be a float in the range[0, 1]. + :paramtype weight_decay: float + :keyword training_crop_size: Image crop size that is input to the neural network for the + training dataset. Must be a positive integer. 
+ :paramtype training_crop_size: int + :keyword validation_crop_size: Image crop size that is input to the neural network for the + validation dataset. Must be a positive integer. + :paramtype validation_crop_size: int + :keyword validation_resize_size: Image size to which to resize before cropping for validation + dataset. Must be a positive integer. + :paramtype validation_resize_size: int + :keyword weighted_loss: Weighted loss. The accepted values are 0 for no weighted loss. + 1 for weighted loss with sqrt.(class_weights). 2 for weighted loss with class_weights. Must be + 0 or 1 or 2. + :paramtype weighted_loss: int + """ + self._training_parameters = self._training_parameters or ImageModelSettingsClassification() + + self._training_parameters.advanced_settings = ( + advanced_settings if advanced_settings is not None else self._training_parameters.advanced_settings + ) + self._training_parameters.ams_gradient = ( + ams_gradient if ams_gradient is not None else self._training_parameters.ams_gradient + ) + self._training_parameters.beta1 = beta1 if beta1 is not None else self._training_parameters.beta1 + self._training_parameters.beta2 = beta2 if beta2 is not None else self._training_parameters.beta2 + self._training_parameters.checkpoint_frequency = ( + checkpoint_frequency if checkpoint_frequency is not None else self._training_parameters.checkpoint_frequency + ) + self._training_parameters.checkpoint_run_id = ( + checkpoint_run_id if checkpoint_run_id is not None else self._training_parameters.checkpoint_run_id + ) + self._training_parameters.distributed = ( + distributed if distributed is not None else self._training_parameters.distributed + ) + self._training_parameters.early_stopping = ( + early_stopping if early_stopping is not None else self._training_parameters.early_stopping + ) + self._training_parameters.early_stopping_delay = ( + early_stopping_delay if early_stopping_delay is not None else self._training_parameters.early_stopping_delay + ) + self._training_parameters.early_stopping_patience = ( + early_stopping_patience + if early_stopping_patience is not None + else self._training_parameters.early_stopping_patience + ) + self._training_parameters.enable_onnx_normalization = ( + enable_onnx_normalization + if enable_onnx_normalization is not None + else self._training_parameters.enable_onnx_normalization + ) + self._training_parameters.evaluation_frequency = ( + evaluation_frequency if evaluation_frequency is not None else self._training_parameters.evaluation_frequency + ) + self._training_parameters.gradient_accumulation_step = ( + gradient_accumulation_step + if gradient_accumulation_step is not None + else self._training_parameters.gradient_accumulation_step + ) + self._training_parameters.layers_to_freeze = ( + layers_to_freeze if layers_to_freeze is not None else self._training_parameters.layers_to_freeze + ) + self._training_parameters.learning_rate = ( + learning_rate if learning_rate is not None else self._training_parameters.learning_rate + ) + self._training_parameters.learning_rate_scheduler = ( + LearningRateScheduler[camel_to_snake(learning_rate_scheduler).upper()] + if learning_rate_scheduler is not None + else self._training_parameters.learning_rate_scheduler + ) + self._training_parameters.model_name = ( + model_name if model_name is not None else self._training_parameters.model_name + ) + self._training_parameters.momentum = momentum if momentum is not None else self._training_parameters.momentum + self._training_parameters.nesterov = nesterov if nesterov 
is not None else self._training_parameters.nesterov + self._training_parameters.number_of_epochs = ( + number_of_epochs if number_of_epochs is not None else self._training_parameters.number_of_epochs + ) + self._training_parameters.number_of_workers = ( + number_of_workers if number_of_workers is not None else self._training_parameters.number_of_workers + ) + self._training_parameters.optimizer = ( + StochasticOptimizer[camel_to_snake(optimizer).upper()] + if optimizer is not None + else self._training_parameters.optimizer + ) + self._training_parameters.random_seed = ( + random_seed if random_seed is not None else self._training_parameters.random_seed + ) + self._training_parameters.step_lr_gamma = ( + step_lr_gamma if step_lr_gamma is not None else self._training_parameters.step_lr_gamma + ) + self._training_parameters.step_lr_step_size = ( + step_lr_step_size if step_lr_step_size is not None else self._training_parameters.step_lr_step_size + ) + self._training_parameters.training_batch_size = ( + training_batch_size if training_batch_size is not None else self._training_parameters.training_batch_size + ) + self._training_parameters.validation_batch_size = ( + validation_batch_size + if validation_batch_size is not None + else self._training_parameters.validation_batch_size + ) + self._training_parameters.warmup_cosine_lr_cycles = ( + warmup_cosine_lr_cycles + if warmup_cosine_lr_cycles is not None + else self._training_parameters.warmup_cosine_lr_cycles + ) + self._training_parameters.warmup_cosine_lr_warmup_epochs = ( + warmup_cosine_lr_warmup_epochs + if warmup_cosine_lr_warmup_epochs is not None + else self._training_parameters.warmup_cosine_lr_warmup_epochs + ) + self._training_parameters.weight_decay = ( + weight_decay if weight_decay is not None else self._training_parameters.weight_decay + ) + self._training_parameters.training_crop_size = ( + training_crop_size if training_crop_size is not None else self._training_parameters.training_crop_size + ) + self._training_parameters.validation_crop_size = ( + validation_crop_size if validation_crop_size is not None else self._training_parameters.validation_crop_size + ) + self._training_parameters.validation_resize_size = ( + validation_resize_size + if validation_resize_size is not None + else self._training_parameters.validation_resize_size + ) + self._training_parameters.weighted_loss = ( + weighted_loss if weighted_loss is not None else self._training_parameters.weighted_loss + ) + + # pylint: enable=too-many-locals + + def extend_search_space( + self, + value: Union[SearchSpace, List[SearchSpace]], + ) -> None: + """Add Search space for AutoML Image Classification and Image Classification Multilabel tasks. 
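+
+        A minimal usage sketch (illustrative only; ``image_job`` stands for any job derived
+        from this class, and the model names and distribution values are placeholders rather
+        than recommendations):
+
+        .. code-block:: python
+
+            from azure.ai.ml.automl import SearchSpace
+            from azure.ai.ml.sweep import Choice, Uniform
+
+            image_job.extend_search_space(
+                [
+                    SearchSpace(
+                        model_name=Choice(values=["vitb16r224", "seresnext"]),
+                        learning_rate=Uniform(min_value=0.001, max_value=0.01),
+                        number_of_epochs=Choice(values=[15, 30]),
+                    ),
+                ]
+            )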
+ + :param value: specify either an instance of ImageClassificationSearchSpace or list of + ImageClassificationSearchSpace for searching through the parameter space + :type value: Union[ImageClassificationSearchSpace, List[ImageClassificationSearchSpace]] + """ + self._search_space = self._search_space or [] + + if isinstance(value, list): + self._search_space.extend( + [ + cast_to_specific_search_space(item, ImageClassificationSearchSpace, self.task_type) # type: ignore + for item in value + ] + ) + else: + self._search_space.append( + cast_to_specific_search_space(value, ImageClassificationSearchSpace, self.task_type) # type: ignore + ) + + @classmethod + def _get_search_space_from_str(cls, search_space_str: str) -> Optional[List[ImageClassificationSearchSpace]]: + return ( + [ImageClassificationSearchSpace._from_rest_object(entry) for entry in search_space_str if entry is not None] + if search_space_str is not None + else None + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, AutoMLImageClassificationBase): + return NotImplemented + + if not super().__eq__(other): + return False + + return self._training_parameters == other._training_parameters and self._search_space == other._search_space + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image_object_detection_base.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image_object_detection_base.py new file mode 100644 index 00000000..db0c7bc6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image_object_detection_base.py @@ -0,0 +1,524 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Any, Dict, List, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + LearningRateScheduler, + LogTrainingMetrics, + LogValidationLoss, + ModelSize, + StochasticOptimizer, + ValidationMetricType, +) +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.entities._job.automl import SearchSpace +from azure.ai.ml.entities._job.automl.image.automl_image import AutoMLImage +from azure.ai.ml.entities._job.automl.image.image_limit_settings import ImageLimitSettings +from azure.ai.ml.entities._job.automl.image.image_model_settings import ImageModelSettingsObjectDetection +from azure.ai.ml.entities._job.automl.image.image_object_detection_search_space import ImageObjectDetectionSearchSpace +from azure.ai.ml.entities._job.automl.image.image_sweep_settings import ImageSweepSettings +from azure.ai.ml.entities._job.automl.utils import cast_to_specific_search_space +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +class AutoMLImageObjectDetectionBase(AutoMLImage): + """Base class for AutoML Image Object Detection and Image Instance Segmentation tasks. + + :keyword task_type: Type of task to run. Possible values include: "ImageObjectDetection", + "ImageInstanceSegmentation". + :paramtype task_type: str + :keyword limits: The resource limits for the job. + :paramtype limits: Optional[~azure.ai.ml.entities._job.automl.image.image_limit_settings.ImageLimitSettings] + :keyword sweep: The sweep settings for the job. 
+ :paramtype sweep: Optional[~azure.ai.ml.entities._job.automl.image.image_sweep_settings.ImageSweepSettings] + :keyword training_parameters: The training parameters for the job. + :paramtype training_parameters: Optional[~azure.ai.ml.automl.ImageModelSettingsObjectDetection] + :keyword search_space: The search space for the job. + :paramtype search_space: Optional[List[~azure.ai.ml.automl.ImageObjectDetectionSearchSpace]] + """ + + def __init__( + self, + *, + task_type: str, + limits: Optional[ImageLimitSettings] = None, + sweep: Optional[ImageSweepSettings] = None, + training_parameters: Optional[ImageModelSettingsObjectDetection] = None, + search_space: Optional[List[ImageObjectDetectionSearchSpace]] = None, + **kwargs: Any, + ) -> None: + self._training_parameters: Optional[ImageModelSettingsObjectDetection] = None + + super().__init__(task_type=task_type, limits=limits, sweep=sweep, **kwargs) + + self.training_parameters = training_parameters # Assigning training_parameters through setter method. + + self._search_space = search_space + + @property + def training_parameters(self) -> Optional[ImageModelSettingsObjectDetection]: + return self._training_parameters + + @training_parameters.setter + def training_parameters(self, value: Union[Dict, ImageModelSettingsObjectDetection]) -> None: + if value is None: + self._training_parameters = None + elif isinstance(value, ImageModelSettingsObjectDetection): + self._training_parameters = value + # set_training_parameters convert parameter values from snake case str to enum. + # We need to add any future enum parameters in this call to support snake case str. + self.set_training_parameters( + optimizer=value.optimizer, + learning_rate_scheduler=value.learning_rate_scheduler, + model_size=value.model_size, + validation_metric_type=value.validation_metric_type, + log_training_metrics=value.log_training_metrics, + log_validation_loss=value.log_validation_loss, + ) + elif value is None: + self._training_parameters = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for model settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_training_parameters(**value) + + @property + def search_space(self) -> Optional[List[ImageObjectDetectionSearchSpace]]: + return self._search_space + + @search_space.setter + def search_space(self, value: Union[List[Dict], List[SearchSpace]]) -> None: + if not isinstance(value, list): + msg = "Expected a list for search space." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + all_dict_type = all(isinstance(item, dict) for item in value) + all_search_space_type = all(isinstance(item, SearchSpace) for item in value) + + if all_search_space_type or all_dict_type: + self._search_space = [ + cast_to_specific_search_space(item, ImageObjectDetectionSearchSpace, self.task_type) # type: ignore + for item in value + ] + else: + msg = "Expected all items in the list to be either dictionaries or SearchSpace objects." 
+ raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + # pylint: disable=too-many-locals + def set_training_parameters( + self, + *, + advanced_settings: Optional[str] = None, + ams_gradient: Optional[bool] = None, + beta1: Optional[float] = None, + beta2: Optional[float] = None, + checkpoint_frequency: Optional[int] = None, + checkpoint_run_id: Optional[str] = None, + distributed: Optional[bool] = None, + early_stopping: Optional[bool] = None, + early_stopping_delay: Optional[int] = None, + early_stopping_patience: Optional[int] = None, + enable_onnx_normalization: Optional[bool] = None, + evaluation_frequency: Optional[int] = None, + gradient_accumulation_step: Optional[int] = None, + layers_to_freeze: Optional[int] = None, + learning_rate: Optional[float] = None, + learning_rate_scheduler: Optional[Union[str, LearningRateScheduler]] = None, + model_name: Optional[str] = None, + momentum: Optional[float] = None, + nesterov: Optional[bool] = None, + number_of_epochs: Optional[int] = None, + number_of_workers: Optional[int] = None, + optimizer: Optional[Union[str, StochasticOptimizer]] = None, + random_seed: Optional[int] = None, + step_lr_gamma: Optional[float] = None, + step_lr_step_size: Optional[int] = None, + training_batch_size: Optional[int] = None, + validation_batch_size: Optional[int] = None, + warmup_cosine_lr_cycles: Optional[float] = None, + warmup_cosine_lr_warmup_epochs: Optional[int] = None, + weight_decay: Optional[float] = None, + box_detections_per_image: Optional[int] = None, + box_score_threshold: Optional[float] = None, + image_size: Optional[int] = None, + max_size: Optional[int] = None, + min_size: Optional[int] = None, + model_size: Optional[Union[str, ModelSize]] = None, + multi_scale: Optional[bool] = None, + nms_iou_threshold: Optional[float] = None, + tile_grid_size: Optional[str] = None, + tile_overlap_ratio: Optional[float] = None, + tile_predictions_nms_threshold: Optional[float] = None, + validation_iou_threshold: Optional[float] = None, + validation_metric_type: Optional[Union[str, ValidationMetricType]] = None, + log_training_metrics: Optional[Union[str, LogTrainingMetrics]] = None, + log_validation_loss: Optional[Union[str, LogValidationLoss]] = None, + ) -> None: + """Setting Image training parameters for for AutoML Image Object Detection and Image Instance Segmentation + tasks. + + :keyword advanced_settings: Settings for advanced scenarios. + :paramtype advanced_settings: str + :keyword ams_gradient: Enable AMSGrad when optimizer is 'adam' or 'adamw'. + :paramtype ams_gradient: bool + :keyword beta1: Value of 'beta1' when optimizer is 'adam' or 'adamw'. Must be a float in the + range [0, 1]. + :paramtype beta1: float + :keyword beta2: Value of 'beta2' when optimizer is 'adam' or 'adamw'. Must be a float in the + range [0, 1]. + :paramtype beta2: float + :keyword checkpoint_frequency: Frequency to store model checkpoints. Must be a positive + integer. + :paramtype checkpoint_frequency: int + :keyword checkpoint_run_id: The id of a previous run that has a pretrained checkpoint for + incremental training. + :paramtype checkpoint_run_id: str + :keyword distributed: Whether to use distributed training. + :paramtype distributed: bool + :keyword early_stopping: Enable early stopping logic during training. 
+ :paramtype early_stopping: bool + :keyword early_stopping_delay: Minimum number of epochs or validation evaluations to wait + before primary metric improvement + is tracked for early stopping. Must be a positive integer. + :paramtype early_stopping_delay: int + :keyword early_stopping_patience: Minimum number of epochs or validation evaluations with no + primary metric improvement before + the run is stopped. Must be a positive integer. + :paramtype early_stopping_patience: int + :keyword enable_onnx_normalization: Enable normalization when exporting ONNX model. + :paramtype enable_onnx_normalization: bool + :keyword evaluation_frequency: Frequency to evaluate validation dataset to get metric scores. + Must be a positive integer. + :paramtype evaluation_frequency: int + :keyword gradient_accumulation_step: Gradient accumulation means running a configured number of + "GradAccumulationStep" steps without + updating the model weights while accumulating the gradients of those steps, and then using + the accumulated gradients to compute the weight updates. Must be a positive integer. + :paramtype gradient_accumulation_step: int + :keyword layers_to_freeze: Number of layers to freeze for the model. Must be a positive + integer. + For instance, passing 2 as value for 'seresnext' means + freezing layer0 and layer1. For a full list of models supported and details on layer freeze, + please + see: https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters#model-agnostic-hyperparameters. # pylint: disable=line-too-long + :type layers_to_freeze: int + :keyword learning_rate: Initial learning rate. Must be a float in the range [0, 1]. + :paramtype learning_rate: float + :keyword learning_rate_scheduler: Type of learning rate scheduler. Must be 'warmup_cosine' or + 'step'. Possible values include: "None", "WarmupCosine", "Step". + :type learning_rate_scheduler: str or + ~azure.mgmt.machinelearningservices.models.LearningRateScheduler + :keyword model_name: Name of the model to use for training. + For more information on the available models please visit the official documentation: + https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type model_name: str + :keyword momentum: Value of momentum when optimizer is 'sgd'. Must be a float in the range [0, + 1]. + :paramtype momentum: float + :keyword nesterov: Enable nesterov when optimizer is 'sgd'. + :paramtype nesterov: bool + :keyword number_of_epochs: Number of training epochs. Must be a positive integer. + :paramtype number_of_epochs: int + :keyword number_of_workers: Number of data loader workers. Must be a non-negative integer. + :paramtype number_of_workers: int + :keyword optimizer: Type of optimizer. Possible values include: "None", "Sgd", "Adam", "Adamw". + :type optimizer: str or ~azure.mgmt.machinelearningservices.models.StochasticOptimizer + :keyword random_seed: Random seed to be used when using deterministic training. + :paramtype random_seed: int + :keyword step_lr_gamma: Value of gamma when learning rate scheduler is 'step'. Must be a float + in the range [0, 1]. + :paramtype step_lr_gamma: float + :keyword step_lr_step_size: Value of step size when learning rate scheduler is 'step'. Must be + a positive integer. + :paramtype step_lr_step_size: int + :keyword training_batch_size: Training batch size. Must be a positive integer. + :paramtype training_batch_size: int + :keyword validation_batch_size: Validation batch size. Must be a positive integer. 
+        :paramtype validation_batch_size: int
+        :keyword warmup_cosine_lr_cycles: Value of cosine cycle when learning rate scheduler is
+            'warmup_cosine'. Must be a float in the range [0, 1].
+        :paramtype warmup_cosine_lr_cycles: float
+        :keyword warmup_cosine_lr_warmup_epochs: Value of warmup epochs when learning rate scheduler is
+            'warmup_cosine'. Must be a positive integer.
+        :paramtype warmup_cosine_lr_warmup_epochs: int
+        :keyword weight_decay: Value of weight decay when optimizer is 'sgd', 'adam', or 'adamw'. Must
+            be a float in the range [0, 1].
+        :paramtype weight_decay: float
+        :keyword box_detections_per_image: Maximum number of detections per image, for all classes.
+            Must be a positive integer.
+            Note: This setting is not supported for the 'yolov5' algorithm.
+        :type box_detections_per_image: int
+        :keyword box_score_threshold: During inference, only return proposals with a classification
+            score greater than BoxScoreThreshold. Must be a float in the range [0, 1].
+        :paramtype box_score_threshold: float
+        :keyword image_size: Image size for training and validation. Must be a positive integer.
+            Note: The training run may get into CUDA OOM if the size is too big.
+            Note: This setting is only supported for the 'yolov5' algorithm.
+        :type image_size: int
+        :keyword max_size: Maximum size of the image to be rescaled before feeding it to the backbone.
+            Must be a positive integer. Note: training run may get into CUDA OOM if the size is too big.
+            Note: This setting is not supported for the 'yolov5' algorithm.
+        :type max_size: int
+        :keyword min_size: Minimum size of the image to be rescaled before feeding it to the backbone.
+            Must be a positive integer. Note: training run may get into CUDA OOM if the size is too big.
+            Note: This setting is not supported for the 'yolov5' algorithm.
+        :type min_size: int
+        :keyword model_size: Model size. Must be 'small', 'medium', 'large', or 'extra_large'.
+            Note: training run may get into CUDA OOM if the model size is too big.
+            Note: This setting is only supported for the 'yolov5' algorithm.
+        :type model_size: str or ~azure.mgmt.machinelearningservices.models.ModelSize
+        :keyword multi_scale: Enable multi-scale image by varying image size by +/- 50%.
+            Note: training run may get into CUDA OOM if there is not sufficient GPU memory.
+            Note: This setting is only supported for the 'yolov5' algorithm.
+        :type multi_scale: bool
+        :keyword nms_iou_threshold: IOU threshold used during inference in NMS post processing. Must be
+            a float in the range [0, 1].
+        :paramtype nms_iou_threshold: float
+        :keyword tile_grid_size: The grid size to use for tiling each image. Note: TileGridSize must
+            not be None to enable small object detection logic. A string containing two integers in
+            mxn format.
+        :type tile_grid_size: str
+        :keyword tile_overlap_ratio: Overlap ratio between adjacent tiles in each dimension. Must be
+            a float in the range [0, 1).
+        :paramtype tile_overlap_ratio: float
+        :keyword tile_predictions_nms_threshold: The IOU threshold to use to perform NMS (non-maximum
+            suppression) while merging predictions from tiles and image. Used in validation/inference.
+            Must be a float in the range [0, 1].
+        :type tile_predictions_nms_threshold: float
+        :keyword validation_iou_threshold: IOU threshold to use when computing validation metric. Must
+            be a float in the range [0, 1].
+        :paramtype validation_iou_threshold: float
+        :keyword validation_metric_type: Metric computation method to use for validation metrics. Must
+            be 'none', 'coco', 'voc', or 'coco_voc'.
+ :paramtype validation_metric_type: str or ~azure.mgmt.machinelearningservices.models.ValidationMetricType + :keyword log_training_metrics: indicates whether or not to log training metrics. Must + be 'Enable' or 'Disable' + :paramtype log_training_metrics: str or ~azure.mgmt.machinelearningservices.models.LogTrainingMetrics + :keyword log_validation_loss: indicates whether or not to log validation loss. Must + be 'Enable' or 'Disable' + :paramtype log_validation_loss: str or ~azure.mgmt.machinelearningservices.models.LogValidationLoss + """ + self._training_parameters = self._training_parameters or ImageModelSettingsObjectDetection() + + self._training_parameters.advanced_settings = ( + advanced_settings if advanced_settings is not None else self._training_parameters.advanced_settings + ) + self._training_parameters.ams_gradient = ( + ams_gradient if ams_gradient is not None else self._training_parameters.ams_gradient + ) + self._training_parameters.beta1 = beta1 if beta1 is not None else self._training_parameters.beta1 + self._training_parameters.beta2 = beta2 if beta2 is not None else self._training_parameters.beta2 + self._training_parameters.checkpoint_frequency = ( + checkpoint_frequency if checkpoint_frequency is not None else self._training_parameters.checkpoint_frequency + ) + self._training_parameters.checkpoint_run_id = ( + checkpoint_run_id if checkpoint_run_id is not None else self._training_parameters.checkpoint_run_id + ) + self._training_parameters.distributed = ( + distributed if distributed is not None else self._training_parameters.distributed + ) + self._training_parameters.early_stopping = ( + early_stopping if early_stopping is not None else self._training_parameters.early_stopping + ) + self._training_parameters.early_stopping_delay = ( + early_stopping_delay if early_stopping_delay is not None else self._training_parameters.early_stopping_delay + ) + self._training_parameters.early_stopping_patience = ( + early_stopping_patience + if early_stopping_patience is not None + else self._training_parameters.early_stopping_patience + ) + self._training_parameters.enable_onnx_normalization = ( + enable_onnx_normalization + if enable_onnx_normalization is not None + else self._training_parameters.enable_onnx_normalization + ) + self._training_parameters.evaluation_frequency = ( + evaluation_frequency if evaluation_frequency is not None else self._training_parameters.evaluation_frequency + ) + self._training_parameters.gradient_accumulation_step = ( + gradient_accumulation_step + if gradient_accumulation_step is not None + else self._training_parameters.gradient_accumulation_step + ) + self._training_parameters.layers_to_freeze = ( + layers_to_freeze if layers_to_freeze is not None else self._training_parameters.layers_to_freeze + ) + self._training_parameters.learning_rate = ( + learning_rate if learning_rate is not None else self._training_parameters.learning_rate + ) + self._training_parameters.learning_rate_scheduler = ( + LearningRateScheduler[camel_to_snake(learning_rate_scheduler)] + if learning_rate_scheduler is not None + else self._training_parameters.learning_rate_scheduler + ) + self._training_parameters.model_name = ( + model_name if model_name is not None else self._training_parameters.model_name + ) + self._training_parameters.momentum = momentum if momentum is not None else self._training_parameters.momentum + self._training_parameters.nesterov = nesterov if nesterov is not None else self._training_parameters.nesterov + 
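+        # The assignments in this method only overwrite a setting when the corresponding keyword
+        # argument is not None, so previously configured values are preserved. String enum values
+        # (for example "warmup_cosine" or "coco_voc") are converted with camel_to_snake before
+        # being looked up on the corresponding REST enum types.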
self._training_parameters.number_of_epochs = ( + number_of_epochs if number_of_epochs is not None else self._training_parameters.number_of_epochs + ) + self._training_parameters.number_of_workers = ( + number_of_workers if number_of_workers is not None else self._training_parameters.number_of_workers + ) + self._training_parameters.optimizer = ( + StochasticOptimizer[camel_to_snake(optimizer)] + if optimizer is not None + else self._training_parameters.optimizer + ) + self._training_parameters.random_seed = ( + random_seed if random_seed is not None else self._training_parameters.random_seed + ) + self._training_parameters.step_lr_gamma = ( + step_lr_gamma if step_lr_gamma is not None else self._training_parameters.step_lr_gamma + ) + self._training_parameters.step_lr_step_size = ( + step_lr_step_size if step_lr_step_size is not None else self._training_parameters.step_lr_step_size + ) + self._training_parameters.training_batch_size = ( + training_batch_size if training_batch_size is not None else self._training_parameters.training_batch_size + ) + self._training_parameters.validation_batch_size = ( + validation_batch_size + if validation_batch_size is not None + else self._training_parameters.validation_batch_size + ) + self._training_parameters.warmup_cosine_lr_cycles = ( + warmup_cosine_lr_cycles + if warmup_cosine_lr_cycles is not None + else self._training_parameters.warmup_cosine_lr_cycles + ) + self._training_parameters.warmup_cosine_lr_warmup_epochs = ( + warmup_cosine_lr_warmup_epochs + if warmup_cosine_lr_warmup_epochs is not None + else self._training_parameters.warmup_cosine_lr_warmup_epochs + ) + self._training_parameters.weight_decay = ( + weight_decay if weight_decay is not None else self._training_parameters.weight_decay + ) + self._training_parameters.box_detections_per_image = ( + box_detections_per_image + if box_detections_per_image is not None + else self._training_parameters.box_detections_per_image + ) + self._training_parameters.box_score_threshold = ( + box_score_threshold if box_score_threshold is not None else self._training_parameters.box_score_threshold + ) + self._training_parameters.image_size = ( + image_size if image_size is not None else self._training_parameters.image_size + ) + self._training_parameters.max_size = max_size if max_size is not None else self._training_parameters.max_size + self._training_parameters.min_size = min_size if min_size is not None else self._training_parameters.min_size + self._training_parameters.model_size = ( + ModelSize[camel_to_snake(model_size)] if model_size is not None else self._training_parameters.model_size + ) + self._training_parameters.multi_scale = ( + multi_scale if multi_scale is not None else self._training_parameters.multi_scale + ) + self._training_parameters.nms_iou_threshold = ( + nms_iou_threshold if nms_iou_threshold is not None else self._training_parameters.nms_iou_threshold + ) + self._training_parameters.tile_grid_size = ( + tile_grid_size if tile_grid_size is not None else self._training_parameters.tile_grid_size + ) + self._training_parameters.tile_overlap_ratio = ( + tile_overlap_ratio if tile_overlap_ratio is not None else self._training_parameters.tile_overlap_ratio + ) + self._training_parameters.tile_predictions_nms_threshold = ( + tile_predictions_nms_threshold + if tile_predictions_nms_threshold is not None + else self._training_parameters.tile_predictions_nms_threshold + ) + self._training_parameters.validation_iou_threshold = ( + validation_iou_threshold + if validation_iou_threshold is not 
None + else self._training_parameters.validation_iou_threshold + ) + self._training_parameters.validation_metric_type = ( + ValidationMetricType[camel_to_snake(validation_metric_type)] + if validation_metric_type is not None + else self._training_parameters.validation_metric_type + ) + self._training_parameters.log_training_metrics = ( + LogTrainingMetrics[camel_to_snake(log_training_metrics)] + if log_training_metrics is not None + else self._training_parameters.log_training_metrics + ) + self._training_parameters.log_validation_loss = ( + LogValidationLoss[camel_to_snake(log_validation_loss)] + if log_validation_loss is not None + else self._training_parameters.log_validation_loss + ) + + # pylint: enable=too-many-locals + + def extend_search_space( + self, + value: Union[SearchSpace, List[SearchSpace]], + ) -> None: + """Add search space for AutoML Image Object Detection and Image Instance Segmentation tasks. + + :param value: Search through the parameter space + :type value: Union[SearchSpace, List[SearchSpace]] + """ + self._search_space = self._search_space or [] + + if isinstance(value, list): + self._search_space.extend( + [ + cast_to_specific_search_space(item, ImageObjectDetectionSearchSpace, self.task_type) # type: ignore + for item in value + ] + ) + else: + self._search_space.append( + cast_to_specific_search_space(value, ImageObjectDetectionSearchSpace, self.task_type) # type: ignore + ) + + @classmethod + def _get_search_space_from_str(cls, search_space_str: str) -> Optional[List[ImageObjectDetectionSearchSpace]]: + return ( + [ + ImageObjectDetectionSearchSpace._from_rest_object(entry) + for entry in search_space_str + if entry is not None + ] + if search_space_str is not None + else None + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, AutoMLImageObjectDetectionBase): + return NotImplemented + + if not super().__eq__(other): + return False + + return self._training_parameters == other._training_parameters and self._search_space == other._search_space + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_job.py new file mode 100644 index 00000000..a1b9dbc3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_job.py @@ -0,0 +1,244 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# ---------------------------------------------------------
+
+# pylint: disable=protected-access
+
+from typing import Any, Dict, Optional, Union
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob
+from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationPrimaryMetrics
+from azure.ai.ml._restclient.v2023_04_01_preview.models import ImageClassification as RestImageClassification
+from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase, TaskType
+from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression
+from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY
+from azure.ai.ml.constants._job.automl import AutoMLConstants
+from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration
+from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs
+from azure.ai.ml.entities._job.automl.image.automl_image_classification_base import AutoMLImageClassificationBase
+from azure.ai.ml.entities._job.automl.image.image_limit_settings import ImageLimitSettings
+from azure.ai.ml.entities._job.automl.image.image_model_settings import ImageModelSettingsClassification
+from azure.ai.ml.entities._job.automl.image.image_sweep_settings import ImageSweepSettings
+from azure.ai.ml.entities._util import load_from_dict
+
+
+class ImageClassificationJob(AutoMLImageClassificationBase):
+    """Configuration for AutoML multi-class Image Classification job.
+
+    :param primary_metric: The primary metric to use for optimization.
+    :type primary_metric: Optional[Union[str, ~azure.ai.ml.automl.ClassificationPrimaryMetrics]]
+    :param kwargs: Job-specific arguments.
+    :type kwargs: Dict[str, Any]
+
+    .. admonition:: Example:
+
+        ..
literalinclude:: ../samples/ml_samples_automl_image.py + :start-after: [START automl.automl_image_job.image_classification_job] + :end-before: [END automl.automl_image_job.image_classification_job] + :language: python + :dedent: 8 + :caption: creating an automl image classification job + """ + + _DEFAULT_PRIMARY_METRIC = ClassificationPrimaryMetrics.ACCURACY + + def __init__( + self, + *, + primary_metric: Optional[Union[str, ClassificationPrimaryMetrics]] = None, + **kwargs: Any, + ) -> None: + + # Extract any super class init settings + limits = kwargs.pop("limits", None) + sweep = kwargs.pop("sweep", None) + training_parameters = kwargs.pop("training_parameters", None) + search_space = kwargs.pop("search_space", None) + + super().__init__( + task_type=TaskType.IMAGE_CLASSIFICATION, + limits=limits, + sweep=sweep, + training_parameters=training_parameters, + search_space=search_space, + **kwargs, + ) + + self.primary_metric = primary_metric or ImageClassificationJob._DEFAULT_PRIMARY_METRIC + + @property + def primary_metric(self) -> Optional[Union[str, ClassificationPrimaryMetrics]]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ClassificationPrimaryMetrics]) -> None: + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + self._primary_metric = ( + ImageClassificationJob._DEFAULT_PRIMARY_METRIC + if value is None + else ClassificationPrimaryMetrics[camel_to_snake(value).upper()] + ) + + def _to_rest_object(self) -> JobBase: + image_classification_task = RestImageClassification( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + validation_data_size=self.validation_data_size, + limit_settings=self._limits._to_rest_object() if self._limits else None, + sweep_settings=self._sweep._to_rest_object() if self._sweep else None, + model_settings=self._training_parameters._to_rest_object() if self._training_parameters else None, + search_space=( + [entry._to_rest_object() for entry in self._search_space if entry is not None] + if self._search_space is not None + else None + ), + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + # resolve data inputs in rest obj + self._resolve_data_inputs(image_classification_task) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=image_classification_task, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "ImageClassificationJob": + properties: RestAutoMLJob = obj.properties + task_details: RestImageClassification = properties.task_details + + job_args_dict = { + "id": obj.id, + "name": obj.name, + "description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "experiment_name": properties.experiment_name, + "services": properties.services, + "status": properties.status, + "creation_context": obj.system_data, + "display_name": 
properties.display_name, + "compute": properties.compute_id, + "outputs": from_rest_data_outputs(properties.outputs), + "resources": properties.resources, + "identity": ( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + "queue_settings": properties.queue_settings, + } + + image_classification_job = cls( + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + validation_data_size=task_details.validation_data_size, + limits=( + ImageLimitSettings._from_rest_object(task_details.limit_settings) + if task_details.limit_settings + else None + ), + sweep=( + ImageSweepSettings._from_rest_object(task_details.sweep_settings) + if task_details.sweep_settings + else None + ), + training_parameters=( + ImageModelSettingsClassification._from_rest_object(task_details.model_settings) + if task_details.model_settings + else None + ), + search_space=cls._get_search_space_from_str(task_details.search_space), + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + **job_args_dict, + ) + + image_classification_job._restore_data_inputs() + + return image_classification_job + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "ImageClassificationJob": + from azure.ai.ml._schema.automl.image_vertical.image_classification import ImageClassificationSchema + from azure.ai.ml._schema.pipeline.automl_node import ImageClassificationMulticlassNodeSchema + + inside_pipeline = kwargs.pop("inside_pipeline", False) + if inside_pipeline: + if context.get("inside_pipeline", None) is None: + context["inside_pipeline"] = True + loaded_data = load_from_dict( + ImageClassificationMulticlassNodeSchema, + data, + context, + additional_message, + **kwargs, + ) + else: + loaded_data = load_from_dict(ImageClassificationSchema, data, context, additional_message, **kwargs) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "ImageClassificationJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + data_settings = { + "training_data": loaded_data.pop("training_data"), + "target_column_name": loaded_data.pop("target_column_name"), + "validation_data": loaded_data.pop("validation_data", None), + "validation_data_size": loaded_data.pop("validation_data_size", None), + } + job = ImageClassificationJob(**loaded_data) + job.set_data(**data_settings) + return job + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.image_vertical.image_classification import ImageClassificationSchema + from azure.ai.ml._schema.pipeline.automl_node import ImageClassificationMulticlassNodeSchema + + schema_dict: dict = {} + if inside_pipeline: + schema_dict = ImageClassificationMulticlassNodeSchema( + context={BASE_PATH_CONTEXT_KEY: "./", "inside_pipeline": True} + ).dump(self) + else: + schema_dict = ImageClassificationSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + + return schema_dict + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageClassificationJob): + return NotImplemented + + if not super().__eq__(other): + return False + + return self.primary_metric == other.primary_metric + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git 
a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_multilabel_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_multilabel_job.py new file mode 100644 index 00000000..541f41c7 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_multilabel_job.py @@ -0,0 +1,252 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationMultilabelPrimaryMetrics +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + ImageClassificationMultilabel as RestImageClassificationMultilabel, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase, TaskType +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.image.automl_image_classification_base import AutoMLImageClassificationBase +from azure.ai.ml.entities._job.automl.image.image_limit_settings import ImageLimitSettings +from azure.ai.ml.entities._job.automl.image.image_model_settings import ImageModelSettingsClassification +from azure.ai.ml.entities._job.automl.image.image_sweep_settings import ImageSweepSettings +from azure.ai.ml.entities._util import load_from_dict + + +class ImageClassificationMultilabelJob(AutoMLImageClassificationBase): + """Configuration for AutoML multi-label Image Classification job. + + :param primary_metric: The primary metric to use for optimization. + :type primary_metric: Optional[str, ~azure.ai.ml.automl.ClassificationMultilabelPrimaryMetrics] + :param kwargs: Job-specific arguments. + :type kwargs: Dict[str, Any] + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_automl_image.py + :start-after: [START automl.automl_image_job.image_classification_multilabel_job] + :end-before: [END automl.automl_image_job.image_classification_multilabel_job] + :language: python + :dedent: 8 + :caption: creating an automl image classification multilabel job + """ + + _DEFAULT_PRIMARY_METRIC = ClassificationMultilabelPrimaryMetrics.IOU + + def __init__( + self, + *, + primary_metric: Optional[Union[str, ClassificationMultilabelPrimaryMetrics]] = None, + **kwargs: Any, + ) -> None: + + # Extract any super class init settings + limits = kwargs.pop("limits", None) + sweep = kwargs.pop("sweep", None) + training_parameters = kwargs.pop("training_parameters", None) + search_space = kwargs.pop("search_space", None) + + super().__init__( + task_type=TaskType.IMAGE_CLASSIFICATION_MULTILABEL, + limits=limits, + sweep=sweep, + training_parameters=training_parameters, + search_space=search_space, + **kwargs, + ) + + self.primary_metric = primary_metric or ImageClassificationMultilabelJob._DEFAULT_PRIMARY_METRIC + + @property + def primary_metric(self) -> Union[str, ClassificationMultilabelPrimaryMetrics]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ClassificationMultilabelPrimaryMetrics]) -> None: + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + self._primary_metric = ( + ImageClassificationMultilabelJob._DEFAULT_PRIMARY_METRIC + if value is None + else ClassificationMultilabelPrimaryMetrics[camel_to_snake(value).upper()] + ) + + def _to_rest_object(self) -> JobBase: + image_classification_multilabel_task = RestImageClassificationMultilabel( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + validation_data_size=self.validation_data_size, + limit_settings=self._limits._to_rest_object() if self._limits else None, + sweep_settings=self._sweep._to_rest_object() if self._sweep else None, + model_settings=self._training_parameters._to_rest_object() if self._training_parameters else None, + search_space=( + [entry._to_rest_object() for entry in self._search_space if entry is not None] + if self._search_space is not None + else None + ), + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + # resolve data inputs in rest obj + self._resolve_data_inputs(image_classification_multilabel_task) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=image_classification_multilabel_task, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "ImageClassificationMultilabelJob": + properties: RestAutoMLJob = obj.properties + task_details: RestImageClassificationMultilabel = properties.task_details + + job_args_dict = { + "id": obj.id, + "name": obj.name, + "description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "experiment_name": 
properties.experiment_name, + "services": properties.services, + "status": properties.status, + "creation_context": obj.system_data, + "display_name": properties.display_name, + "compute": properties.compute_id, + "outputs": from_rest_data_outputs(properties.outputs), + "resources": properties.resources, + "identity": ( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + "queue_settings": properties.queue_settings, + } + + image_classification_multilabel_job = cls( + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + validation_data_size=task_details.validation_data_size, + limits=( + ImageLimitSettings._from_rest_object(task_details.limit_settings) + if task_details.limit_settings + else None + ), + sweep=( + ImageSweepSettings._from_rest_object(task_details.sweep_settings) + if task_details.sweep_settings + else None + ), + training_parameters=( + ImageModelSettingsClassification._from_rest_object(task_details.model_settings) + if task_details.model_settings + else None + ), + search_space=cls._get_search_space_from_str(task_details.search_space), + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + **job_args_dict, + ) + + image_classification_multilabel_job._restore_data_inputs() + + return image_classification_multilabel_job + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "ImageClassificationMultilabelJob": + from azure.ai.ml._schema.automl.image_vertical.image_classification import ImageClassificationMultilabelSchema + from azure.ai.ml._schema.pipeline.automl_node import ImageClassificationMultilabelNodeSchema + + inside_pipeline = kwargs.pop("inside_pipeline", False) + if inside_pipeline: + if context.get("inside_pipeline", None) is None: + context["inside_pipeline"] = True + loaded_data = load_from_dict( + ImageClassificationMultilabelNodeSchema, + data, + context, + additional_message, + **kwargs, + ) + else: + loaded_data = load_from_dict( + ImageClassificationMultilabelSchema, + data, + context, + additional_message, + **kwargs, + ) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "ImageClassificationMultilabelJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + data_settings = { + "training_data": loaded_data.pop("training_data"), + "target_column_name": loaded_data.pop("target_column_name"), + "validation_data": loaded_data.pop("validation_data", None), + "validation_data_size": loaded_data.pop("validation_data_size", None), + } + job = ImageClassificationMultilabelJob(**loaded_data) + job.set_data(**data_settings) + return job + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.image_vertical.image_classification import ImageClassificationMultilabelSchema + from azure.ai.ml._schema.pipeline.automl_node import ImageClassificationMultilabelNodeSchema + + schema_dict: dict = {} + if inside_pipeline: + schema_dict = ImageClassificationMultilabelNodeSchema( + context={BASE_PATH_CONTEXT_KEY: "./", "inside_pipeline": True} + ).dump(self) + else: + schema_dict = ImageClassificationMultilabelSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + + return schema_dict + + def __eq__(self, other: object) -> bool: + if not 
isinstance(other, ImageClassificationMultilabelJob): + return NotImplemented + + if not super().__eq__(other): + return False + + return self.primary_metric == other.primary_metric + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_search_space.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_search_space.py new file mode 100644 index 00000000..0691f243 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_search_space.py @@ -0,0 +1,437 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=R0902,too-many-locals + +from typing import Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ImageModelDistributionSettingsClassification +from azure.ai.ml.entities._job.automl.search_space import SearchSpace +from azure.ai.ml.entities._job.automl.search_space_utils import _convert_from_rest_object, _convert_to_rest_object +from azure.ai.ml.entities._job.sweep.search_space import SweepDistribution +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class ImageClassificationSearchSpace(RestTranslatableMixin): + """Search space for AutoML Image Classification and Image Classification + Multilabel tasks. + + :param ams_gradient: Enable AMSGrad when optimizer is 'adam' or 'adamw'. + :type ams_gradient: bool or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param beta1: Value of 'beta1' when optimizer is 'adam' or 'adamw'. Must be a float in the + range [0, 1]. + :type beta1: float or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param beta2: Value of 'beta2' when optimizer is 'adam' or 'adamw'. Must be a float in the + range [0, 1]. + :type beta2: float or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param distributed: Whether to use distributer training. + :type distributed: bool or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param early_stopping: Enable early stopping logic during training. + :type early_stopping: bool or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param early_stopping_delay: Minimum number of epochs or validation evaluations to wait + before primary metric improvement + is tracked for early stopping. Must be a positive integer. + :type early_stopping_delay: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param early_stopping_patience: Minimum number of epochs or validation evaluations with no + primary metric improvement before + the run is stopped. Must be a positive integer. + :type early_stopping_patience: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param enable_onnx_normalization: Enable normalization when exporting ONNX model. + :type enable_onnx_normalization: bool or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param evaluation_frequency: Frequency to evaluate validation dataset to get metric scores. + Must be a positive integer. 
+ :type evaluation_frequency: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param gradient_accumulation_step: Gradient accumulation means running a configured number of + "GradAccumulationStep" steps without + updating the model weights while accumulating the gradients of those steps, and then using + the accumulated gradients to compute the weight updates. Must be a positive integer. + :type gradient_accumulation_step: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param layers_to_freeze: Number of layers to freeze for the model. Must be a positive + integer. + For instance, passing 2 as value for 'seresnext' means + freezing layer0 and layer1. For a full list of models supported and details on layer freeze, + please + see: https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters#model-agnostic-hyperparameters. # pylint: disable=line-too-long + :type layers_to_freeze: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param learning_rate: Initial learning rate. Must be a float in the range [0, 1]. + :type learning_rate: float or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param learning_rate_scheduler: Type of learning rate scheduler. Must be 'warmup_cosine' or + 'step'. + :type learning_rate_scheduler: str or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param model_name: Name of the model to use for training. + For more information on the available models please visit the official documentation: + https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type model_name: str or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param momentum: Value of momentum when optimizer is 'sgd'. Must be a float in the range [0, + 1]. + :type momentum: float or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param nesterov: Enable nesterov when optimizer is 'sgd'. + :type nesterov: bool or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param number_of_epochs: Number of training epochs. Must be a positive integer. + :type number_of_epochs: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param number_of_workers: Number of data loader workers. Must be a non-negative integer. + :type number_of_workers: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param optimizer: Type of optimizer. Must be either 'sgd', 'adam', or 'adamw'. + :type optimizer: str or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param random_seed: Random seed to be used when using deterministic training. + :type random_seed: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param step_lr_gamma: Value of gamma when learning rate scheduler is 'step'. Must be a float + in the range [0, 1]. + :type step_lr_gamma: float or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param step_lr_step_size: Value of step size when learning rate scheduler is 'step'. Must be + a positive integer. + :type step_lr_step_size: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param training_batch_size: Training batch size. Must be a positive integer. + :type training_batch_size: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param validation_batch_size: Validation batch size. Must be a positive integer. 
+ :type validation_batch_size: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param warmup_cosine_lr_cycles: Value of cosine cycle when learning rate scheduler is + 'warmup_cosine'. Must be a float in the range [0, 1]. + :type warmup_cosine_lr_cycles: float or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param warmup_cosine_lr_warmup_epochs: Value of warmup epochs when learning rate scheduler is + 'warmup_cosine'. Must be a positive integer. + :type warmup_cosine_lr_warmup_epochs: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param weight_decay: Value of weight decay when optimizer is 'sgd', 'adam', or 'adamw'. Must + be a float in the range[0, 1]. + :type weight_decay: float or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param training_crop_size: Image crop size that is input to the neural network for the + training dataset. Must be a positive integer. + :type training_crop_size: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param validation_crop_size: Image crop size that is input to the neural network for the + validation dataset. Must be a positive integer. + :type validation_crop_size: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param validation_resize_size: Image size to which to resize before cropping for validation + dataset. Must be a positive integer. + :type validation_resize_size: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param weighted_loss: Weighted loss. The accepted values are 0 for no weighted loss. + 1 for weighted loss with sqrt.(class_weights). 2 for weighted loss with class_weights. Must be + 0 or 1 or 2. + :type weighted_loss: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_automl_image.py + :start-after: [START automl.automl_image_job.image_classification_search_space] + :end-before: [END automl.automl_image_job.image_classification_search_space] + :language: python + :dedent: 8 + :caption: Defining an automl image classification search space + """ + + def __init__( + self, + *, + ams_gradient: Optional[Union[bool, SweepDistribution]] = None, + beta1: Optional[Union[float, SweepDistribution]] = None, + beta2: Optional[Union[float, SweepDistribution]] = None, + distributed: Optional[Union[bool, SweepDistribution]] = None, + early_stopping: Optional[Union[bool, SweepDistribution]] = None, + early_stopping_delay: Optional[Union[int, SweepDistribution]] = None, + early_stopping_patience: Optional[Union[int, SweepDistribution]] = None, + enable_onnx_normalization: Optional[Union[bool, SweepDistribution]] = None, + evaluation_frequency: Optional[Union[int, SweepDistribution]] = None, + gradient_accumulation_step: Optional[Union[int, SweepDistribution]] = None, + layers_to_freeze: Optional[Union[int, SweepDistribution]] = None, + learning_rate: Optional[Union[float, SweepDistribution]] = None, + learning_rate_scheduler: Optional[Union[str, SweepDistribution]] = None, + model_name: Optional[Union[str, SweepDistribution]] = None, + momentum: Optional[Union[float, SweepDistribution]] = None, + nesterov: Optional[Union[bool, SweepDistribution]] = None, + number_of_epochs: Optional[Union[int, SweepDistribution]] = None, + number_of_workers: Optional[Union[int, SweepDistribution]] = None, + optimizer: Optional[Union[str, SweepDistribution]] = None, + random_seed: Optional[Union[int, SweepDistribution]] = None, + step_lr_gamma: Optional[Union[float, SweepDistribution]] = None, + step_lr_step_size: Optional[Union[int, SweepDistribution]] = None, + training_batch_size: Optional[Union[int, SweepDistribution]] = None, + validation_batch_size: Optional[Union[int, SweepDistribution]] = None, + warmup_cosine_lr_cycles: Optional[Union[float, SweepDistribution]] = None, + warmup_cosine_lr_warmup_epochs: Optional[Union[int, SweepDistribution]] = None, + weight_decay: Optional[Union[float, SweepDistribution]] = None, + training_crop_size: Optional[Union[int, SweepDistribution]] = None, + validation_crop_size: Optional[Union[int, SweepDistribution]] = None, + validation_resize_size: Optional[Union[int, SweepDistribution]] = None, + weighted_loss: Optional[Union[int, SweepDistribution]] = None, + ) -> None: + self.ams_gradient = ams_gradient + self.beta1 = beta1 + self.beta2 = beta2 + self.distributed = distributed + self.early_stopping = early_stopping + self.early_stopping_delay = early_stopping_delay + self.early_stopping_patience = early_stopping_patience + self.enable_onnx_normalization = enable_onnx_normalization + self.evaluation_frequency = evaluation_frequency + self.gradient_accumulation_step = gradient_accumulation_step + self.layers_to_freeze = layers_to_freeze + self.learning_rate = learning_rate + self.learning_rate_scheduler = learning_rate_scheduler + self.model_name = model_name + self.momentum = momentum + self.nesterov = nesterov + self.number_of_epochs = number_of_epochs + self.number_of_workers = number_of_workers + self.optimizer = optimizer + self.random_seed = random_seed + self.step_lr_gamma = step_lr_gamma + self.step_lr_step_size = step_lr_step_size + self.training_batch_size = training_batch_size + self.validation_batch_size = validation_batch_size + self.warmup_cosine_lr_cycles = warmup_cosine_lr_cycles + 
self.warmup_cosine_lr_warmup_epochs = warmup_cosine_lr_warmup_epochs + self.weight_decay = weight_decay + self.training_crop_size = training_crop_size + self.validation_crop_size = validation_crop_size + self.validation_resize_size = validation_resize_size + self.weighted_loss = weighted_loss + + def _to_rest_object(self) -> ImageModelDistributionSettingsClassification: + return ImageModelDistributionSettingsClassification( + ams_gradient=_convert_to_rest_object(self.ams_gradient) if self.ams_gradient is not None else None, + beta1=_convert_to_rest_object(self.beta1) if self.beta1 is not None else None, + beta2=_convert_to_rest_object(self.beta2) if self.beta2 is not None else None, + distributed=_convert_to_rest_object(self.distributed) if self.distributed is not None else None, + early_stopping=_convert_to_rest_object(self.early_stopping) if self.early_stopping is not None else None, + early_stopping_delay=( + _convert_to_rest_object(self.early_stopping_delay) if self.early_stopping_delay is not None else None + ), + early_stopping_patience=( + _convert_to_rest_object(self.early_stopping_patience) + if self.early_stopping_patience is not None + else None + ), + enable_onnx_normalization=( + _convert_to_rest_object(self.enable_onnx_normalization) + if self.enable_onnx_normalization is not None + else None + ), + evaluation_frequency=( + _convert_to_rest_object(self.evaluation_frequency) if self.evaluation_frequency is not None else None + ), + gradient_accumulation_step=( + _convert_to_rest_object(self.gradient_accumulation_step) + if self.gradient_accumulation_step is not None + else None + ), + layers_to_freeze=( + _convert_to_rest_object(self.layers_to_freeze) if self.layers_to_freeze is not None else None + ), + learning_rate=_convert_to_rest_object(self.learning_rate) if self.learning_rate is not None else None, + learning_rate_scheduler=( + _convert_to_rest_object(self.learning_rate_scheduler) + if self.learning_rate_scheduler is not None + else None + ), + model_name=_convert_to_rest_object(self.model_name) if self.model_name is not None else None, + momentum=_convert_to_rest_object(self.momentum) if self.momentum is not None else None, + nesterov=_convert_to_rest_object(self.nesterov) if self.nesterov is not None else None, + number_of_epochs=( + _convert_to_rest_object(self.number_of_epochs) if self.number_of_epochs is not None else None + ), + number_of_workers=( + _convert_to_rest_object(self.number_of_workers) if self.number_of_workers is not None else None + ), + optimizer=_convert_to_rest_object(self.optimizer) if self.optimizer is not None else None, + random_seed=_convert_to_rest_object(self.random_seed) if self.random_seed is not None else None, + step_lr_gamma=_convert_to_rest_object(self.step_lr_gamma) if self.step_lr_gamma is not None else None, + step_lr_step_size=( + _convert_to_rest_object(self.step_lr_step_size) if self.step_lr_step_size is not None else None + ), + training_batch_size=( + _convert_to_rest_object(self.training_batch_size) if self.training_batch_size is not None else None + ), + validation_batch_size=( + _convert_to_rest_object(self.validation_batch_size) if self.validation_batch_size is not None else None + ), + warmup_cosine_lr_cycles=( + _convert_to_rest_object(self.warmup_cosine_lr_cycles) + if self.warmup_cosine_lr_cycles is not None + else None + ), + warmup_cosine_lr_warmup_epochs=( + _convert_to_rest_object(self.warmup_cosine_lr_warmup_epochs) + if self.warmup_cosine_lr_warmup_epochs is not None + else None + ), + 
weight_decay=_convert_to_rest_object(self.weight_decay) if self.weight_decay is not None else None, + training_crop_size=( + _convert_to_rest_object(self.training_crop_size) if self.training_crop_size is not None else None + ), + validation_crop_size=( + _convert_to_rest_object(self.validation_crop_size) if self.validation_crop_size is not None else None + ), + validation_resize_size=( + _convert_to_rest_object(self.validation_resize_size) + if self.validation_resize_size is not None + else None + ), + weighted_loss=_convert_to_rest_object(self.weighted_loss) if self.weighted_loss is not None else None, + ) + + @classmethod + def _from_rest_object(cls, obj: ImageModelDistributionSettingsClassification) -> "ImageClassificationSearchSpace": + return cls( + ams_gradient=_convert_from_rest_object(obj.ams_gradient) if obj.ams_gradient is not None else None, + beta1=_convert_from_rest_object(obj.beta1) if obj.beta1 is not None else None, + beta2=_convert_from_rest_object(obj.beta2) if obj.beta2 is not None else None, + distributed=_convert_from_rest_object(obj.distributed) if obj.distributed is not None else None, + early_stopping=_convert_from_rest_object(obj.early_stopping) if obj.early_stopping is not None else None, + early_stopping_delay=( + _convert_from_rest_object(obj.early_stopping_delay) if obj.early_stopping_delay is not None else None + ), + early_stopping_patience=( + _convert_from_rest_object(obj.early_stopping_patience) + if obj.early_stopping_patience is not None + else None + ), + enable_onnx_normalization=( + _convert_from_rest_object(obj.enable_onnx_normalization) + if obj.enable_onnx_normalization is not None + else None + ), + evaluation_frequency=( + _convert_from_rest_object(obj.evaluation_frequency) if obj.evaluation_frequency is not None else None + ), + gradient_accumulation_step=( + _convert_from_rest_object(obj.gradient_accumulation_step) + if obj.gradient_accumulation_step is not None + else None + ), + layers_to_freeze=( + _convert_from_rest_object(obj.layers_to_freeze) if obj.layers_to_freeze is not None else None + ), + learning_rate=_convert_from_rest_object(obj.learning_rate) if obj.learning_rate is not None else None, + learning_rate_scheduler=( + _convert_from_rest_object(obj.learning_rate_scheduler) + if obj.learning_rate_scheduler is not None + else None + ), + model_name=_convert_from_rest_object(obj.model_name) if obj.model_name is not None else None, + momentum=_convert_from_rest_object(obj.momentum) if obj.momentum is not None else None, + nesterov=_convert_from_rest_object(obj.nesterov) if obj.nesterov is not None else None, + number_of_epochs=( + _convert_from_rest_object(obj.number_of_epochs) if obj.number_of_epochs is not None else None + ), + number_of_workers=( + _convert_from_rest_object(obj.number_of_workers) if obj.number_of_workers is not None else None + ), + optimizer=_convert_from_rest_object(obj.optimizer) if obj.optimizer is not None else None, + random_seed=_convert_from_rest_object(obj.random_seed) if obj.random_seed is not None else None, + step_lr_gamma=_convert_from_rest_object(obj.step_lr_gamma) if obj.step_lr_gamma is not None else None, + step_lr_step_size=( + _convert_from_rest_object(obj.step_lr_step_size) if obj.step_lr_step_size is not None else None + ), + training_batch_size=( + _convert_from_rest_object(obj.training_batch_size) if obj.training_batch_size is not None else None + ), + validation_batch_size=( + _convert_from_rest_object(obj.validation_batch_size) if obj.validation_batch_size is not None else None + ), + 
warmup_cosine_lr_cycles=( + _convert_from_rest_object(obj.warmup_cosine_lr_cycles) + if obj.warmup_cosine_lr_cycles is not None + else None + ), + warmup_cosine_lr_warmup_epochs=( + _convert_from_rest_object(obj.warmup_cosine_lr_warmup_epochs) + if obj.warmup_cosine_lr_warmup_epochs is not None + else None + ), + weight_decay=_convert_from_rest_object(obj.weight_decay) if obj.weight_decay is not None else None, + training_crop_size=( + _convert_from_rest_object(obj.training_crop_size) if obj.training_crop_size is not None else None + ), + validation_crop_size=( + _convert_from_rest_object(obj.validation_crop_size) if obj.validation_crop_size is not None else None + ), + validation_resize_size=( + _convert_from_rest_object(obj.validation_resize_size) + if obj.validation_resize_size is not None + else None + ), + weighted_loss=_convert_from_rest_object(obj.weighted_loss) if obj.weighted_loss is not None else None, + ) + + @classmethod + def _from_search_space_object(cls, obj: SearchSpace) -> "ImageClassificationSearchSpace": + return cls( + ams_gradient=obj.ams_gradient if hasattr(obj, "ams_gradient") else None, + beta1=obj.beta1 if hasattr(obj, "beta1") else None, + beta2=obj.beta2 if hasattr(obj, "beta2") else None, + distributed=obj.distributed if hasattr(obj, "distributed") else None, + early_stopping=obj.early_stopping if hasattr(obj, "early_stopping") else None, + early_stopping_delay=obj.early_stopping_delay if hasattr(obj, "early_stopping_delay") else None, + early_stopping_patience=obj.early_stopping_patience if hasattr(obj, "early_stopping_patience") else None, + enable_onnx_normalization=( + obj.enable_onnx_normalization if hasattr(obj, "enable_onnx_normalization") else None + ), + evaluation_frequency=obj.evaluation_frequency if hasattr(obj, "evaluation_frequency") else None, + gradient_accumulation_step=( + obj.gradient_accumulation_step if hasattr(obj, "gradient_accumulation_step") else None + ), + layers_to_freeze=obj.layers_to_freeze if hasattr(obj, "layers_to_freeze") else None, + learning_rate=obj.learning_rate if hasattr(obj, "learning_rate") else None, + learning_rate_scheduler=obj.learning_rate_scheduler if hasattr(obj, "learning_rate_scheduler") else None, + model_name=obj.model_name if hasattr(obj, "model_name") else None, + momentum=obj.momentum if hasattr(obj, "momentum") else None, + nesterov=obj.nesterov if hasattr(obj, "nesterov") else None, + number_of_epochs=obj.number_of_epochs if hasattr(obj, "number_of_epochs") else None, + number_of_workers=obj.number_of_workers if hasattr(obj, "number_of_workers") else None, + optimizer=obj.optimizer if hasattr(obj, "optimizer") else None, + random_seed=obj.random_seed if hasattr(obj, "random_seed") else None, + step_lr_gamma=obj.step_lr_gamma if hasattr(obj, "step_lr_gamma") else None, + step_lr_step_size=obj.step_lr_step_size if hasattr(obj, "step_lr_step_size") else None, + training_batch_size=obj.training_batch_size if hasattr(obj, "training_batch_size") else None, + validation_batch_size=obj.validation_batch_size if hasattr(obj, "validation_batch_size") else None, + warmup_cosine_lr_cycles=obj.warmup_cosine_lr_cycles if hasattr(obj, "warmup_cosine_lr_cycles") else None, + warmup_cosine_lr_warmup_epochs=( + obj.warmup_cosine_lr_warmup_epochs if hasattr(obj, "warmup_cosine_lr_warmup_epochs") else None + ), + weight_decay=obj.weight_decay if hasattr(obj, "weight_decay") else None, + training_crop_size=obj.training_crop_size if hasattr(obj, "training_crop_size") else None, + validation_crop_size=obj.validation_crop_size if 
hasattr(obj, "validation_crop_size") else None, + validation_resize_size=obj.validation_resize_size if hasattr(obj, "validation_resize_size") else None, + weighted_loss=obj.weighted_loss if hasattr(obj, "weighted_loss") else None, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageClassificationSearchSpace): + return NotImplemented + + return ( + self.ams_gradient == other.ams_gradient + and self.beta1 == other.beta1 + and self.beta2 == other.beta2 + and self.distributed == other.distributed + and self.early_stopping == other.early_stopping + and self.early_stopping_delay == other.early_stopping_delay + and self.early_stopping_patience == other.early_stopping_patience + and self.enable_onnx_normalization == other.enable_onnx_normalization + and self.evaluation_frequency == other.evaluation_frequency + and self.gradient_accumulation_step == other.gradient_accumulation_step + and self.layers_to_freeze == other.layers_to_freeze + and self.learning_rate == other.learning_rate + and self.learning_rate_scheduler == other.learning_rate_scheduler + and self.model_name == other.model_name + and self.momentum == other.momentum + and self.nesterov == other.nesterov + and self.number_of_epochs == other.number_of_epochs + and self.number_of_workers == other.number_of_workers + and self.optimizer == other.optimizer + and self.random_seed == other.random_seed + and self.step_lr_gamma == other.step_lr_gamma + and self.step_lr_step_size == other.step_lr_step_size + and self.training_batch_size == other.training_batch_size + and self.validation_batch_size == other.validation_batch_size + and self.warmup_cosine_lr_cycles == other.warmup_cosine_lr_cycles + and self.warmup_cosine_lr_warmup_epochs == other.warmup_cosine_lr_warmup_epochs + and self.weight_decay == other.weight_decay + and self.training_crop_size == other.training_crop_size + and self.validation_crop_size == other.validation_crop_size + and self.validation_resize_size == other.validation_resize_size + and self.weighted_loss == other.weighted_loss + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_instance_segmentation_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_instance_segmentation_job.py new file mode 100644 index 00000000..c97d3c11 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_instance_segmentation_job.py @@ -0,0 +1,249 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + ImageInstanceSegmentation as RestImageInstanceSegmentation, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import InstanceSegmentationPrimaryMetrics, JobBase, TaskType +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.image.automl_image_object_detection_base import AutoMLImageObjectDetectionBase +from azure.ai.ml.entities._job.automl.image.image_limit_settings import ImageLimitSettings +from azure.ai.ml.entities._job.automl.image.image_model_settings import ImageModelSettingsObjectDetection +from azure.ai.ml.entities._job.automl.image.image_sweep_settings import ImageSweepSettings +from azure.ai.ml.entities._util import load_from_dict + + +class ImageInstanceSegmentationJob(AutoMLImageObjectDetectionBase): + """Configuration for AutoML Image Instance Segmentation job. + + :keyword primary_metric: The primary metric to use for optimization. + :paramtype primary_metric: Optional[str, ~azure.ai.ml.automl.InstanceSegmentationPrimaryMetrics] + :keyword kwargs: Job-specific arguments. + :paramtype kwargs: Dict[str, Any] + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_automl_image.py + :start-after: [START automl.automl_image_job.image_instance_segmentation_job] + :end-before: [END automl.automl_image_job.image_instance_segmentation_job] + :language: python + :dedent: 8 + :caption: creating an automl image instance segmentation job + """ + + _DEFAULT_PRIMARY_METRIC = InstanceSegmentationPrimaryMetrics.MEAN_AVERAGE_PRECISION + + def __init__( + self, + *, + primary_metric: Optional[Union[str, InstanceSegmentationPrimaryMetrics]] = None, + **kwargs: Any, + ) -> None: + # Extract any super class init settings + limits = kwargs.pop("limits", None) + sweep = kwargs.pop("sweep", None) + training_parameters = kwargs.pop("training_parameters", None) + search_space = kwargs.pop("search_space", None) + + super().__init__( + task_type=TaskType.IMAGE_INSTANCE_SEGMENTATION, + limits=limits, + sweep=sweep, + training_parameters=training_parameters, + search_space=search_space, + **kwargs, + ) + self.primary_metric = primary_metric or ImageInstanceSegmentationJob._DEFAULT_PRIMARY_METRIC + + @property + def primary_metric(self) -> Union[str, InstanceSegmentationPrimaryMetrics]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, InstanceSegmentationPrimaryMetrics]) -> None: + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + self._primary_metric = ( + ImageInstanceSegmentationJob._DEFAULT_PRIMARY_METRIC + if value is None + else InstanceSegmentationPrimaryMetrics[camel_to_snake(value).upper()] + ) + + def _to_rest_object(self) -> JobBase: + image_instance_segmentation_task = RestImageInstanceSegmentation( + target_column_name=self.target_column_name, + training_data=self.training_data, + 
validation_data=self.validation_data, + validation_data_size=self.validation_data_size, + limit_settings=self._limits._to_rest_object() if self._limits else None, + sweep_settings=self._sweep._to_rest_object() if self._sweep else None, + model_settings=self._training_parameters._to_rest_object() if self._training_parameters else None, + search_space=( + [entry._to_rest_object() for entry in self._search_space if entry is not None] + if self._search_space is not None + else None + ), + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + # resolve data inputs in rest obj + self._resolve_data_inputs(image_instance_segmentation_task) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=image_instance_segmentation_task, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "ImageInstanceSegmentationJob": + properties: RestAutoMLJob = obj.properties + task_details: RestImageInstanceSegmentation = properties.task_details + + job_args_dict = { + "id": obj.id, + "name": obj.name, + "description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "experiment_name": properties.experiment_name, + "services": properties.services, + "status": properties.status, + "creation_context": obj.system_data, + "display_name": properties.display_name, + "compute": properties.compute_id, + "outputs": from_rest_data_outputs(properties.outputs), + "resources": properties.resources, + "identity": ( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + "queue_settings": properties.queue_settings, + } + + image_instance_segmentation_job = cls( + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + validation_data_size=task_details.validation_data_size, + limits=( + ImageLimitSettings._from_rest_object(task_details.limit_settings) + if task_details.limit_settings + else None + ), + sweep=( + ImageSweepSettings._from_rest_object(task_details.sweep_settings) + if task_details.sweep_settings + else None + ), + training_parameters=( + ImageModelSettingsObjectDetection._from_rest_object(task_details.model_settings) + if task_details.model_settings + else None + ), + search_space=cls._get_search_space_from_str(task_details.search_space), + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + **job_args_dict, + ) + + image_instance_segmentation_job._restore_data_inputs() + + return image_instance_segmentation_job + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "ImageInstanceSegmentationJob": + from azure.ai.ml._schema.automl.image_vertical.image_object_detection import ImageInstanceSegmentationSchema + from azure.ai.ml._schema.pipeline.automl_node import ImageInstanceSegmentationNodeSchema + + inside_pipeline = 
kwargs.pop("inside_pipeline", False) + if inside_pipeline: + if context.get("inside_pipeline", None) is None: + context["inside_pipeline"] = True + loaded_data = load_from_dict( + ImageInstanceSegmentationNodeSchema, + data, + context, + additional_message, + **kwargs, + ) + else: + loaded_data = load_from_dict( + ImageInstanceSegmentationSchema, + data, + context, + additional_message, + **kwargs, + ) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "ImageInstanceSegmentationJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + data_settings = { + "training_data": loaded_data.pop("training_data"), + "target_column_name": loaded_data.pop("target_column_name"), + "validation_data": loaded_data.pop("validation_data", None), + "validation_data_size": loaded_data.pop("validation_data_size", None), + } + job = ImageInstanceSegmentationJob(**loaded_data) + job.set_data(**data_settings) + return job + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.image_vertical.image_object_detection import ImageInstanceSegmentationSchema + from azure.ai.ml._schema.pipeline.automl_node import ImageInstanceSegmentationNodeSchema + + schema_dict: dict = {} + if inside_pipeline: + schema_dict = ImageInstanceSegmentationNodeSchema( + context={BASE_PATH_CONTEXT_KEY: "./", "inside_pipeline": True} + ).dump(self) + else: + schema_dict = ImageInstanceSegmentationSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + + return schema_dict + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageInstanceSegmentationJob): + return NotImplemented + + if not super().__eq__(other): + return False + + return self.primary_metric == other.primary_metric + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_limit_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_limit_settings.py new file mode 100644 index 00000000..12ec8b57 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_limit_settings.py @@ -0,0 +1,117 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from typing import Optional + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ImageLimitSettings as RestImageLimitSettings +from azure.ai.ml._utils.utils import from_iso_duration_format_mins, to_iso_duration_format_mins +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class ImageLimitSettings(RestTranslatableMixin): + r"""Limit settings for AutoML Image Verticals. + + ImageLimitSettings is a class that contains the following parameters: max_concurrent_trials, max_trials, and \ + timeout_minutes. + + This is an optional configuration method to configure limits parameters such as timeouts etc. + + .. note:: + + The number of concurrent runs is gated on the resources available in the specified compute target. + Ensure that the compute target has the available resources for the desired concurrency. + + :keyword max_concurrent_trials: Maximum number of concurrent AutoML iterations, defaults to None. 
+ :paramtype max_concurrent_trials: typing.Optional[int] + :keyword max_trials: Represents the maximum number of trials (children jobs). + :paramtype max_trials: typing.Optional[int] + :keyword timeout_minutes: AutoML job timeout. Defaults to None + :paramtype timeout_minutes: typing.Optional[int] + :raises ValueError: If max_concurrent_trials is not None and is not a positive integer. + :raises ValueError: If max_trials is not None and is not a positive integer. + :raises ValueError: If timeout_minutes is not None and is not a positive integer. + :return: ImageLimitSettings object. + :rtype: ImageLimitSettings + + .. tip:: + It's a good practice to match max_concurrent_trials count with the number of nodes in the cluster. + For example, if you have a cluster with 4 nodes, set max_concurrent_trials to 4. + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_automl_image.py + :start-after: [START automl.automl_image_job.image_limit_settings] + :end-before: [END automl.automl_image_job.image_limit_settings] + :language: python + :dedent: 8 + :caption: Defining the limit settings for an automl image job. + """ + + def __init__( + self, + *, + max_concurrent_trials: Optional[int] = None, + max_trials: Optional[int] = None, + timeout_minutes: Optional[int] = None, + ) -> None: + self.max_concurrent_trials = max_concurrent_trials + self.max_trials = max_trials + self.timeout_minutes = timeout_minutes + + def _to_rest_object(self) -> RestImageLimitSettings: + """Convert ImageLimitSettings objects to a rest object. + + :return: A rest object of ImageLimitSettings objects. + :rtype: RestImageLimitSettings + """ + return RestImageLimitSettings( + max_concurrent_trials=self.max_concurrent_trials, + max_trials=self.max_trials, + timeout=to_iso_duration_format_mins(self.timeout_minutes), + ) + + @classmethod + def _from_rest_object(cls, obj: RestImageLimitSettings) -> "ImageLimitSettings": + """Convert the rest object to a dict containing items to init the ImageLimitSettings objects. + + :param obj: Limit settings for the AutoML job in Rest format. + :type obj: RestImageLimitSettings + :return: Limit settings for an AutoML Image Vertical. + :rtype: ImageLimitSettings + """ + return cls( + max_concurrent_trials=obj.max_concurrent_trials, + max_trials=obj.max_trials, + timeout_minutes=from_iso_duration_format_mins(obj.timeout), + ) + + def __eq__(self, other: object) -> bool: + """Check equality between two ImageLimitSettings objects. + + This method check instances equality and returns True if both of + the instances have the same attributes with the same values. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + if not isinstance(other, ImageLimitSettings): + return NotImplemented + + return ( + self.max_concurrent_trials == other.max_concurrent_trials + and self.max_trials == other.max_trials + and self.timeout_minutes == other.timeout_minutes + ) + + def __ne__(self, other: object) -> bool: + """Check inequality between two ImageLimitSettings objects. 
+ + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_model_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_model_settings.py new file mode 100644 index 00000000..890f987a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_model_settings.py @@ -0,0 +1,876 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from typing import Any, Optional + +# pylint: disable=R0902,too-many-locals +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + ImageModelSettingsClassification as RestImageModelSettingsClassification, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + ImageModelSettingsObjectDetection as RestImageModelSettingsObjectDetection, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + LearningRateScheduler, + LogTrainingMetrics, + LogValidationLoss, + ModelSize, + StochasticOptimizer, + ValidationMetricType, +) +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class ImageModelDistributionSettings(RestTranslatableMixin): + """Model settings for all AutoML Image Verticals. + Please do not instantiate directly. Use the child classes instead. + + :param advanced_settings: Settings for advanced scenarios. + :type advanced_settings: str + :param ams_gradient: Enable AMSGrad when optimizer is 'adam' or 'adamw'. + :type ams_gradient: bool + :param beta1: Value of 'beta1' when optimizer is 'adam' or 'adamw'. Must be a float in the range + [0, 1]. + :type beta1: float + :param beta2: Value of 'beta2' when optimizer is 'adam' or 'adamw'. Must be a float in the range + [0, 1]. + :type beta2: float + :param checkpoint_frequency: Frequency to store model checkpoints. Must be a positive integer. + :type checkpoint_frequency: int + :param checkpoint_run_id: The id of a previous run that has a pretrained checkpoint for + incremental training. + :type checkpoint_run_id: str + :param distributed: Whether to use distributed training. + :type distributed: bool + :param early_stopping: Enable early stopping logic during training. + :type early_stopping: bool + :param early_stopping_delay: Minimum number of epochs or validation evaluations to wait before + primary metric improvement + is tracked for early stopping. Must be a positive integer. + :type early_stopping_delay: int + :param early_stopping_patience: Minimum number of epochs or validation evaluations with no + primary metric improvement before + the run is stopped. Must be a positive integer. + :type early_stopping_patience: int + :param enable_onnx_normalization: Enable normalization when exporting ONNX model. + :type enable_onnx_normalization: bool + :param evaluation_frequency: Frequency to evaluate validation dataset to get metric scores. Must + be a positive integer. + :type evaluation_frequency: int + :param gradient_accumulation_step: Gradient accumulation means running a configured number of + "GradAccumulationStep" steps without + updating the model weights while accumulating the gradients of those steps, and then using + the accumulated gradients to compute the weight updates. Must be a positive integer. 
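# --- Editor's illustrative sketch (not part of the vendored package or this commit) ---
# ImageLimitSettings defined above, sized for a hypothetical 4-node GPU cluster as the
# class docstring's tip suggests (max_concurrent_trials matching the node count).
limits = ImageLimitSettings(
    max_concurrent_trials=4,  # one concurrent trial per node
    max_trials=20,            # total number of child runs
    timeout_minutes=240,      # overall AutoML job timeout
)
# _to_rest_object()/_from_rest_object() round-trip these values, converting
# timeout_minutes to and from an ISO-8601 duration via the to/from_iso_duration helpers.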
+ :type gradient_accumulation_step: int + :param layers_to_freeze: Number of layers to freeze for the model. Must be a positive integer. + For instance, passing 2 as value for 'seresnext' means + freezing layer0 and layer1. For a full list of models supported and details on layer freeze, + please + see: https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type layers_to_freeze: int + :param learning_rate: Initial learning rate. Must be a float in the range [0, 1]. + :type learning_rate: float + :param learning_rate_scheduler: Type of learning rate scheduler. Must be 'warmup_cosine' or + 'step'. Possible values include: "None", "WarmupCosine", "Step". + :type learning_rate_scheduler: str or + ~azure.mgmt.machinelearningservices.models.LearningRateScheduler + :param model_name: Name of the model to use for training. + For more information on the available models please visit the official documentation: + https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type model_name: str + :param momentum: Value of momentum when optimizer is 'sgd'. Must be a float in the range [0, 1]. + :type momentum: float + :param nesterov: Enable nesterov when optimizer is 'sgd'. + :type nesterov: bool + :param number_of_epochs: Number of training epochs. Must be a positive integer. + :type number_of_epochs: int + :param number_of_workers: Number of data loader workers. Must be a non-negative integer. + :type number_of_workers: int + :param optimizer: Type of optimizer. Possible values include: "None", "Sgd", "Adam", "Adamw". + :type optimizer: str or ~azure.mgmt.machinelearningservices.models.StochasticOptimizer + :param random_seed: Random seed to be used when using deterministic training. + :type random_seed: int + :param step_lr_gamma: Value of gamma when learning rate scheduler is 'step'. Must be a float in + the range [0, 1]. + :type step_lr_gamma: float + :param step_lr_step_size: Value of step size when learning rate scheduler is 'step'. Must be a + positive integer. + :type step_lr_step_size: int + :param training_batch_size: Training batch size. Must be a positive integer. + :type training_batch_size: int + :param validation_batch_size: Validation batch size. Must be a positive integer. + :type validation_batch_size: int + :param warmup_cosine_lr_cycles: Value of cosine cycle when learning rate scheduler is + 'warmup_cosine'. Must be a float in the range [0, 1]. + :type warmup_cosine_lr_cycles: float + :param warmup_cosine_lr_warmup_epochs: Value of warmup epochs when learning rate scheduler is + 'warmup_cosine'. Must be a positive integer. + :type warmup_cosine_lr_warmup_epochs: int + :param weight_decay: Value of weight decay when optimizer is 'sgd', 'adam', or 'adamw'. Must be + a float in the range[0, 1]. 
+ :type weight_decay: float + """ + + def __init__( + self, + *, + advanced_settings: Optional[str] = None, + ams_gradient: Optional[bool] = None, + beta1: Optional[float] = None, + beta2: Optional[float] = None, + checkpoint_frequency: Optional[int] = None, + checkpoint_run_id: Optional[str] = None, + distributed: Optional[bool] = None, + early_stopping: Optional[bool] = None, + early_stopping_delay: Optional[int] = None, + early_stopping_patience: Optional[int] = None, + enable_onnx_normalization: Optional[bool] = None, + evaluation_frequency: Optional[int] = None, + gradient_accumulation_step: Optional[int] = None, + layers_to_freeze: Optional[int] = None, + learning_rate: Optional[float] = None, + learning_rate_scheduler: Optional[LearningRateScheduler] = None, + model_name: Optional[str] = None, + momentum: Optional[float] = None, + nesterov: Optional[bool] = None, + number_of_epochs: Optional[int] = None, + number_of_workers: Optional[int] = None, + optimizer: Optional[StochasticOptimizer] = None, + random_seed: Optional[int] = None, + step_lr_gamma: Optional[float] = None, + step_lr_step_size: Optional[int] = None, + training_batch_size: Optional[int] = None, + validation_batch_size: Optional[int] = None, + warmup_cosine_lr_cycles: Optional[float] = None, + warmup_cosine_lr_warmup_epochs: Optional[int] = None, + weight_decay: Optional[float] = None, + ): + self.advanced_settings = advanced_settings + self.ams_gradient = ams_gradient + self.beta1 = beta1 + self.beta2 = beta2 + self.checkpoint_frequency = checkpoint_frequency + self.checkpoint_run_id = checkpoint_run_id + self.distributed = distributed + self.early_stopping = early_stopping + self.early_stopping_delay = early_stopping_delay + self.early_stopping_patience = early_stopping_patience + self.enable_onnx_normalization = enable_onnx_normalization + self.evaluation_frequency = evaluation_frequency + self.gradient_accumulation_step = gradient_accumulation_step + self.layers_to_freeze = layers_to_freeze + self.learning_rate = learning_rate + self.learning_rate_scheduler = learning_rate_scheduler + self.model_name = model_name + self.momentum = momentum + self.nesterov = nesterov + self.number_of_epochs = number_of_epochs + self.number_of_workers = number_of_workers + self.optimizer = optimizer + self.random_seed = random_seed + self.step_lr_gamma = step_lr_gamma + self.step_lr_step_size = step_lr_step_size + self.training_batch_size = training_batch_size + self.validation_batch_size = validation_batch_size + self.warmup_cosine_lr_cycles = warmup_cosine_lr_cycles + self.warmup_cosine_lr_warmup_epochs = warmup_cosine_lr_warmup_epochs + self.weight_decay = weight_decay + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageModelDistributionSettings): + return NotImplemented + + return ( + self.advanced_settings == other.advanced_settings + and self.ams_gradient == other.ams_gradient + and self.beta1 == other.beta1 + and self.beta2 == other.beta2 + and self.checkpoint_frequency == other.checkpoint_frequency + and self.checkpoint_run_id == other.checkpoint_run_id + and self.distributed == other.distributed + and self.early_stopping == other.early_stopping + and self.early_stopping_delay == other.early_stopping_delay + and self.early_stopping_patience == other.early_stopping_patience + and self.enable_onnx_normalization == other.enable_onnx_normalization + and self.evaluation_frequency == other.evaluation_frequency + and self.gradient_accumulation_step == other.gradient_accumulation_step + and 
self.layers_to_freeze == other.layers_to_freeze + and self.learning_rate == other.learning_rate + and self.learning_rate_scheduler == other.learning_rate_scheduler + and self.model_name == other.model_name + and self.momentum == other.momentum + and self.nesterov == other.nesterov + and self.number_of_epochs == other.number_of_epochs + and self.number_of_workers == other.number_of_workers + and self.optimizer == other.optimizer + and self.random_seed == other.random_seed + and self.step_lr_gamma == other.step_lr_gamma + and self.step_lr_step_size == other.step_lr_step_size + and self.training_batch_size == other.training_batch_size + and self.validation_batch_size == other.validation_batch_size + and self.warmup_cosine_lr_cycles == other.warmup_cosine_lr_cycles + and self.warmup_cosine_lr_warmup_epochs == other.warmup_cosine_lr_warmup_epochs + and self.weight_decay == other.weight_decay + ) + + +class ImageModelSettingsClassification(ImageModelDistributionSettings): + """Model settings for AutoML Image Classification tasks. + + :param advanced_settings: Settings for advanced scenarios. + :type advanced_settings: str + :param ams_gradient: Enable AMSGrad when optimizer is 'adam' or 'adamw'. + :type ams_gradient: bool + :param beta1: Value of 'beta1' when optimizer is 'adam' or 'adamw'. Must be a float in the range + [0, 1]. + :type beta1: float + :param beta2: Value of 'beta2' when optimizer is 'adam' or 'adamw'. Must be a float in the range + [0, 1]. + :type beta2: float + :param checkpoint_frequency: Frequency to store model checkpoints. Must be a positive integer. + :type checkpoint_frequency: int + :param checkpoint_run_id: The id of a previous run that has a pretrained checkpoint for + incremental training. + :type checkpoint_run_id: str + :param distributed: Whether to use distributed training. + :type distributed: bool + :param early_stopping: Enable early stopping logic during training. + :type early_stopping: bool + :param early_stopping_delay: Minimum number of epochs or validation evaluations to wait before + primary metric improvement + is tracked for early stopping. Must be a positive integer. + :type early_stopping_delay: int + :param early_stopping_patience: Minimum number of epochs or validation evaluations with no + primary metric improvement before + the run is stopped. Must be a positive integer. + :type early_stopping_patience: int + :param enable_onnx_normalization: Enable normalization when exporting ONNX model. + :type enable_onnx_normalization: bool + :param evaluation_frequency: Frequency to evaluate validation dataset to get metric scores. Must + be a positive integer. + :type evaluation_frequency: int + :param gradient_accumulation_step: Gradient accumulation means running a configured number of + "GradAccumulationStep" steps without + updating the model weights while accumulating the gradients of those steps, and then using + the accumulated gradients to compute the weight updates. Must be a positive integer. + :type gradient_accumulation_step: int + :param layers_to_freeze: Number of layers to freeze for the model. Must be a positive integer. + For instance, passing 2 as value for 'seresnext' means + freezing layer0 and layer1. For a full list of models supported and details on layer freeze, + please + see: https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type layers_to_freeze: int + :param learning_rate: Initial learning rate. Must be a float in the range [0, 1]. 
+ :type learning_rate: float + :param learning_rate_scheduler: Type of learning rate scheduler. Must be 'warmup_cosine' or + 'step'. Possible values include: "None", "WarmupCosine", "Step". + :type learning_rate_scheduler: str or + ~azure.mgmt.machinelearningservices.models.LearningRateScheduler + :param model_name: Name of the model to use for training. + For more information on the available models please visit the official documentation: + https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type model_name: str + :param momentum: Value of momentum when optimizer is 'sgd'. Must be a float in the range [0, 1]. + :type momentum: float + :param nesterov: Enable nesterov when optimizer is 'sgd'. + :type nesterov: bool + :param number_of_epochs: Number of training epochs. Must be a positive integer. + :type number_of_epochs: int + :param number_of_workers: Number of data loader workers. Must be a non-negative integer. + :type number_of_workers: int + :param optimizer: Type of optimizer. Possible values include: "None", "Sgd", "Adam", "Adamw". + :type optimizer: str or ~azure.mgmt.machinelearningservices.models.StochasticOptimizer + :param random_seed: Random seed to be used when using deterministic training. + :type random_seed: int + :param step_lr_gamma: Value of gamma when learning rate scheduler is 'step'. Must be a float in + the range [0, 1]. + :type step_lr_gamma: float + :param step_lr_step_size: Value of step size when learning rate scheduler is 'step'. Must be a + positive integer. + :type step_lr_step_size: int + :param training_batch_size: Training batch size. Must be a positive integer. + :type training_batch_size: int + :param validation_batch_size: Validation batch size. Must be a positive integer. + :type validation_batch_size: int + :param warmup_cosine_lr_cycles: Value of cosine cycle when learning rate scheduler is + 'warmup_cosine'. Must be a float in the range [0, 1]. + :type warmup_cosine_lr_cycles: float + :param warmup_cosine_lr_warmup_epochs: Value of warmup epochs when learning rate scheduler is + 'warmup_cosine'. Must be a positive integer. + :type warmup_cosine_lr_warmup_epochs: int + :param weight_decay: Value of weight decay when optimizer is 'sgd', 'adam', or 'adamw'. Must be + a float in the range[0, 1]. + :type weight_decay: float + :param training_crop_size: Image crop size that is input to the neural network for the training + dataset. Must be a positive integer. + :type training_crop_size: int + :param validation_crop_size: Image crop size that is input to the neural network for the + validation dataset. Must be a positive integer. + :type validation_crop_size: int + :param validation_resize_size: Image size to which to resize before cropping for validation + dataset. Must be a positive integer. + :type validation_resize_size: int + :param weighted_loss: Weighted loss. The accepted values are 0 for no weighted loss. + 1 for weighted loss with sqrt.(class_weights). 2 for weighted loss with class_weights. Must be + 0 or 1 or 2. + :type weighted_loss: int + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_automl_image.py + :start-after: [START automl.automl_image_job.image_classification_model_settings] + :end-before: [END automl.automl_image_job.image_classification_model_settings] + :language: python + :dedent: 8 + :caption: Defining the automl image classification model settings. 
+ """ + + def __init__( + self, + *, + advanced_settings: Optional[str] = None, + ams_gradient: Optional[bool] = None, + beta1: Optional[float] = None, + beta2: Optional[float] = None, + checkpoint_frequency: Optional[int] = None, + checkpoint_run_id: Optional[str] = None, + distributed: Optional[bool] = None, + early_stopping: Optional[bool] = None, + early_stopping_delay: Optional[int] = None, + early_stopping_patience: Optional[int] = None, + enable_onnx_normalization: Optional[bool] = None, + evaluation_frequency: Optional[int] = None, + gradient_accumulation_step: Optional[int] = None, + layers_to_freeze: Optional[int] = None, + learning_rate: Optional[float] = None, + learning_rate_scheduler: Optional[LearningRateScheduler] = None, + model_name: Optional[str] = None, + momentum: Optional[float] = None, + nesterov: Optional[bool] = None, + number_of_epochs: Optional[int] = None, + number_of_workers: Optional[int] = None, + optimizer: Optional[StochasticOptimizer] = None, + random_seed: Optional[int] = None, + step_lr_gamma: Optional[float] = None, + step_lr_step_size: Optional[int] = None, + training_batch_size: Optional[int] = None, + validation_batch_size: Optional[int] = None, + warmup_cosine_lr_cycles: Optional[float] = None, + warmup_cosine_lr_warmup_epochs: Optional[int] = None, + weight_decay: Optional[float] = None, + training_crop_size: Optional[int] = None, + validation_crop_size: Optional[int] = None, + validation_resize_size: Optional[int] = None, + weighted_loss: Optional[int] = None, + **kwargs: Any, + ): + super(ImageModelSettingsClassification, self).__init__( + advanced_settings=advanced_settings, + ams_gradient=ams_gradient, + beta1=beta1, + beta2=beta2, + checkpoint_frequency=checkpoint_frequency, + checkpoint_run_id=checkpoint_run_id, + distributed=distributed, + early_stopping=early_stopping, + early_stopping_delay=early_stopping_delay, + early_stopping_patience=early_stopping_patience, + enable_onnx_normalization=enable_onnx_normalization, + evaluation_frequency=evaluation_frequency, + gradient_accumulation_step=gradient_accumulation_step, + layers_to_freeze=layers_to_freeze, + learning_rate=learning_rate, + learning_rate_scheduler=learning_rate_scheduler, + model_name=model_name, + momentum=momentum, + nesterov=nesterov, + number_of_epochs=number_of_epochs, + number_of_workers=number_of_workers, + optimizer=optimizer, + random_seed=random_seed, + step_lr_gamma=step_lr_gamma, + step_lr_step_size=step_lr_step_size, + training_batch_size=training_batch_size, + validation_batch_size=validation_batch_size, + warmup_cosine_lr_cycles=warmup_cosine_lr_cycles, + warmup_cosine_lr_warmup_epochs=warmup_cosine_lr_warmup_epochs, + weight_decay=weight_decay, + **kwargs, + ) + self.training_crop_size = training_crop_size + self.validation_crop_size = validation_crop_size + self.validation_resize_size = validation_resize_size + self.weighted_loss = weighted_loss + + def _to_rest_object(self) -> RestImageModelSettingsClassification: + return RestImageModelSettingsClassification( + advanced_settings=self.advanced_settings, + ams_gradient=self.ams_gradient, + beta1=self.beta1, + beta2=self.beta2, + checkpoint_frequency=self.checkpoint_frequency, + checkpoint_run_id=self.checkpoint_run_id, + distributed=self.distributed, + early_stopping=self.early_stopping, + early_stopping_delay=self.early_stopping_delay, + early_stopping_patience=self.early_stopping_patience, + enable_onnx_normalization=self.enable_onnx_normalization, + evaluation_frequency=self.evaluation_frequency, + 
gradient_accumulation_step=self.gradient_accumulation_step, + layers_to_freeze=self.layers_to_freeze, + learning_rate=self.learning_rate, + learning_rate_scheduler=self.learning_rate_scheduler, + model_name=self.model_name, + momentum=self.momentum, + nesterov=self.nesterov, + number_of_epochs=self.number_of_epochs, + number_of_workers=self.number_of_workers, + optimizer=self.optimizer, + random_seed=self.random_seed, + step_lr_gamma=self.step_lr_gamma, + step_lr_step_size=self.step_lr_step_size, + training_batch_size=self.training_batch_size, + validation_batch_size=self.validation_batch_size, + warmup_cosine_lr_cycles=self.warmup_cosine_lr_cycles, + warmup_cosine_lr_warmup_epochs=self.warmup_cosine_lr_warmup_epochs, + weight_decay=self.weight_decay, + training_crop_size=self.training_crop_size, + validation_crop_size=self.validation_crop_size, + validation_resize_size=self.validation_resize_size, + weighted_loss=self.weighted_loss, + ) + + @classmethod + def _from_rest_object(cls, obj: RestImageModelSettingsClassification) -> "ImageModelSettingsClassification": + return cls( + advanced_settings=obj.advanced_settings, + ams_gradient=obj.ams_gradient, + beta1=obj.beta1, + beta2=obj.beta2, + checkpoint_frequency=obj.checkpoint_frequency, + checkpoint_run_id=obj.checkpoint_run_id, + distributed=obj.distributed, + early_stopping=obj.early_stopping, + early_stopping_delay=obj.early_stopping_delay, + early_stopping_patience=obj.early_stopping_patience, + enable_onnx_normalization=obj.enable_onnx_normalization, + evaluation_frequency=obj.evaluation_frequency, + gradient_accumulation_step=obj.gradient_accumulation_step, + layers_to_freeze=obj.layers_to_freeze, + learning_rate=obj.learning_rate, + learning_rate_scheduler=obj.learning_rate_scheduler, + model_name=obj.model_name, + momentum=obj.momentum, + nesterov=obj.nesterov, + number_of_epochs=obj.number_of_epochs, + number_of_workers=obj.number_of_workers, + optimizer=obj.optimizer, + random_seed=obj.random_seed, + step_lr_gamma=obj.step_lr_gamma, + step_lr_step_size=obj.step_lr_step_size, + training_batch_size=obj.training_batch_size, + validation_batch_size=obj.validation_batch_size, + warmup_cosine_lr_cycles=obj.warmup_cosine_lr_cycles, + warmup_cosine_lr_warmup_epochs=obj.warmup_cosine_lr_warmup_epochs, + weight_decay=obj.weight_decay, + training_crop_size=obj.training_crop_size, + validation_crop_size=obj.validation_crop_size, + validation_resize_size=obj.validation_resize_size, + weighted_loss=obj.weighted_loss, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageModelSettingsClassification): + return NotImplemented + + return ( + super().__eq__(other) + and self.training_crop_size == other.training_crop_size + and self.validation_crop_size == other.validation_crop_size + and self.validation_resize_size == other.validation_resize_size + and self.weighted_loss == other.weighted_loss + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) + + +class ImageModelSettingsObjectDetection(ImageModelDistributionSettings): + """Model settings for AutoML Image Object Detection Task. + + :param advanced_settings: Settings for advanced scenarios. + :type advanced_settings: str + :param ams_gradient: Enable AMSGrad when optimizer is 'adam' or 'adamw'. + :type ams_gradient: bool + :param beta1: Value of 'beta1' when optimizer is 'adam' or 'adamw'. Must be a float in the range + [0, 1]. + :type beta1: float + :param beta2: Value of 'beta2' when optimizer is 'adam' or 'adamw'. 
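# --- Editor's illustrative sketch (not part of the vendored package or this commit) ---
# Fixed (non-swept) model settings for a classification run, using only keyword
# arguments declared in the ImageModelSettingsClassification __init__ above; the
# model name is a placeholder chosen for illustration.
fixed_settings = ImageModelSettingsClassification(
    model_name="seresnext",
    number_of_epochs=15,
    learning_rate=0.01,
    training_crop_size=224,
    validation_crop_size=224,
    validation_resize_size=256,
    early_stopping=True,
)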
Must be a float in the range + [0, 1]. + :type beta2: float + :param checkpoint_frequency: Frequency to store model checkpoints. Must be a positive integer. + :type checkpoint_frequency: int + :param checkpoint_run_id: The id of a previous run that has a pretrained checkpoint for + incremental training. + :type checkpoint_run_id: str + :param distributed: Whether to use distributed training. + :type distributed: bool + :param early_stopping: Enable early stopping logic during training. + :type early_stopping: bool + :param early_stopping_delay: Minimum number of epochs or validation evaluations to wait before + primary metric improvement + is tracked for early stopping. Must be a positive integer. + :type early_stopping_delay: int + :param early_stopping_patience: Minimum number of epochs or validation evaluations with no + primary metric improvement before + the run is stopped. Must be a positive integer. + :type early_stopping_patience: int + :param enable_onnx_normalization: Enable normalization when exporting ONNX model. + :type enable_onnx_normalization: bool + :param evaluation_frequency: Frequency to evaluate validation dataset to get metric scores. Must + be a positive integer. + :type evaluation_frequency: int + :param gradient_accumulation_step: Gradient accumulation means running a configured number of + "GradAccumulationStep" steps without + updating the model weights while accumulating the gradients of those steps, and then using + the accumulated gradients to compute the weight updates. Must be a positive integer. + :type gradient_accumulation_step: int + :param layers_to_freeze: Number of layers to freeze for the model. Must be a positive integer. + For instance, passing 2 as value for 'seresnext' means + freezing layer0 and layer1. For a full list of models supported and details on layer freeze, + please + see: https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type layers_to_freeze: int + :param learning_rate: Initial learning rate. Must be a float in the range [0, 1]. + :type learning_rate: float + :param learning_rate_scheduler: Type of learning rate scheduler. Must be 'warmup_cosine' or + 'step'. Possible values include: "None", "WarmupCosine", "Step". + :type learning_rate_scheduler: str or + ~azure.mgmt.machinelearningservices.models.LearningRateScheduler + :param model_name: Name of the model to use for training. + For more information on the available models please visit the official documentation: + https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type model_name: str + :param momentum: Value of momentum when optimizer is 'sgd'. Must be a float in the range [0, 1]. + :type momentum: float + :param nesterov: Enable nesterov when optimizer is 'sgd'. + :type nesterov: bool + :param number_of_epochs: Number of training epochs. Must be a positive integer. + :type number_of_epochs: int + :param number_of_workers: Number of data loader workers. Must be a non-negative integer. + :type number_of_workers: int + :param optimizer: Type of optimizer. Possible values include: "None", "Sgd", "Adam", "Adamw". + :type optimizer: str or ~azure.mgmt.machinelearningservices.models.StochasticOptimizer + :param random_seed: Random seed to be used when using deterministic training. + :type random_seed: int + :param step_lr_gamma: Value of gamma when learning rate scheduler is 'step'. Must be a float in + the range [0, 1]. 
+ :type step_lr_gamma: float + :param step_lr_step_size: Value of step size when learning rate scheduler is 'step'. Must be a + positive integer. + :type step_lr_step_size: int + :param training_batch_size: Training batch size. Must be a positive integer. + :type training_batch_size: int + :param validation_batch_size: Validation batch size. Must be a positive integer. + :type validation_batch_size: int + :param warmup_cosine_lr_cycles: Value of cosine cycle when learning rate scheduler is + 'warmup_cosine'. Must be a float in the range [0, 1]. + :type warmup_cosine_lr_cycles: float + :param warmup_cosine_lr_warmup_epochs: Value of warmup epochs when learning rate scheduler is + 'warmup_cosine'. Must be a positive integer. + :type warmup_cosine_lr_warmup_epochs: int + :param weight_decay: Value of weight decay when optimizer is 'sgd', 'adam', or 'adamw'. Must be + a float in the range[0, 1]. + :type weight_decay: float + :param box_detections_per_image: Maximum number of detections per image, for all classes. Must + be a positive integer. + Note: This settings is not supported for the 'yolov5' algorithm. + :type box_detections_per_image: int + :param box_score_threshold: During inference, only return proposals with a classification score + greater than + BoxScoreThreshold. Must be a float in the range[0, 1]. + :type box_score_threshold: float + :param image_size: Image size for train and validation. Must be a positive integer. + Note: The training run may get into CUDA OOM if the size is too big. + Note: This settings is only supported for the 'yolov5' algorithm. + :type image_size: int + :param max_size: Maximum size of the image to be rescaled before feeding it to the backbone. + Must be a positive integer. Note: training run may get into CUDA OOM if the size is too big. + Note: This settings is not supported for the 'yolov5' algorithm. + :type max_size: int + :param min_size: Minimum size of the image to be rescaled before feeding it to the backbone. + Must be a positive integer. Note: training run may get into CUDA OOM if the size is too big. + Note: This settings is not supported for the 'yolov5' algorithm. + :type min_size: int + :param model_size: Model size. Must be 'small', 'medium', 'large'. + Note: training run may get into CUDA OOM if the model size is too big. + Note: This settings is only supported for the 'yolov5' algorithm. Possible values include: + "None", "Small", "Medium", "Large", "ExtraLarge". + :type model_size: str or ~azure.mgmt.machinelearningservices.models.ModelSize + :param multi_scale: Enable multi-scale image by varying image size by +/- 50%. + Note: training run may get into CUDA OOM if no sufficient GPU memory. + Note: This settings is only supported for the 'yolov5' algorithm. + :type multi_scale: bool + :param nms_iou_threshold: IOU threshold used during inference in NMS post processing. Must be a + float in the range [0, 1]. + :type nms_iou_threshold: float + :param tile_grid_size: The grid size to use for tiling each image. Note: TileGridSize must not + be + None to enable small object detection logic. A string containing two integers in mxn format. + Note: This settings is not supported for the 'yolov5' algorithm. + :type tile_grid_size: str + :param tile_overlap_ratio: Overlap ratio between adjacent tiles in each dimension. Must be float + in the range [0, 1). + Note: This settings is not supported for the 'yolov5' algorithm. 
+ :type tile_overlap_ratio: float + :param tile_predictions_nms_threshold: The IOU threshold to use to perform NMS while merging + predictions from tiles and image. + Used in validation/ inference. Must be float in the range [0, 1]. + Note: This settings is not supported for the 'yolov5' algorithm. + :type tile_predictions_nms_threshold: float + :param validation_iou_threshold: IOU threshold to use when computing validation metric. Must be + float in the range [0, 1]. + :type validation_iou_threshold: float + :param validation_metric_type: Metric computation method to use for validation metrics. Possible + values include: "None", "Coco", "Voc", "CocoVoc". + :type validation_metric_type: str or + ~azure.mgmt.machinelearningservices.models.ValidationMetricType + :param log_training_metrics: indicates whether or not to log training metrics + :type log_training_metrics: str or + ~azure.mgmt.machinelearningservices.models.LogTrainingMetrics + :param log_validation_loss: indicates whether or not to log validation loss + :type log_validation_loss: str or + ~azure.mgmt.machinelearningservices.models.LogValidationLoss + + .. literalinclude:: ../samples/ml_samples_automl_image.py + :start-after: [START automl.automl_image_job.image_object_detection_model_settings] + :end-before: [END automl.automl_image_job.image_object_detection_model_settings] + :language: python + :dedent: 8 + :caption: Defining the automl image object detection or instance segmentation model settings. + """ + + def __init__( + self, + *, + advanced_settings: Optional[str] = None, + ams_gradient: Optional[bool] = None, + beta1: Optional[float] = None, + beta2: Optional[float] = None, + checkpoint_frequency: Optional[int] = None, + checkpoint_run_id: Optional[str] = None, + distributed: Optional[bool] = None, + early_stopping: Optional[bool] = None, + early_stopping_delay: Optional[int] = None, + early_stopping_patience: Optional[int] = None, + enable_onnx_normalization: Optional[bool] = None, + evaluation_frequency: Optional[int] = None, + gradient_accumulation_step: Optional[int] = None, + layers_to_freeze: Optional[int] = None, + learning_rate: Optional[float] = None, + learning_rate_scheduler: Optional[LearningRateScheduler] = None, + model_name: Optional[str] = None, + momentum: Optional[float] = None, + nesterov: Optional[bool] = None, + number_of_epochs: Optional[int] = None, + number_of_workers: Optional[int] = None, + optimizer: Optional[StochasticOptimizer] = None, + random_seed: Optional[int] = None, + step_lr_gamma: Optional[float] = None, + step_lr_step_size: Optional[int] = None, + training_batch_size: Optional[int] = None, + validation_batch_size: Optional[int] = None, + warmup_cosine_lr_cycles: Optional[float] = None, + warmup_cosine_lr_warmup_epochs: Optional[int] = None, + weight_decay: Optional[float] = None, + box_detections_per_image: Optional[int] = None, + box_score_threshold: Optional[float] = None, + image_size: Optional[int] = None, + max_size: Optional[int] = None, + min_size: Optional[int] = None, + model_size: Optional[ModelSize] = None, + multi_scale: Optional[bool] = None, + nms_iou_threshold: Optional[float] = None, + tile_grid_size: Optional[str] = None, + tile_overlap_ratio: Optional[float] = None, + tile_predictions_nms_threshold: Optional[float] = None, + validation_iou_threshold: Optional[float] = None, + validation_metric_type: Optional[ValidationMetricType] = None, + log_training_metrics: Optional[LogTrainingMetrics] = None, + log_validation_loss: Optional[LogValidationLoss] = None, + 
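# The detection-specific parameters above (box_*, image/tile sizes, NMS and IOU thresholds,
+ # validation metric and logging flags) are stored on this subclass; everything else is
+ # forwarded to ImageModelDistributionSettings through the super().__init__ call below.
+ # A minimal usage sketch, with hypothetical values chosen only for illustration:
+ #     ImageModelSettingsObjectDetection(min_size=600, max_size=1333, box_score_threshold=0.3) +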
**kwargs: Any, + ): + super(ImageModelSettingsObjectDetection, self).__init__( + advanced_settings=advanced_settings, + ams_gradient=ams_gradient, + beta1=beta1, + beta2=beta2, + checkpoint_frequency=checkpoint_frequency, + checkpoint_run_id=checkpoint_run_id, + distributed=distributed, + early_stopping=early_stopping, + early_stopping_delay=early_stopping_delay, + early_stopping_patience=early_stopping_patience, + enable_onnx_normalization=enable_onnx_normalization, + evaluation_frequency=evaluation_frequency, + gradient_accumulation_step=gradient_accumulation_step, + layers_to_freeze=layers_to_freeze, + learning_rate=learning_rate, + learning_rate_scheduler=learning_rate_scheduler, + model_name=model_name, + momentum=momentum, + nesterov=nesterov, + number_of_epochs=number_of_epochs, + number_of_workers=number_of_workers, + optimizer=optimizer, + random_seed=random_seed, + step_lr_gamma=step_lr_gamma, + step_lr_step_size=step_lr_step_size, + training_batch_size=training_batch_size, + validation_batch_size=validation_batch_size, + warmup_cosine_lr_cycles=warmup_cosine_lr_cycles, + warmup_cosine_lr_warmup_epochs=warmup_cosine_lr_warmup_epochs, + weight_decay=weight_decay, + **kwargs, + ) + self.box_detections_per_image = box_detections_per_image + self.box_score_threshold = box_score_threshold + self.image_size = image_size + self.max_size = max_size + self.min_size = min_size + self.model_size = model_size + self.multi_scale = multi_scale + self.nms_iou_threshold = nms_iou_threshold + self.tile_grid_size = tile_grid_size + self.tile_overlap_ratio = tile_overlap_ratio + self.tile_predictions_nms_threshold = tile_predictions_nms_threshold + self.validation_iou_threshold = validation_iou_threshold + self.validation_metric_type = validation_metric_type + self.log_training_metrics = log_training_metrics + self.log_validation_loss = log_validation_loss + + def _to_rest_object(self) -> RestImageModelSettingsObjectDetection: + return RestImageModelSettingsObjectDetection( + advanced_settings=self.advanced_settings, + ams_gradient=self.ams_gradient, + beta1=self.beta1, + beta2=self.beta2, + checkpoint_frequency=self.checkpoint_frequency, + checkpoint_run_id=self.checkpoint_run_id, + distributed=self.distributed, + early_stopping=self.early_stopping, + early_stopping_delay=self.early_stopping_delay, + early_stopping_patience=self.early_stopping_patience, + enable_onnx_normalization=self.enable_onnx_normalization, + evaluation_frequency=self.evaluation_frequency, + gradient_accumulation_step=self.gradient_accumulation_step, + layers_to_freeze=self.layers_to_freeze, + learning_rate=self.learning_rate, + learning_rate_scheduler=self.learning_rate_scheduler, + model_name=self.model_name, + momentum=self.momentum, + nesterov=self.nesterov, + number_of_epochs=self.number_of_epochs, + number_of_workers=self.number_of_workers, + optimizer=self.optimizer, + random_seed=self.random_seed, + step_lr_gamma=self.step_lr_gamma, + step_lr_step_size=self.step_lr_step_size, + training_batch_size=self.training_batch_size, + validation_batch_size=self.validation_batch_size, + warmup_cosine_lr_cycles=self.warmup_cosine_lr_cycles, + warmup_cosine_lr_warmup_epochs=self.warmup_cosine_lr_warmup_epochs, + weight_decay=self.weight_decay, + box_detections_per_image=self.box_detections_per_image, + box_score_threshold=self.box_score_threshold, + image_size=self.image_size, + max_size=self.max_size, + min_size=self.min_size, + model_size=self.model_size, + multi_scale=self.multi_scale, + 
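# The REST field names mirror the public attribute names one-to-one, so values are passed
+ # through without translation; the detection-specific tiling and NMS settings continue below. +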
nms_iou_threshold=self.nms_iou_threshold, + tile_grid_size=self.tile_grid_size, + tile_overlap_ratio=self.tile_overlap_ratio, + tile_predictions_nms_threshold=self.tile_predictions_nms_threshold, + validation_iou_threshold=self.validation_iou_threshold, + validation_metric_type=self.validation_metric_type, + log_training_metrics=self.log_training_metrics, + log_validation_loss=self.log_validation_loss, + ) + + @classmethod + def _from_rest_object(cls, obj: RestImageModelSettingsObjectDetection) -> "ImageModelSettingsObjectDetection": + return cls( + advanced_settings=obj.advanced_settings, + ams_gradient=obj.ams_gradient, + beta1=obj.beta1, + beta2=obj.beta2, + checkpoint_frequency=obj.checkpoint_frequency, + checkpoint_run_id=obj.checkpoint_run_id, + distributed=obj.distributed, + early_stopping=obj.early_stopping, + early_stopping_delay=obj.early_stopping_delay, + early_stopping_patience=obj.early_stopping_patience, + enable_onnx_normalization=obj.enable_onnx_normalization, + evaluation_frequency=obj.evaluation_frequency, + gradient_accumulation_step=obj.gradient_accumulation_step, + layers_to_freeze=obj.layers_to_freeze, + learning_rate=obj.learning_rate, + learning_rate_scheduler=obj.learning_rate_scheduler, + model_name=obj.model_name, + momentum=obj.momentum, + nesterov=obj.nesterov, + number_of_epochs=obj.number_of_epochs, + number_of_workers=obj.number_of_workers, + optimizer=obj.optimizer, + random_seed=obj.random_seed, + step_lr_gamma=obj.step_lr_gamma, + step_lr_step_size=obj.step_lr_step_size, + training_batch_size=obj.training_batch_size, + validation_batch_size=obj.validation_batch_size, + warmup_cosine_lr_cycles=obj.warmup_cosine_lr_cycles, + warmup_cosine_lr_warmup_epochs=obj.warmup_cosine_lr_warmup_epochs, + weight_decay=obj.weight_decay, + box_detections_per_image=obj.box_detections_per_image, + box_score_threshold=obj.box_score_threshold, + image_size=obj.image_size, + max_size=obj.max_size, + min_size=obj.min_size, + model_size=obj.model_size, + multi_scale=obj.multi_scale, + nms_iou_threshold=obj.nms_iou_threshold, + tile_grid_size=obj.tile_grid_size, + tile_overlap_ratio=obj.tile_overlap_ratio, + tile_predictions_nms_threshold=obj.tile_predictions_nms_threshold, + validation_iou_threshold=obj.validation_iou_threshold, + validation_metric_type=obj.validation_metric_type, + log_training_metrics=obj.log_training_metrics, + log_validation_loss=obj.log_validation_loss, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageModelSettingsObjectDetection): + return NotImplemented + + return ( + super().__eq__(other) + and self.box_detections_per_image == other.box_detections_per_image + and self.box_score_threshold == other.box_score_threshold + and self.image_size == other.image_size + and self.max_size == other.max_size + and self.min_size == other.min_size + and self.model_size == other.model_size + and self.multi_scale == other.multi_scale + and self.nms_iou_threshold == other.nms_iou_threshold + and self.tile_grid_size == other.tile_grid_size + and self.tile_overlap_ratio == other.tile_overlap_ratio + and self.tile_predictions_nms_threshold == other.tile_predictions_nms_threshold + and self.validation_iou_threshold == other.validation_iou_threshold + and self.validation_metric_type == other.validation_metric_type + and self.log_training_metrics == other.log_training_metrics + and self.log_validation_loss == other.log_validation_loss + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git 
a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_object_detection_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_object_detection_job.py new file mode 100644 index 00000000..f8d070d2 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_object_detection_job.py @@ -0,0 +1,240 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import ImageObjectDetection as RestImageObjectDetection +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase, ObjectDetectionPrimaryMetrics, TaskType +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.image.automl_image_object_detection_base import AutoMLImageObjectDetectionBase +from azure.ai.ml.entities._job.automl.image.image_limit_settings import ImageLimitSettings +from azure.ai.ml.entities._job.automl.image.image_model_settings import ImageModelSettingsObjectDetection +from azure.ai.ml.entities._job.automl.image.image_sweep_settings import ImageSweepSettings +from azure.ai.ml.entities._util import load_from_dict + + +class ImageObjectDetectionJob(AutoMLImageObjectDetectionBase): + """Configuration for AutoML Image Object Detection job. + + :keyword primary_metric: The primary metric to use for optimization. + :paramtype primary_metric: Optional[str, ~azure.ai.ml.ObjectDetectionPrimaryMetrics] + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_automl_image.py + :start-after: [START automl.automl_image_job.image_object_detection_job] + :end-before: [END automl.automl_image_job.image_object_detection_job] + :language: python + :dedent: 8 + :caption: creating an automl image object detection job + """ + + _DEFAULT_PRIMARY_METRIC = ObjectDetectionPrimaryMetrics.MEAN_AVERAGE_PRECISION + + def __init__( + self, + *, + primary_metric: Optional[Union[str, ObjectDetectionPrimaryMetrics]] = None, + **kwargs: Any, + ) -> None: + + # Extract any super class init settings + limits = kwargs.pop("limits", None) + sweep = kwargs.pop("sweep", None) + training_parameters = kwargs.pop("training_parameters", None) + search_space = kwargs.pop("search_space", None) + + super().__init__( + task_type=TaskType.IMAGE_OBJECT_DETECTION, + limits=limits, + sweep=sweep, + training_parameters=training_parameters, + search_space=search_space, + **kwargs, + ) + + self.primary_metric = primary_metric or ImageObjectDetectionJob._DEFAULT_PRIMARY_METRIC + + @property + def primary_metric(self) -> Union[str, ObjectDetectionPrimaryMetrics]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ObjectDetectionPrimaryMetrics]) -> None: + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + self._primary_metric = ( + ImageObjectDetectionJob._DEFAULT_PRIMARY_METRIC + if value is None + else ObjectDetectionPrimaryMetrics[camel_to_snake(value).upper()] + ) + + def _to_rest_object(self) -> JobBase: + image_object_detection_task = RestImageObjectDetection( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + validation_data_size=self.validation_data_size, + limit_settings=self._limits._to_rest_object() if self._limits else None, + sweep_settings=self._sweep._to_rest_object() if self._sweep else None, + model_settings=self._training_parameters._to_rest_object() if self._training_parameters else None, + search_space=( + [entry._to_rest_object() for entry in self._search_space if entry is not None] + if self._search_space is not None + else None + ), + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + # resolve data inputs in rest object + self._resolve_data_inputs(image_object_detection_task) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=image_object_detection_task, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "ImageObjectDetectionJob": + properties: RestAutoMLJob = obj.properties + task_details: RestImageObjectDetection = properties.task_details + + job_args_dict = { + "id": obj.id, + "name": obj.name, + "description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "experiment_name": properties.experiment_name, + "services": properties.services, + "status": properties.status, + "creation_context": obj.system_data, + 
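# job_args_dict gathers the generic job metadata (identity, compute, outputs, queue settings,
+ # display name); the task-specific fields are unpacked from task_details in the cls(...) call below. +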
"display_name": properties.display_name, + "compute": properties.compute_id, + "outputs": from_rest_data_outputs(properties.outputs), + "resources": properties.resources, + "identity": ( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + "queue_settings": properties.queue_settings, + } + + image_object_detection_job = cls( + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + validation_data_size=task_details.validation_data_size, + limits=( + ImageLimitSettings._from_rest_object(task_details.limit_settings) + if task_details.limit_settings + else None + ), + sweep=( + ImageSweepSettings._from_rest_object(task_details.sweep_settings) + if task_details.sweep_settings + else None + ), + training_parameters=( + ImageModelSettingsObjectDetection._from_rest_object(task_details.model_settings) + if task_details.model_settings + else None + ), + search_space=cls._get_search_space_from_str(task_details.search_space), + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + **job_args_dict, + ) + + image_object_detection_job._restore_data_inputs() + + return image_object_detection_job + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "ImageObjectDetectionJob": + from azure.ai.ml._schema.automl.image_vertical.image_object_detection import ImageObjectDetectionSchema + from azure.ai.ml._schema.pipeline.automl_node import ImageObjectDetectionNodeSchema + + if kwargs.pop("inside_pipeline", False): + if context.get("inside_pipeline", None) is None: + context["inside_pipeline"] = True + loaded_data = load_from_dict( + ImageObjectDetectionNodeSchema, + data, + context, + additional_message, + **kwargs, + ) + else: + loaded_data = load_from_dict(ImageObjectDetectionSchema, data, context, additional_message, **kwargs) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "ImageObjectDetectionJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + data_settings = { + "training_data": loaded_data.pop("training_data"), + "target_column_name": loaded_data.pop("target_column_name"), + "validation_data": loaded_data.pop("validation_data", None), + "validation_data_size": loaded_data.pop("validation_data_size", None), + } + job = ImageObjectDetectionJob(**loaded_data) + job.set_data(**data_settings) + return job + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.image_vertical.image_object_detection import ImageObjectDetectionSchema + from azure.ai.ml._schema.pipeline.automl_node import ImageObjectDetectionNodeSchema + + schema_dict: dict = {} + if inside_pipeline: + schema_dict = ImageObjectDetectionNodeSchema( + context={BASE_PATH_CONTEXT_KEY: "./", "inside_pipeline": True} + ).dump(self) + else: + schema_dict = ImageObjectDetectionSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + + return schema_dict + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageObjectDetectionJob): + return NotImplemented + + if not super().__eq__(other): + return False + + return self.primary_metric == other.primary_metric + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git 
a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_object_detection_search_space.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_object_detection_search_space.py new file mode 100644 index 00000000..a9004d1e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_object_detection_search_space.py @@ -0,0 +1,899 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=R0902,too-many-locals + +from typing import Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ImageModelDistributionSettingsObjectDetection +from azure.ai.ml.entities._job.automl.search_space import SearchSpace +from azure.ai.ml.entities._job.automl.search_space_utils import _convert_from_rest_object, _convert_to_rest_object +from azure.ai.ml.entities._mixins import RestTranslatableMixin +from azure.ai.ml.sweep import ( + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, +) + + +class ImageObjectDetectionSearchSpace(RestTranslatableMixin): + """Search space for AutoML Image Object Detection and Image Instance Segmentation tasks. + + :param ams_gradient: Enable AMSGrad when optimizer is 'adam' or 'adamw'. + :type ams_gradient: bool or ~azure.ai.ml.entities.SweepDistribution + :param beta1: Value of 'beta1' when optimizer is 'adam' or 'adamw'. Must be a float in the + range [0, 1]. + :type beta1: float or ~azure.ai.ml.entities.SweepDistribution + :param beta2: Value of 'beta2' when optimizer is 'adam' or 'adamw'. Must be a float in the + range [0, 1]. + :type beta2: float or ~azure.ai.ml.entities.SweepDistribution + :param distributed: Whether to use distributer training. + :type distributed: bool or ~azure.ai.ml.entities.SweepDistribution + :param early_stopping: Enable early stopping logic during training. + :type early_stopping: bool or ~azure.ai.ml.entities.SweepDistribution + :param early_stopping_delay: Minimum number of epochs or validation evaluations to wait + before primary metric improvement + is tracked for early stopping. Must be a positive integer. + :type early_stopping_delay: int or ~azure.ai.ml.entities.SweepDistribution + :param early_stopping_patience: Minimum number of epochs or validation evaluations with no + primary metric improvement before the run is stopped. Must be a positive integer. + :type early_stopping_patience: int or ~azure.ai.ml.entities.SweepDistribution + :param enable_onnx_normalization: Enable normalization when exporting ONNX model. + :type enable_onnx_normalization: bool or ~azure.ai.ml.entities.SweepDistribution + :param evaluation_frequency: Frequency to evaluate validation dataset to get metric scores. + Must be a positive integer. + :type evaluation_frequency: int or ~azure.ai.ml.entities.SweepDistribution + :param gradient_accumulation_step: Gradient accumulation means running a configured number of + "GradAccumulationStep" steps without updating the model weights while accumulating the gradients of those steps, + and then using the accumulated gradients to compute the weight updates. Must be a positive integer. + :type gradient_accumulation_step: int or ~azure.ai.ml.entities.SweepDistribution + :param layers_to_freeze: Number of layers to freeze for the model. Must be a positive + integer. 
For instance, passing 2 as value for 'seresnext' means freezing layer0 and layer1. + For a full list of models supported and details on layer freeze, please + see: https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters#model-agnostic-hyperparameters. # pylint: disable=line-too-long + :type layers_to_freeze: int or ~azure.ai.ml.entities.SweepDistribution + :param learning_rate: Initial learning rate. Must be a float in the range [0, 1]. + :type learning_rate: float or ~azure.ai.ml.entities.SweepDistribution + :param learning_rate_scheduler: Type of learning rate scheduler. Must be 'warmup_cosine' or + 'step'. + :type learning_rate_scheduler: str or ~azure.ai.ml.entities.SweepDistribution + :param model_name: Name of the model to use for training. + For more information on the available models please visit the official documentation: + https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type model_name: str or ~azure.ai.ml.entities.SweepDistribution + :param momentum: Value of momentum when optimizer is 'sgd'. Must be a float in the range [0, + 1]. + :type momentum: float or ~azure.ai.ml.entities.SweepDistribution + :param nesterov: Enable nesterov when optimizer is 'sgd'. + :type nesterov: bool or ~azure.ai.ml.entities.SweepDistribution + :param number_of_epochs: Number of training epochs. Must be a positive integer. + :type number_of_epochs: int or ~azure.ai.ml.entities.SweepDistribution + :param number_of_workers: Number of data loader workers. Must be a non-negative integer. + :type number_of_workers: int or ~azure.ai.ml.entities.SweepDistribution + :param optimizer: Type of optimizer. Must be either 'sgd', 'adam', or 'adamw'. + :type optimizer: str or ~azure.ai.ml.entities.SweepDistribution + :param random_seed: Random seed to be used when using deterministic training. + :type random_seed: int or ~azure.ai.ml.entities.SweepDistribution + :param step_lr_gamma: Value of gamma when learning rate scheduler is 'step'. Must be a float + in the range [0, 1]. + :type step_lr_gamma: float or ~azure.ai.ml.entities.SweepDistribution + :param step_lr_step_size: Value of step size when learning rate scheduler is 'step'. Must be + a positive integer. + :type step_lr_step_size: int or ~azure.ai.ml.entities.SweepDistribution + :param training_batch_size: Training batch size. Must be a positive integer. + :type training_batch_size: int or ~azure.ai.ml.entities.SweepDistribution + :param validation_batch_size: Validation batch size. Must be a positive integer. + :type validation_batch_size: int or ~azure.ai.ml.entities.SweepDistribution + :param warmup_cosine_lr_cycles: Value of cosine cycle when learning rate scheduler is + 'warmup_cosine'. Must be a float in the range [0, 1]. + :type warmup_cosine_lr_cycles: float or ~azure.ai.ml.entities.SweepDistribution + :param warmup_cosine_lr_warmup_epochs: Value of warmup epochs when learning rate scheduler is + 'warmup_cosine'. Must be a positive integer. + :type warmup_cosine_lr_warmup_epochs: int or ~azure.ai.ml.entities.SweepDistribution + :param weight_decay: Value of weight decay when optimizer is 'sgd', 'adam', or 'adamw'. Must + be a float in the range[0, 1]. + :type weight_decay: int or ~azure.ai.ml.entities.SweepDistribution + :param box_detections_per_image: Maximum number of detections per image, for all classes. + Must be a positive integer. Note: This settings is not supported for the 'yolov5' algorithm. 
+ :type box_detections_per_image: int or ~azure.ai.ml.entities.SweepDistribution + :param box_score_threshold: During inference, only return proposals with a classification + score greater than BoxScoreThreshold. Must be a float in the range[0, 1]. + :type box_score_threshold: float or ~azure.ai.ml.entities.SweepDistribution + :param image_size: Image size for train and validation. Must be a positive integer. + Note: The training run may get into CUDA OOM if the size is too big. + Note: This settings is only supported for the 'yolov5' algorithm. + :type image_size: int or ~azure.ai.ml.entities.SweepDistribution + :param max_size: Maximum size of the image to be rescaled before feeding it to the backbone. + Must be a positive integer. Note: training run may get into CUDA OOM if the size is too big. + Note: This settings is not supported for the 'yolov5' algorithm. + :type max_size: int or ~azure.ai.ml.entities.SweepDistribution + :param min_size: Minimum size of the image to be rescaled before feeding it to the backbone. + Must be a positive integer. Note: training run may get into CUDA OOM if the size is too big. + Note: This settings is not supported for the 'yolov5' algorithm. + :type min_size: int or ~azure.ai.ml.entities.SweepDistribution + :param model_size: Model size. Must be 'small', 'medium', 'large', or 'extra_large'. + Note: training run may get into CUDA OOM if the model size is too big. + Note: This settings is only supported for the 'yolov5' algorithm. + :type model_size: str or ~azure.ai.ml.entities.SweepDistribution + :param multi_scale: Enable multi-scale image by varying image size by +/- 50%. + Note: training run may get into CUDA OOM if no sufficient GPU memory. + Note: This settings is only supported for the 'yolov5' algorithm. + :type multi_scale: bool or ~azure.ai.ml.entities.SweepDistribution + :param nms_iou_threshold: IOU threshold used during inference in NMS post processing. Must be + float in the range [0, 1]. + :type nms_iou_threshold: float or ~azure.ai.ml.entities.SweepDistribution + :param tile_grid_size: The grid size to use for tiling each image. Note: TileGridSize must + not be None to enable small object detection logic. A string containing two integers in mxn format. + :type tile_grid_size: str or ~azure.ai.ml.entities.SweepDistribution + :param tile_overlap_ratio: Overlap ratio between adjacent tiles in each dimension. Must be + float in the range [0, 1). + :type tile_overlap_ratio: float or ~azure.ai.ml.entities.SweepDistribution + :param tile_predictions_nms_threshold: The IOU threshold to use to perform NMS while merging + predictions from tiles and image. Used in validation/ inference. Must be float in the range [0, 1]. + NMS: Non-maximum suppression. + :type tile_predictions_nms_threshold: float or ~azure.ai.ml.entities.SweepDistribution + :param validation_iou_threshold: IOU threshold to use when computing validation metric. Must + be float in the range [0, 1]. + :type validation_iou_threshold: float or ~azure.ai.ml.entities.SweepDistribution + :param validation_metric_type: Metric computation method to use for validation metrics. Must + be 'none', 'coco', 'voc', or 'coco_voc'. + :type validation_metric_type: str or ~azure.ai.ml.entities.SweepDistribution + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_automl_image.py + :start-after: [START automl.automl_image_job.image_object_detection_search_space] + :end-before: [END automl.automl_image_job.image_object_detection_search_space] + :language: python + :dedent: 8 + :caption: Defining an automl image object detection or instance segmentation search space + """ + + def __init__( + self, + *, + ams_gradient: Optional[ + Union[ + bool, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + beta1: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + beta2: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + distributed: Optional[ + Union[ + bool, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + early_stopping: Optional[ + Union[ + bool, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + early_stopping_delay: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + early_stopping_patience: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + enable_onnx_normalization: Optional[ + Union[ + bool, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + evaluation_frequency: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + gradient_accumulation_step: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + layers_to_freeze: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + learning_rate: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + learning_rate_scheduler: Optional[ + Union[ + str, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + model_name: Optional[ + Union[ + str, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + momentum: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + nesterov: Optional[ + Union[ + bool, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + number_of_epochs: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + number_of_workers: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + optimizer: Optional[ + Union[ + str, Choice, LogNormal, LogUniform, 
Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + random_seed: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + step_lr_gamma: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + step_lr_step_size: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + training_batch_size: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + validation_batch_size: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + warmup_cosine_lr_cycles: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + warmup_cosine_lr_warmup_epochs: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + weight_decay: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + box_detections_per_image: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + box_score_threshold: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + image_size: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + max_size: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + min_size: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + model_size: Optional[ + Union[ + str, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + multi_scale: Optional[ + Union[ + bool, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + nms_iou_threshold: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + tile_grid_size: Optional[ + Union[ + str, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + tile_overlap_ratio: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + tile_predictions_nms_threshold: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + validation_iou_threshold: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + 
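# Every parameter accepts either a plain value or a sweep distribution (Choice, Uniform,
+ # LogUniform, ...), which is why each annotation repeats the same Union of distribution types.
+ # A minimal sketch using the sweep helpers imported above (model names are illustrative):
+ #     ImageObjectDetectionSearchSpace(
+ #         model_name=Choice(["yolov5", "fasterrcnn_resnet50_fpn"]),
+ #         learning_rate=Uniform(0.0001, 0.01),
+ #     ) +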
validation_metric_type: Optional[ + Union[ + str, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + ) -> None: + self.ams_gradient = ams_gradient + self.beta1 = beta1 + self.beta2 = beta2 + self.distributed = distributed + self.early_stopping = early_stopping + self.early_stopping_delay = early_stopping_delay + self.early_stopping_patience = early_stopping_patience + self.enable_onnx_normalization = enable_onnx_normalization + self.evaluation_frequency = evaluation_frequency + self.gradient_accumulation_step = gradient_accumulation_step + self.layers_to_freeze = layers_to_freeze + self.learning_rate = learning_rate + self.learning_rate_scheduler = learning_rate_scheduler + self.model_name = model_name + self.momentum = momentum + self.nesterov = nesterov + self.number_of_epochs = number_of_epochs + self.number_of_workers = number_of_workers + self.optimizer = optimizer + self.random_seed = random_seed + self.step_lr_gamma = step_lr_gamma + self.step_lr_step_size = step_lr_step_size + self.training_batch_size = training_batch_size + self.validation_batch_size = validation_batch_size + self.warmup_cosine_lr_cycles = warmup_cosine_lr_cycles + self.warmup_cosine_lr_warmup_epochs = warmup_cosine_lr_warmup_epochs + self.weight_decay = weight_decay + self.box_detections_per_image = box_detections_per_image + self.box_score_threshold = box_score_threshold + self.image_size = image_size + self.max_size = max_size + self.min_size = min_size + self.model_size = model_size + self.multi_scale = multi_scale + self.nms_iou_threshold = nms_iou_threshold + self.tile_grid_size = tile_grid_size + self.tile_overlap_ratio = tile_overlap_ratio + self.tile_predictions_nms_threshold = tile_predictions_nms_threshold + self.validation_iou_threshold = validation_iou_threshold + self.validation_metric_type = validation_metric_type + + def _to_rest_object(self) -> ImageModelDistributionSettingsObjectDetection: + return ImageModelDistributionSettingsObjectDetection( + ams_gradient=_convert_to_rest_object(self.ams_gradient) if self.ams_gradient is not None else None, + beta1=_convert_to_rest_object(self.beta1) if self.beta1 is not None else None, + beta2=_convert_to_rest_object(self.beta2) if self.beta2 is not None else None, + distributed=_convert_to_rest_object(self.distributed) if self.distributed is not None else None, + early_stopping=_convert_to_rest_object(self.early_stopping) if self.early_stopping is not None else None, + early_stopping_delay=( + _convert_to_rest_object(self.early_stopping_delay) if self.early_stopping_delay is not None else None + ), + early_stopping_patience=( + _convert_to_rest_object(self.early_stopping_patience) + if self.early_stopping_patience is not None + else None + ), + enable_onnx_normalization=( + _convert_to_rest_object(self.enable_onnx_normalization) + if self.enable_onnx_normalization is not None + else None + ), + evaluation_frequency=( + _convert_to_rest_object(self.evaluation_frequency) if self.evaluation_frequency is not None else None + ), + gradient_accumulation_step=( + _convert_to_rest_object(self.gradient_accumulation_step) + if self.gradient_accumulation_step is not None + else None + ), + layers_to_freeze=( + _convert_to_rest_object(self.layers_to_freeze) if self.layers_to_freeze is not None else None + ), + learning_rate=_convert_to_rest_object(self.learning_rate) if self.learning_rate is not None else None, + learning_rate_scheduler=( + _convert_to_rest_object(self.learning_rate_scheduler) + if 
self.learning_rate_scheduler is not None + else None + ), + model_name=_convert_to_rest_object(self.model_name) if self.model_name is not None else None, + momentum=_convert_to_rest_object(self.momentum) if self.momentum is not None else None, + nesterov=_convert_to_rest_object(self.nesterov) if self.nesterov is not None else None, + number_of_epochs=( + _convert_to_rest_object(self.number_of_epochs) if self.number_of_epochs is not None else None + ), + number_of_workers=( + _convert_to_rest_object(self.number_of_workers) if self.number_of_workers is not None else None + ), + optimizer=_convert_to_rest_object(self.optimizer) if self.optimizer is not None else None, + random_seed=_convert_to_rest_object(self.random_seed) if self.random_seed is not None else None, + step_lr_gamma=_convert_to_rest_object(self.step_lr_gamma) if self.step_lr_gamma is not None else None, + step_lr_step_size=( + _convert_to_rest_object(self.step_lr_step_size) if self.step_lr_step_size is not None else None + ), + training_batch_size=( + _convert_to_rest_object(self.training_batch_size) if self.training_batch_size is not None else None + ), + validation_batch_size=( + _convert_to_rest_object(self.validation_batch_size) if self.validation_batch_size is not None else None + ), + warmup_cosine_lr_cycles=( + _convert_to_rest_object(self.warmup_cosine_lr_cycles) + if self.warmup_cosine_lr_cycles is not None + else None + ), + warmup_cosine_lr_warmup_epochs=( + _convert_to_rest_object(self.warmup_cosine_lr_warmup_epochs) + if self.warmup_cosine_lr_warmup_epochs is not None + else None + ), + weight_decay=_convert_to_rest_object(self.weight_decay) if self.weight_decay is not None else None, + box_detections_per_image=( + _convert_to_rest_object(self.box_detections_per_image) + if self.box_detections_per_image is not None + else None + ), + box_score_threshold=( + _convert_to_rest_object(self.box_score_threshold) if self.box_score_threshold is not None else None + ), + image_size=_convert_to_rest_object(self.image_size) if self.image_size is not None else None, + max_size=_convert_to_rest_object(self.max_size) if self.max_size is not None else None, + min_size=_convert_to_rest_object(self.min_size) if self.min_size is not None else None, + model_size=_convert_to_rest_object(self.model_size) if self.model_size is not None else None, + multi_scale=_convert_to_rest_object(self.multi_scale) if self.multi_scale is not None else None, + nms_iou_threshold=( + _convert_to_rest_object(self.nms_iou_threshold) if self.nms_iou_threshold is not None else None + ), + tile_grid_size=_convert_to_rest_object(self.tile_grid_size) if self.tile_grid_size is not None else None, + tile_overlap_ratio=( + _convert_to_rest_object(self.tile_overlap_ratio) if self.tile_overlap_ratio is not None else None + ), + tile_predictions_nms_threshold=( + _convert_to_rest_object(self.tile_predictions_nms_threshold) + if self.tile_predictions_nms_threshold is not None + else None + ), + validation_iou_threshold=( + _convert_to_rest_object(self.validation_iou_threshold) + if self.validation_iou_threshold is not None + else None + ), + validation_metric_type=( + _convert_to_rest_object(self.validation_metric_type) + if self.validation_metric_type is not None + else None + ), + ) + + @classmethod + def _from_rest_object(cls, obj: ImageModelDistributionSettingsObjectDetection) -> "ImageObjectDetectionSearchSpace": + return cls( + ams_gradient=_convert_from_rest_object(obj.ams_gradient) if obj.ams_gradient is not None else None, + 
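# Each REST field is mapped back to an SDK value or sweep distribution via
+ # _convert_from_rest_object; None is kept for parameters that were not swept. +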
beta1=_convert_from_rest_object(obj.beta1) if obj.beta1 is not None else None, + beta2=_convert_from_rest_object(obj.beta2) if obj.beta2 is not None else None, + distributed=_convert_from_rest_object(obj.distributed) if obj.distributed is not None else None, + early_stopping=_convert_from_rest_object(obj.early_stopping) if obj.early_stopping is not None else None, + early_stopping_delay=( + _convert_from_rest_object(obj.early_stopping_delay) if obj.early_stopping_delay is not None else None + ), + early_stopping_patience=( + _convert_from_rest_object(obj.early_stopping_patience) + if obj.early_stopping_patience is not None + else None + ), + enable_onnx_normalization=( + _convert_from_rest_object(obj.enable_onnx_normalization) + if obj.enable_onnx_normalization is not None + else None + ), + evaluation_frequency=( + _convert_from_rest_object(obj.evaluation_frequency) if obj.evaluation_frequency is not None else None + ), + gradient_accumulation_step=( + _convert_from_rest_object(obj.gradient_accumulation_step) + if obj.gradient_accumulation_step is not None + else None + ), + layers_to_freeze=( + _convert_from_rest_object(obj.layers_to_freeze) if obj.layers_to_freeze is not None else None + ), + learning_rate=_convert_from_rest_object(obj.learning_rate) if obj.learning_rate is not None else None, + learning_rate_scheduler=( + _convert_from_rest_object(obj.learning_rate_scheduler) + if obj.learning_rate_scheduler is not None + else None + ), + model_name=_convert_from_rest_object(obj.model_name) if obj.model_name is not None else None, + momentum=_convert_from_rest_object(obj.momentum) if obj.momentum is not None else None, + nesterov=_convert_from_rest_object(obj.nesterov) if obj.nesterov is not None else None, + number_of_epochs=( + _convert_from_rest_object(obj.number_of_epochs) if obj.number_of_epochs is not None else None + ), + number_of_workers=( + _convert_from_rest_object(obj.number_of_workers) if obj.number_of_workers is not None else None + ), + optimizer=_convert_from_rest_object(obj.optimizer) if obj.optimizer is not None else None, + random_seed=_convert_from_rest_object(obj.random_seed) if obj.random_seed is not None else None, + step_lr_gamma=_convert_from_rest_object(obj.step_lr_gamma) if obj.step_lr_gamma is not None else None, + step_lr_step_size=( + _convert_from_rest_object(obj.step_lr_step_size) if obj.step_lr_step_size is not None else None + ), + training_batch_size=( + _convert_from_rest_object(obj.training_batch_size) if obj.training_batch_size is not None else None + ), + validation_batch_size=( + _convert_from_rest_object(obj.validation_batch_size) if obj.validation_batch_size is not None else None + ), + warmup_cosine_lr_cycles=( + _convert_from_rest_object(obj.warmup_cosine_lr_cycles) + if obj.warmup_cosine_lr_cycles is not None + else None + ), + warmup_cosine_lr_warmup_epochs=( + _convert_from_rest_object(obj.warmup_cosine_lr_warmup_epochs) + if obj.warmup_cosine_lr_warmup_epochs is not None + else None + ), + weight_decay=_convert_from_rest_object(obj.weight_decay) if obj.weight_decay is not None else None, + box_detections_per_image=( + _convert_from_rest_object(obj.box_detections_per_image) + if obj.box_detections_per_image is not None + else None + ), + box_score_threshold=( + _convert_from_rest_object(obj.box_score_threshold) if obj.box_score_threshold is not None else None + ), + image_size=_convert_from_rest_object(obj.image_size) if obj.image_size is not None else None, + max_size=_convert_from_rest_object(obj.max_size) if obj.max_size is not None 
else None, + min_size=_convert_from_rest_object(obj.min_size) if obj.min_size is not None else None, + model_size=_convert_from_rest_object(obj.model_size) if obj.model_size is not None else None, + multi_scale=_convert_from_rest_object(obj.multi_scale) if obj.multi_scale is not None else None, + nms_iou_threshold=( + _convert_from_rest_object(obj.nms_iou_threshold) if obj.nms_iou_threshold is not None else None + ), + tile_grid_size=_convert_from_rest_object(obj.tile_grid_size) if obj.tile_grid_size is not None else None, + tile_overlap_ratio=( + _convert_from_rest_object(obj.tile_overlap_ratio) if obj.tile_overlap_ratio is not None else None + ), + tile_predictions_nms_threshold=( + _convert_from_rest_object(obj.tile_predictions_nms_threshold) + if obj.tile_predictions_nms_threshold is not None + else None + ), + validation_iou_threshold=( + _convert_from_rest_object(obj.validation_iou_threshold) + if obj.validation_iou_threshold is not None + else None + ), + validation_metric_type=( + _convert_from_rest_object(obj.validation_metric_type) + if obj.validation_metric_type is not None + else None + ), + ) + + @classmethod + def _from_search_space_object(cls, obj: SearchSpace) -> "ImageObjectDetectionSearchSpace": + return cls( + ams_gradient=obj.ams_gradient if hasattr(obj, "ams_gradient") else None, + beta1=obj.beta1 if hasattr(obj, "beta1") else None, + beta2=obj.beta2 if hasattr(obj, "beta2") else None, + distributed=obj.distributed if hasattr(obj, "distributed") else None, + early_stopping=obj.early_stopping if hasattr(obj, "early_stopping") else None, + early_stopping_delay=obj.early_stopping_delay if hasattr(obj, "early_stopping_delay") else None, + early_stopping_patience=obj.early_stopping_patience if hasattr(obj, "early_stopping_patience") else None, + enable_onnx_normalization=( + obj.enable_onnx_normalization if hasattr(obj, "enable_onnx_normalization") else None + ), + evaluation_frequency=obj.evaluation_frequency if hasattr(obj, "evaluation_frequency") else None, + gradient_accumulation_step=( + obj.gradient_accumulation_step if hasattr(obj, "gradient_accumulation_step") else None + ), + layers_to_freeze=obj.layers_to_freeze if hasattr(obj, "layers_to_freeze") else None, + learning_rate=obj.learning_rate if hasattr(obj, "learning_rate") else None, + learning_rate_scheduler=obj.learning_rate_scheduler if hasattr(obj, "learning_rate_scheduler") else None, + model_name=obj.model_name if hasattr(obj, "model_name") else None, + momentum=obj.momentum if hasattr(obj, "momentum") else None, + nesterov=obj.nesterov if hasattr(obj, "nesterov") else None, + number_of_epochs=obj.number_of_epochs if hasattr(obj, "number_of_epochs") else None, + number_of_workers=obj.number_of_workers if hasattr(obj, "number_of_workers") else None, + optimizer=obj.optimizer if hasattr(obj, "optimizer") else None, + random_seed=obj.random_seed if hasattr(obj, "random_seed") else None, + step_lr_gamma=obj.step_lr_gamma if hasattr(obj, "step_lr_gamma") else None, + step_lr_step_size=obj.step_lr_step_size if hasattr(obj, "step_lr_step_size") else None, + training_batch_size=obj.training_batch_size if hasattr(obj, "training_batch_size") else None, + validation_batch_size=obj.validation_batch_size if hasattr(obj, "validation_batch_size") else None, + warmup_cosine_lr_cycles=obj.warmup_cosine_lr_cycles if hasattr(obj, "warmup_cosine_lr_cycles") else None, + warmup_cosine_lr_warmup_epochs=( + obj.warmup_cosine_lr_warmup_epochs if hasattr(obj, "warmup_cosine_lr_warmup_epochs") else None + ), + 
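# The generic SearchSpace object may define only a subset of these attributes, so each
+ # field is read defensively with hasattr and defaults to None when absent. +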
weight_decay=obj.weight_decay if hasattr(obj, "weight_decay") else None, + box_detections_per_image=obj.box_detections_per_image if hasattr(obj, "box_detections_per_image") else None, + box_score_threshold=obj.box_score_threshold if hasattr(obj, "box_score_threshold") else None, + image_size=obj.image_size if hasattr(obj, "image_size") else None, + max_size=obj.max_size if hasattr(obj, "max_size") else None, + min_size=obj.min_size if hasattr(obj, "min_size") else None, + model_size=obj.model_size if hasattr(obj, "model_size") else None, + multi_scale=obj.multi_scale if hasattr(obj, "multi_scale") else None, + nms_iou_threshold=obj.nms_iou_threshold if hasattr(obj, "nms_iou_threshold") else None, + tile_grid_size=obj.tile_grid_size if hasattr(obj, "tile_grid_size") else None, + tile_overlap_ratio=obj.tile_overlap_ratio if hasattr(obj, "tile_overlap_ratio") else None, + tile_predictions_nms_threshold=( + obj.tile_predictions_nms_threshold if hasattr(obj, "tile_predictions_nms_threshold") else None + ), + validation_iou_threshold=obj.validation_iou_threshold if hasattr(obj, "validation_iou_threshold") else None, + validation_metric_type=obj.validation_metric_type if hasattr(obj, "validation_metric_type") else None, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageObjectDetectionSearchSpace): + return NotImplemented + + return ( + self.ams_gradient == other.ams_gradient + and self.beta1 == other.beta1 + and self.beta2 == other.beta2 + and self.distributed == other.distributed + and self.early_stopping == other.early_stopping + and self.early_stopping_delay == other.early_stopping_delay + and self.early_stopping_patience == other.early_stopping_patience + and self.enable_onnx_normalization == other.enable_onnx_normalization + and self.evaluation_frequency == other.evaluation_frequency + and self.gradient_accumulation_step == other.gradient_accumulation_step + and self.layers_to_freeze == other.layers_to_freeze + and self.learning_rate == other.learning_rate + and self.learning_rate_scheduler == other.learning_rate_scheduler + and self.model_name == other.model_name + and self.momentum == other.momentum + and self.nesterov == other.nesterov + and self.number_of_epochs == other.number_of_epochs + and self.number_of_workers == other.number_of_workers + and self.optimizer == other.optimizer + and self.random_seed == other.random_seed + and self.step_lr_gamma == other.step_lr_gamma + and self.step_lr_step_size == other.step_lr_step_size + and self.training_batch_size == other.training_batch_size + and self.validation_batch_size == other.validation_batch_size + and self.warmup_cosine_lr_cycles == other.warmup_cosine_lr_cycles + and self.warmup_cosine_lr_warmup_epochs == other.warmup_cosine_lr_warmup_epochs + and self.weight_decay == other.weight_decay + and self.box_detections_per_image == other.box_detections_per_image + and self.box_score_threshold == other.box_score_threshold + and self.image_size == other.image_size + and self.max_size == other.max_size + and self.min_size == other.min_size + and self.model_size == other.model_size + and self.multi_scale == other.multi_scale + and self.nms_iou_threshold == other.nms_iou_threshold + and self.tile_grid_size == other.tile_grid_size + and self.tile_overlap_ratio == other.tile_overlap_ratio + and self.tile_predictions_nms_threshold == other.tile_predictions_nms_threshold + and self.validation_iou_threshold == other.validation_iou_threshold + and self.validation_metric_type == other.validation_metric_type + ) + + def 
__ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_sweep_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_sweep_settings.py new file mode 100644 index 00000000..b5e9ffaf --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_sweep_settings.py @@ -0,0 +1,86 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ImageSweepSettings as RestImageSweepSettings +from azure.ai.ml._restclient.v2023_04_01_preview.models import SamplingAlgorithmType +from azure.ai.ml.entities._job.sweep.early_termination_policy import ( + BanditPolicy, + EarlyTerminationPolicy, + MedianStoppingPolicy, + TruncationSelectionPolicy, +) +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class ImageSweepSettings(RestTranslatableMixin): + """Sweep settings for all AutoML Image Verticals. + + :keyword sampling_algorithm: Required. Type of the hyperparameter sampling. + algorithms. Possible values include: "Grid", "Random", "Bayesian". + :paramtype sampling_algorithm: Union[ + str, + ~azure.mgmt.machinelearningservices.models.SamplingAlgorithmType.GRID, + ~azure.mgmt.machinelearningservices.models.SamplingAlgorithmType.BAYESIAN, + ~azure.mgmt.machinelearningservices.models.SamplingAlgorithmType.RANDOM + + ] + :keyword early_termination: Type of early termination policy. + :paramtype early_termination: Union[ + + ~azure.mgmt.machinelearningservices.models.BanditPolicy, + ~azure.mgmt.machinelearningservices.models.MedianStoppingPolicy, + ~azure.mgmt.machinelearningservices.models.TruncationSelectionPolicy + + ] + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_automl_image.py + :start-after: [START automl.automl_image_job.image_sweep_settings] + :end-before: [END automl.automl_image_job.image_sweep_settings] + :language: python + :dedent: 8 + :caption: Defining the sweep settings for an automl image job. 
+ """ + + def __init__( + self, + *, + sampling_algorithm: Union[ + str, SamplingAlgorithmType.GRID, SamplingAlgorithmType.BAYESIAN, SamplingAlgorithmType.RANDOM + ], + early_termination: Optional[ + Union[EarlyTerminationPolicy, BanditPolicy, MedianStoppingPolicy, TruncationSelectionPolicy] + ] = None, + ): + self.sampling_algorithm = sampling_algorithm + self.early_termination = early_termination + + def _to_rest_object(self) -> RestImageSweepSettings: + return RestImageSweepSettings( + sampling_algorithm=self.sampling_algorithm, + early_termination=self.early_termination._to_rest_object() if self.early_termination else None, + ) + + @classmethod + def _from_rest_object(cls, obj: RestImageSweepSettings) -> "ImageSweepSettings": + return cls( + sampling_algorithm=obj.sampling_algorithm, + early_termination=( + EarlyTerminationPolicy._from_rest_object(obj.early_termination) if obj.early_termination else None + ), + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageSweepSettings): + return NotImplemented + + return self.sampling_algorithm == other.sampling_algorithm and self.early_termination == other.early_termination + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/__init__.py new file mode 100644 index 00000000..9be7b483 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/__init__.py @@ -0,0 +1,25 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from .automl_nlp_job import AutoMLNLPJob +from .nlp_featurization_settings import NlpFeaturizationSettings +from .nlp_fixed_parameters import NlpFixedParameters +from .nlp_limit_settings import NlpLimitSettings +from .nlp_search_space import NlpSearchSpace +from .nlp_sweep_settings import NlpSweepSettings +from .text_classification_job import TextClassificationJob +from .text_classification_multilabel_job import TextClassificationMultilabelJob +from .text_ner_job import TextNerJob + +__all__ = [ + "AutoMLNLPJob", + "NlpFeaturizationSettings", + "NlpFixedParameters", + "NlpLimitSettings", + "NlpSearchSpace", + "NlpSweepSettings", + "TextClassificationJob", + "TextClassificationMultilabelJob", + "TextNerJob", +] diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/automl_nlp_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/automl_nlp_job.py new file mode 100644 index 00000000..f0b3baa8 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/automl_nlp_job.py @@ -0,0 +1,467 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- +from abc import ABC +from typing import Any, Dict, List, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + LogVerbosity, + NlpLearningRateScheduler, + SamplingAlgorithmType, +) +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job.automl.automl_vertical import AutoMLVertical +from azure.ai.ml.entities._job.automl.nlp.nlp_featurization_settings import NlpFeaturizationSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_fixed_parameters import NlpFixedParameters +from azure.ai.ml.entities._job.automl.nlp.nlp_limit_settings import NlpLimitSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_search_space import NlpSearchSpace +from azure.ai.ml.entities._job.automl.nlp.nlp_sweep_settings import NlpSweepSettings +from azure.ai.ml.entities._job.automl.search_space import SearchSpace +from azure.ai.ml.entities._job.automl.utils import cast_to_specific_search_space +from azure.ai.ml.entities._job.sweep.early_termination_policy import EarlyTerminationPolicy +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +# pylint: disable=too-many-instance-attributes,protected-access +class AutoMLNLPJob(AutoMLVertical, ABC): + """Base class for AutoML NLP jobs. + + You should not instantiate this class directly. Instead you should + create classes for specific NLP Jobs. + + :param task_type: NLP task type, must be one of 'TextClassification', + 'TextClassificationMultilabel', or 'TextNER' + :type task_type: str + :param primary_metric: Primary metric to display from NLP job + :type primary_metric: str + :param training_data: Training data + :type training_data: Input + :param validation_data: Validation data + :type validation_data: Input + :param target_column_name: Column name of the target column, defaults to None + :type target_column_name: Optional[str] + :param log_verbosity: The degree of verbosity used in logging, defaults to None, + must be one of 'NotSet', 'Debug', 'Info', 'Warning', 'Error', 'Critical', or None + :type log_verbosity: Optional[str] + :param featurization: Featurization settings used for NLP job, defaults to None + :type featurization: Optional[~azure.ai.ml.automl.NlpFeaturizationSettings] + :param limits: Limit settings for NLP jobs, defaults to None + :type limits: Optional[~azure.ai.ml.automl.NlpLimitSettings] + :param sweep: Sweep settings used for NLP job, defaults to None + :type sweep: Optional[~azure.ai.ml.automl.NlpSweepSettings] + :param training_parameters: Fixed parameters for the training of all candidates. 
+ , defaults to None + :type training_parameters: Optional[~azure.ai.ml.automl.NlpFixedParameters] + :param search_space: Search space(s) to sweep over for NLP sweep jobs, defaults to None + :type search_space: Optional[List[~azure.ai.ml.automl.NlpSearchSpace]] + """ + + def __init__( + self, + *, + task_type: str, + primary_metric: str, + training_data: Optional[Input], + validation_data: Optional[Input], + target_column_name: Optional[str] = None, + log_verbosity: Optional[str] = None, + featurization: Optional[NlpFeaturizationSettings] = None, + limits: Optional[NlpLimitSettings] = None, + sweep: Optional[NlpSweepSettings] = None, + training_parameters: Optional[NlpFixedParameters] = None, + search_space: Optional[List[NlpSearchSpace]] = None, + **kwargs: Any, + ): + self._training_parameters: Optional[NlpFixedParameters] = None + + super().__init__( + task_type, training_data=training_data, validation_data=validation_data, **kwargs # type: ignore + ) + self.log_verbosity = log_verbosity + self._primary_metric: str = "" + self.primary_metric = primary_metric + + self.target_column_name = target_column_name + + self._featurization = featurization + self._limits = limits or NlpLimitSettings() + self._sweep = sweep + self.training_parameters = training_parameters # via setter method. + self._search_space = search_space + + @property + def training_parameters(self) -> Optional[NlpFixedParameters]: + """Parameters that are used for all submitted jobs. + + :return: fixed training parameters for NLP jobs + :rtype: ~azure.ai.ml.automl.NlpFixedParameters + """ + return self._training_parameters + + @training_parameters.setter + def training_parameters(self, value: Union[Dict, NlpFixedParameters]) -> None: + if value is None: + self._training_parameters = None + elif isinstance(value, NlpFixedParameters): + self._training_parameters = value + # Convert parameters from snake case to enum. + self.set_training_parameters(learning_rate_scheduler=value.learning_rate_scheduler) + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for nlp training parameters." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_training_parameters(**value) + + @property + def search_space(self) -> Optional[List[NlpSearchSpace]]: + """Search space(s) to sweep over for NLP sweep jobs + + :return: list of search spaces to sweep over for NLP jobs + :rtype: List[~azure.ai.ml.automl.NlpSearchSpace] + """ + return self._search_space + + @search_space.setter + def search_space(self, value: Union[List[dict], List[SearchSpace]]) -> None: + if not isinstance(value, list): + msg = "Expected a list for search space." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + all_dict_type = all(isinstance(item, dict) for item in value) + all_search_space_type = all(isinstance(item, SearchSpace) for item in value) + + if not (all_search_space_type or all_dict_type): + msg = "Expected all items in the list to be either dictionaries or SearchSpace objects." 
+ raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + self._search_space = [ + cast_to_specific_search_space(item, NlpSearchSpace, self.task_type) for item in value # type: ignore + ] + + @property + def primary_metric(self) -> str: + """Primary metric to display from NLP job + + :return: primary metric to display + :rtype: str + """ + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: str) -> None: + self._primary_metric = value + + @property + def log_verbosity(self) -> LogVerbosity: + """Log verbosity configuration + + :return: the degree of verbosity used in logging + :rtype: ~azure.mgmt.machinelearningservices.models.LogVerbosity + """ + return self._log_verbosity + + @log_verbosity.setter + def log_verbosity(self, value: Union[str, LogVerbosity]) -> None: + self._log_verbosity = None if value is None else LogVerbosity[camel_to_snake(value).upper()] + + @property + def limits(self) -> NlpLimitSettings: + """Limit settings for NLP jobs + + :return: limit configuration for NLP job + :rtype: ~azure.ai.ml.automl.NlpLimitSettings + """ + return self._limits + + @limits.setter + def limits(self, value: Union[Dict, NlpLimitSettings]) -> None: + if isinstance(value, NlpLimitSettings): + self._limits = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for limit settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_limits(**value) + + @property + def sweep(self) -> Optional[NlpSweepSettings]: + """Sweep settings used for NLP job + + :return: sweep settings + :rtype: ~azure.ai.ml.automl.NlpSweepSettings + """ + return self._sweep + + @sweep.setter + def sweep(self, value: Union[Dict, NlpSweepSettings]) -> None: + if isinstance(value, NlpSweepSettings): + self._sweep = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for sweep settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_sweep(**value) + + @property + def featurization(self) -> Optional[NlpFeaturizationSettings]: + """Featurization settings used for NLP job + + :return: featurization settings + :rtype: ~azure.ai.ml.automl.NlpFeaturizationSettings + """ + return self._featurization + + @featurization.setter + def featurization(self, value: Union[Dict, NlpFeaturizationSettings]) -> None: + if isinstance(value, NlpFeaturizationSettings): + self._featurization = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for featurization settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_featurization(**value) + + def set_data(self, *, training_data: Input, target_column_name: str, validation_data: Input) -> None: + """Define data configuration for NLP job + + :keyword training_data: Training data + :type training_data: ~azure.ai.ml.Input + :keyword target_column_name: Column name of the target column. 
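The `limits`, `featurization` and `training_parameters` setters above accept either the typed settings objects or plain dictionaries, which they forward to the matching `set_*` helpers. A short sketch on a concrete job follows; the `text_ner` factory, the data paths and the literal values are assumptions for illustration.

```python
# Minimal sketch: dictionary values are routed through set_limits / set_featurization /
# set_training_parameters, so either form configures the same underlying settings.
from azure.ai.ml import Input, automl

ner_job = automl.text_ner(
    training_data=Input(type="mltable", path="./train-mltable-folder"),
    validation_data=Input(type="mltable", path="./valid-mltable-folder"),
)
ner_job.limits = {"max_trials": 4, "max_concurrent_trials": 2, "timeout_minutes": 120}
ner_job.featurization = {"dataset_language": "eng"}
ner_job.training_parameters = {"model_name": "bert-base-uncased", "number_of_epochs": 2}
```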
+ :type target_column_name: str + :keyword validation_data: Validation data + :type validation_data: ~azure.ai.ml.Input + """ + # Properties for NlpVerticalDataSettings + self.target_column_name = target_column_name + self.training_data = training_data + self.validation_data = validation_data + + def set_limits( + self, + *, + max_trials: int = 1, + max_concurrent_trials: int = 1, + max_nodes: int = 1, + timeout_minutes: Optional[int] = None, + trial_timeout_minutes: Optional[int] = None, + ) -> None: + """Define limit configuration for AutoML NLP job + + :keyword max_trials: Maximum number of AutoML iterations, defaults to 1 + :type max_trials: int, optional + :keyword max_concurrent_trials: Maximum number of concurrent AutoML iterations, defaults to 1 + :type max_concurrent_trials: int, optional + :keyword max_nodes: Maximum number of nodes used for sweep, defaults to 1 + :type max_nodes: int, optional + :keyword timeout_minutes: Timeout for the AutoML job, defaults to None + :type timeout_minutes: Optional[int] + :keyword trial_timeout_minutes: Timeout for each AutoML trial, defaults to None + :type trial_timeout_minutes: Optional[int] + """ + self._limits = NlpLimitSettings( + max_trials=max_trials, + max_concurrent_trials=max_concurrent_trials, + max_nodes=max_nodes, + timeout_minutes=timeout_minutes, + trial_timeout_minutes=trial_timeout_minutes, + ) + + def set_sweep( + self, + *, + sampling_algorithm: Union[str, SamplingAlgorithmType], + early_termination: Optional[EarlyTerminationPolicy] = None, + ) -> None: + """Define sweep configuration for AutoML NLP job + + :keyword sampling_algorithm: Required. Specifies type of hyperparameter sampling algorithm. + Possible values include: "Grid", "Random", and "Bayesian". + :type sampling_algorithm: Union[str, ~azure.ai.ml.automl.SamplingAlgorithmType] + :keyword early_termination: Optional. early termination policy to end poorly performing training candidates, + defaults to None. + :type early_termination: Optional[~azure.mgmt.machinelearningservices.models.EarlyTerminationPolicy] + """ + if self._sweep: + self._sweep.sampling_algorithm = sampling_algorithm + else: + self._sweep = NlpSweepSettings(sampling_algorithm=sampling_algorithm) + + self._sweep.early_termination = early_termination or self._sweep.early_termination + + def set_training_parameters( + self, + *, + gradient_accumulation_steps: Optional[int] = None, + learning_rate: Optional[float] = None, + learning_rate_scheduler: Optional[Union[str, NlpLearningRateScheduler]] = None, + model_name: Optional[str] = None, + number_of_epochs: Optional[int] = None, + training_batch_size: Optional[int] = None, + validation_batch_size: Optional[int] = None, + warmup_ratio: Optional[float] = None, + weight_decay: Optional[float] = None, + ) -> None: + """Fix certain training parameters throughout the training procedure for all candidates. + + :keyword gradient_accumulation_steps: number of steps over which to accumulate gradients before a backward + pass. This must be a positive integer., defaults to None + :type gradient_accumulation_steps: Optional[int] + :keyword learning_rate: initial learning rate. Must be a float in (0, 1)., defaults to None + :type learning_rate: Optional[float] + :keyword learning_rate_scheduler: the type of learning rate scheduler. 
Must choose from 'linear', 'cosine', + 'cosine_with_restarts', 'polynomial', 'constant', and 'constant_with_warmup'., defaults to None + :type learning_rate_scheduler: Optional[Union[str, ~azure.ai.ml.automl.NlpLearningRateScheduler]] + :keyword model_name: the model name to use during training. Must choose from 'bert-base-cased', + 'bert-base-uncased', 'bert-base-multilingual-cased', 'bert-base-german-cased', 'bert-large-cased', + 'bert-large-uncased', 'distilbert-base-cased', 'distilbert-base-uncased', 'roberta-base', 'roberta-large', + 'distilroberta-base', 'xlm-roberta-base', 'xlm-roberta-large', xlnet-base-cased', and 'xlnet-large-cased'., + defaults to None + :type model_name: Optional[str] + :keyword number_of_epochs: the number of epochs to train with. Must be a positive integer., defaults to None + :type number_of_epochs: Optional[int] + :keyword training_batch_size: the batch size during training. Must be a positive integer., defaults to None + :type training_batch_size: Optional[int] + :keyword validation_batch_size: the batch size during validation. Must be a positive integer., defaults to None + :type validation_batch_size: Optional[int] + :keyword warmup_ratio: ratio of total training steps used for a linear warmup from 0 to learning_rate. + Must be a float in [0, 1]., defaults to None + :type warmup_ratio: Optional[float] + :keyword weight_decay: value of weight decay when optimizer is sgd, adam, or adamw. This must be a float in + the range [0, 1]., defaults to None + :type weight_decay: Optional[float] + """ + self._training_parameters = self._training_parameters or NlpFixedParameters() + + self._training_parameters.gradient_accumulation_steps = ( + gradient_accumulation_steps + if gradient_accumulation_steps is not None + else self._training_parameters.gradient_accumulation_steps + ) + + self._training_parameters.learning_rate = ( + learning_rate if learning_rate is not None else self._training_parameters.learning_rate + ) + + self._training_parameters.learning_rate_scheduler = ( + NlpLearningRateScheduler[camel_to_snake(learning_rate_scheduler).upper()] + if learning_rate_scheduler is not None + else self._training_parameters.learning_rate_scheduler + ) + + self._training_parameters.model_name = ( + model_name if model_name is not None else self._training_parameters.model_name + ) + + self._training_parameters.number_of_epochs = ( + number_of_epochs if number_of_epochs is not None else self._training_parameters.number_of_epochs + ) + + self._training_parameters.training_batch_size = ( + training_batch_size if training_batch_size is not None else self._training_parameters.training_batch_size + ) + + self._training_parameters.validation_batch_size = ( + validation_batch_size + if validation_batch_size is not None + else self._training_parameters.validation_batch_size + ) + + self._training_parameters.warmup_ratio = ( + warmup_ratio if warmup_ratio is not None else self._training_parameters.warmup_ratio + ) + + self._training_parameters.weight_decay = ( + weight_decay if weight_decay is not None else self._training_parameters.weight_decay + ) + + def set_featurization(self, *, dataset_language: Optional[str] = None) -> None: + """Define featurization configuration for AutoML NLP job. 
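The `set_limits`, `set_sweep` and `set_training_parameters` helpers documented above can also be called directly on a concrete NLP job. A sketch assuming the public `text_classification` factory; the data paths, column name and hyperparameter values are illustrative only.

```python
# Minimal sketch: bounding the experiment, fixing per-candidate parameters, and enabling a sweep.
from azure.ai.ml import Input, automl

job = automl.text_classification(
    training_data=Input(type="mltable", path="./train-mltable-folder"),
    validation_data=Input(type="mltable", path="./valid-mltable-folder"),
    target_column_name="sentiment",
)
job.set_limits(max_trials=4, max_concurrent_trials=2, max_nodes=4, timeout_minutes=240)
job.set_training_parameters(
    model_name="roberta-base",         # fixed for every candidate
    learning_rate_scheduler="linear",  # stored as NlpLearningRateScheduler.LINEAR
    number_of_epochs=3,
)
job.set_sweep(sampling_algorithm="Random")
```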
+ + :keyword dataset_language: Language of the dataset, defaults to None + :type dataset_language: Optional[str] + """ + self._featurization = NlpFeaturizationSettings( + dataset_language=dataset_language, + ) + + def extend_search_space(self, value: Union[SearchSpace, List[SearchSpace]]) -> None: + """Add (a) search space(s) for an AutoML NLP job. + + :param value: either a SearchSpace object or a list of SearchSpace objects with nlp-specific parameters. + :type value: Union[~azure.ai.ml.automl.SearchSpace, List[~azure.ai.ml.automl.SearchSpace]] + """ + self._search_space = self._search_space or [] + if isinstance(value, list): + self._search_space.extend( + [cast_to_specific_search_space(item, NlpSearchSpace, self.task_type) for item in value] # type: ignore + ) + else: + self._search_space.append( + cast_to_specific_search_space(value, NlpSearchSpace, self.task_type) # type: ignore + ) + + @classmethod + def _get_search_space_from_str(cls, search_space_str: Optional[str]) -> Optional[List]: + if search_space_str is not None: + return [NlpSearchSpace._from_rest_object(entry) for entry in search_space_str if entry is not None] + return None + + def _restore_data_inputs(self) -> None: + """Restore MLTableJobInputs to Inputs within data_settings. + + self.training_data and self.validation_data should reflect what user passed in (Input) Once we get response back + from service (as MLTableJobInput), we should set responsible ones back to Input + """ + super()._restore_data_inputs() + self.training_data = self.training_data if self.training_data else None # type: ignore + self.validation_data = self.validation_data if self.validation_data else None # type: ignore + + def __eq__(self, other: object) -> bool: + if not isinstance(other, AutoMLNLPJob): + return NotImplemented + + return ( + self.primary_metric == other.primary_metric + and self.log_verbosity == other.log_verbosity + and self.training_data == other.training_data + and self.validation_data == other.validation_data + and self._featurization == other._featurization + and self._limits == other._limits + and self._sweep == other._sweep + and self._training_parameters == other._training_parameters + and self._search_space == other._search_space + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_featurization_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_featurization_settings.py new file mode 100644 index 00000000..5649dea2 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_featurization_settings.py @@ -0,0 +1,47 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + NlpVerticalFeaturizationSettings as RestNlpVerticalFeaturizationSettings, +) +from azure.ai.ml.entities._job.automl.featurization_settings import FeaturizationSettings, FeaturizationSettingsType + + +class NlpFeaturizationSettings(FeaturizationSettings): + """Featurization settings for all AutoML NLP Verticals. + + :ivar type: Specifies the type of FeaturizationSettings. Set automatically to "NLP" for this class. + :vartype type: str + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.nlp_featurization_settings] + :end-before: [END automl.nlp_featurization_settings] + :language: python + :dedent: 8 + :caption: creating an nlp featurization settings + """ + + type = FeaturizationSettingsType.NLP + + def _to_rest_object(self) -> RestNlpVerticalFeaturizationSettings: + return RestNlpVerticalFeaturizationSettings( + dataset_language=self.dataset_language, + ) + + @classmethod + def _from_rest_object(cls, obj: RestNlpVerticalFeaturizationSettings) -> "NlpFeaturizationSettings": + return cls( + dataset_language=obj.dataset_language, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, NlpFeaturizationSettings): + return NotImplemented + + return super().__eq__(other) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_fixed_parameters.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_fixed_parameters.py new file mode 100644 index 00000000..13c594b6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_fixed_parameters.py @@ -0,0 +1,117 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +from typing import Optional + +from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpFixedParameters as RestNlpFixedParameters +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class NlpFixedParameters(RestTranslatableMixin): + """Configuration of fixed parameters for all candidates of an AutoML NLP Job + + :param gradient_accumulation_steps: number of steps over which to accumulate gradients before a backward + pass. This must be a positive integer, defaults to None + :type gradient_accumulation_steps: Optional[int] + :param learning_rate: initial learning rate. Must be a float in (0, 1), defaults to None + :type learning_rate: Optional[float] + :param learning_rate_scheduler: the type of learning rate scheduler. Must choose from 'linear', 'cosine', + 'cosine_with_restarts', 'polynomial', 'constant', and 'constant_with_warmup', defaults to None + :type learning_rate_scheduler: Optional[str] + :param model_name: the model name to use during training. Must choose from 'bert-base-cased', + 'bert-base-uncased', 'bert-base-multilingual-cased', 'bert-base-german-cased', 'bert-large-cased', + 'bert-large-uncased', 'distilbert-base-cased', 'distilbert-base-uncased', 'roberta-base', 'roberta-large', + 'distilroberta-base', 'xlm-roberta-base', 'xlm-roberta-large', xlnet-base-cased', and 'xlnet-large-cased', + defaults to None + :type model_name: Optional[str] + :param number_of_epochs: the number of epochs to train with. Must be a positive integer, defaults to None + :type number_of_epochs: Optional[int] + :param training_batch_size: the batch size during training. Must be a positive integer, defaults to None + :type training_batch_size: Optional[int] + :param validation_batch_size: the batch size during validation. Must be a positive integer, defaults to None + :type validation_batch_size: Optional[int] + :param warmup_ratio: ratio of total training steps used for a linear warmup from 0 to learning_rate. 
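`NlpFeaturizationSettings` above currently exposes only the dataset language. A one-line sketch; the language code is an assumed example.

```python
# Minimal sketch: equivalent to calling job.set_featurization(dataset_language="eng") on an NLP job.
from azure.ai.ml.automl import NlpFeaturizationSettings

featurization = NlpFeaturizationSettings(dataset_language="eng")
```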
+ Must be a float in [0, 1], defaults to None + :type warmup_ratio: Optional[float] + :param weight_decay: value of weight decay when optimizer is sgd, adam, or adamw. This must be a float in + the range [0, 1] defaults to None + :type weight_decay: Optional[float] + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.nlp_fixed_parameters] + :end-before: [END automl.nlp_fixed_parameters] + :language: python + :dedent: 8 + :caption: creating an nlp fixed parameters + """ + + def __init__( + self, + *, + gradient_accumulation_steps: Optional[int] = None, + learning_rate: Optional[float] = None, + learning_rate_scheduler: Optional[str] = None, + model_name: Optional[str] = None, + number_of_epochs: Optional[int] = None, + training_batch_size: Optional[int] = None, + validation_batch_size: Optional[int] = None, + warmup_ratio: Optional[float] = None, + weight_decay: Optional[float] = None, + ): + self.gradient_accumulation_steps = gradient_accumulation_steps + self.learning_rate = learning_rate + self.learning_rate_scheduler = learning_rate_scheduler + self.model_name = model_name + self.number_of_epochs = number_of_epochs + self.training_batch_size = training_batch_size + self.validation_batch_size = validation_batch_size + self.warmup_ratio = warmup_ratio + self.weight_decay = weight_decay + + def _to_rest_object(self) -> RestNlpFixedParameters: + return RestNlpFixedParameters( + gradient_accumulation_steps=self.gradient_accumulation_steps, + learning_rate=self.learning_rate, + learning_rate_scheduler=self.learning_rate_scheduler, + model_name=self.model_name, + number_of_epochs=self.number_of_epochs, + training_batch_size=self.training_batch_size, + validation_batch_size=self.validation_batch_size, + warmup_ratio=self.warmup_ratio, + weight_decay=self.weight_decay, + ) + + @classmethod + def _from_rest_object(cls, obj: RestNlpFixedParameters) -> "NlpFixedParameters": + return cls( + gradient_accumulation_steps=obj.gradient_accumulation_steps, + learning_rate=obj.learning_rate, + learning_rate_scheduler=obj.learning_rate_scheduler, + model_name=obj.model_name, + number_of_epochs=obj.number_of_epochs, + training_batch_size=obj.training_batch_size, + validation_batch_size=obj.validation_batch_size, + warmup_ratio=obj.warmup_ratio, + weight_decay=obj.weight_decay, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, NlpFixedParameters): + return NotImplemented + + return ( + self.gradient_accumulation_steps == other.gradient_accumulation_steps + and self.learning_rate == other.learning_rate + and self.learning_rate_scheduler == other.learning_rate_scheduler + and self.model_name == other.model_name + and self.number_of_epochs == other.number_of_epochs + and self.training_batch_size == other.training_batch_size + and self.validation_batch_size == other.validation_batch_size + and self.warmup_ratio == other.warmup_ratio + and self.weight_decay == other.weight_decay + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_limit_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_limit_settings.py new file mode 100644 index 00000000..1e99f4f0 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_limit_settings.py @@ -0,0 +1,79 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft 
Corporation. All rights reserved. +# --------------------------------------------------------- + +from typing import Optional + +from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpVerticalLimitSettings as RestNlpLimitSettings +from azure.ai.ml._utils.utils import from_iso_duration_format_mins, to_iso_duration_format_mins +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class NlpLimitSettings(RestTranslatableMixin): + """Limit settings for all AutoML NLP Verticals. + + :param max_concurrent_trials: Maximum number of concurrent AutoML iterations. + :type max_concurrent_trials: int + :param max_trials: Maximum number of AutoML iterations. + :type max_trials: int + :param timeout_minutes: AutoML job timeout. + :type timeout_minutes: int + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.nlp_limit_settings] + :end-before: [END automl.nlp_limit_settings] + :language: python + :dedent: 8 + :caption: creating an nlp limit settings + """ + + def __init__( + self, + *, + max_concurrent_trials: Optional[int] = None, + max_trials: int = 1, + max_nodes: int = 1, + timeout_minutes: Optional[int] = None, + trial_timeout_minutes: Optional[int] = None, + ): + self.max_concurrent_trials = max_concurrent_trials + self.max_trials = max_trials + self.max_nodes = max_nodes + self.timeout_minutes = timeout_minutes + self.trial_timeout_minutes = trial_timeout_minutes + + def _to_rest_object(self) -> RestNlpLimitSettings: + return RestNlpLimitSettings( + max_concurrent_trials=self.max_concurrent_trials, + max_trials=self.max_trials, + max_nodes=self.max_nodes, + timeout=to_iso_duration_format_mins(self.timeout_minutes), + trial_timeout=to_iso_duration_format_mins(self.trial_timeout_minutes), + ) + + @classmethod + def _from_rest_object(cls, obj: RestNlpLimitSettings) -> "NlpLimitSettings": + return cls( + max_concurrent_trials=obj.max_concurrent_trials, + max_trials=obj.max_trials, + max_nodes=obj.max_nodes, + timeout_minutes=from_iso_duration_format_mins(obj.timeout), + trial_timeout_minutes=from_iso_duration_format_mins(obj.trial_timeout), + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, NlpLimitSettings): + return NotImplemented + + return ( + self.max_concurrent_trials == other.max_concurrent_trials + and self.max_trials == other.max_trials + and self.max_nodes == other.max_nodes + and self.timeout_minutes == other.timeout_minutes + and self.trial_timeout_minutes == other.trial_timeout_minutes + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_search_space.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_search_space.py new file mode 100644 index 00000000..e4ad435f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_search_space.py @@ -0,0 +1,185 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
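`NlpFixedParameters` pins hyperparameters for every candidate, while `NlpLimitSettings` bounds the experiment as a whole. A sketch with illustrative values, using the constructor keywords defined above.

```python
# Minimal sketch: typed settings objects, the counterpart of the dictionary form shown earlier.
from azure.ai.ml.automl import NlpFixedParameters, NlpLimitSettings

fixed = NlpFixedParameters(
    model_name="bert-base-cased",
    number_of_epochs=3,
    training_batch_size=32,
    warmup_ratio=0.1,
)
limits = NlpLimitSettings(
    max_trials=8,
    max_concurrent_trials=2,
    max_nodes=4,
    timeout_minutes=240,        # converted to an ISO-8601 duration in the REST payload
    trial_timeout_minutes=60,
)
# On a job these would be assigned as job.training_parameters = fixed and job.limits = limits.
```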
+# --------------------------------------------------------- + +from typing import Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpLearningRateScheduler, NlpParameterSubspace +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants import NlpModels +from azure.ai.ml.entities._job.automl.search_space import SearchSpace +from azure.ai.ml.entities._job.automl.search_space_utils import _convert_from_rest_object, _convert_to_rest_object +from azure.ai.ml.entities._job.sweep.search_space import Choice, SweepDistribution +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class NlpSearchSpace(RestTranslatableMixin): + """Search space for AutoML NLP tasks. + + :param gradient_accumulation_steps: number of steps over which to accumulate gradients before a backward + pass. This must be a positive integer., defaults to None + :type gradient_accumulation_steps: Optional[Union[int, SweepDistribution]] + :param learning_rate: initial learning rate. Must be a float in (0, 1), defaults to None + :type learning_rate: Optional[Union[float, SweepDistribution]] + :param learning_rate_scheduler: the type of learning rate scheduler. Must choose from 'linear', 'cosine', + 'cosine_with_restarts', 'polynomial', 'constant', and 'constant_with_warmup', defaults to None + :type learning_rate_scheduler: Optional[Union[str, SweepDistribution]] + :param model_name: the model name to use during training. Must choose from 'bert-base-cased', + 'bert-base-uncased', 'bert-base-multilingual-cased', 'bert-base-german-cased', 'bert-large-cased', + 'bert-large-uncased', 'distilbert-base-cased', 'distilbert-base-uncased', 'roberta-base', 'roberta-large', + 'distilroberta-base', 'xlm-roberta-base', 'xlm-roberta-large', xlnet-base-cased', and 'xlnet-large-cased', + defaults to None + :type model_name: Optional[Union[str, SweepDistribution]] + :param number_of_epochs: the number of epochs to train with. Must be a positive integer, defaults to None + :type number_of_epochs: Optional[Union[int, SweepDistribution]] + :param training_batch_size: the batch size during training. Must be a positive integer, defaults to None + :type training_batch_size: Optional[Union[int, SweepDistribution]] + :param validation_batch_size: the batch size during validation. Must be a positive integer, defaults to None + :type validation_batch_size: Optional[Union[int, SweepDistribution]] + :param warmup_ratio: ratio of total training steps used for a linear warmup from 0 to learning_rate. + Must be a float in [0, 1], defaults to None + :type warmup_ratio: Optional[Union[float, SweepDistribution]] + :param weight_decay: value of weight decay when optimizer is sgd, adam, or adamw. This must be a float in + the range [0, 1], defaults to None + :type weight_decay: Optional[Union[float, SweepDistribution]] + + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.nlp_search_space] + :end-before: [END automl.nlp_search_space] + :language: python + :dedent: 8 + :caption: creating an nlp search space + """ + + def __init__( + self, + *, + gradient_accumulation_steps: Optional[Union[int, SweepDistribution]] = None, + learning_rate: Optional[Union[float, SweepDistribution]] = None, + learning_rate_scheduler: Optional[Union[str, SweepDistribution]] = None, + model_name: Optional[Union[str, SweepDistribution]] = None, + number_of_epochs: Optional[Union[int, SweepDistribution]] = None, + training_batch_size: Optional[Union[int, SweepDistribution]] = None, + validation_batch_size: Optional[Union[int, SweepDistribution]] = None, + warmup_ratio: Optional[Union[float, SweepDistribution]] = None, + weight_decay: Optional[Union[float, SweepDistribution]] = None + ): + # Since we want customers to be able to specify enums as well rather than just strings, we need to access + # the enum values here before we serialize them ('NlpModels.BERT_BASE_CASED' vs. 'bert-base-cased'). + if isinstance(learning_rate_scheduler, NlpLearningRateScheduler): + learning_rate_scheduler = camel_to_snake(learning_rate_scheduler.value) + elif isinstance(learning_rate_scheduler, Choice): + if learning_rate_scheduler.values is not None: + learning_rate_scheduler.values = [ + camel_to_snake(item.value) if isinstance(item, NlpLearningRateScheduler) else item + for item in learning_rate_scheduler.values + ] + + if isinstance(model_name, NlpModels): + model_name = model_name.value + elif isinstance(model_name, Choice): + if model_name.values is not None: + model_name.values = [item.value if isinstance(item, NlpModels) else item for item in model_name.values] + + self.gradient_accumulation_steps = gradient_accumulation_steps + self.learning_rate = learning_rate + self.learning_rate_scheduler = learning_rate_scheduler + self.model_name = model_name + self.number_of_epochs = number_of_epochs + self.training_batch_size = training_batch_size + self.validation_batch_size = validation_batch_size + self.warmup_ratio = warmup_ratio + self.weight_decay = weight_decay + + def _to_rest_object(self) -> NlpParameterSubspace: + return NlpParameterSubspace( + gradient_accumulation_steps=( + _convert_to_rest_object(self.gradient_accumulation_steps) + if self.gradient_accumulation_steps is not None + else None + ), + learning_rate=_convert_to_rest_object(self.learning_rate) if self.learning_rate is not None else None, + learning_rate_scheduler=( + _convert_to_rest_object(self.learning_rate_scheduler) + if self.learning_rate_scheduler is not None + else None + ), + model_name=_convert_to_rest_object(self.model_name) if self.model_name is not None else None, + number_of_epochs=( + _convert_to_rest_object(self.number_of_epochs) if self.number_of_epochs is not None else None + ), + training_batch_size=( + _convert_to_rest_object(self.training_batch_size) if self.training_batch_size is not None else None + ), + validation_batch_size=( + _convert_to_rest_object(self.validation_batch_size) if self.validation_batch_size is not None else None + ), + warmup_ratio=_convert_to_rest_object(self.warmup_ratio) if self.warmup_ratio is not None else None, + weight_decay=_convert_to_rest_object(self.weight_decay) if self.weight_decay is not None else None, + ) + + @classmethod + def _from_rest_object(cls, obj: NlpParameterSubspace) -> "NlpSearchSpace": + return cls( + gradient_accumulation_steps=( + 
_convert_from_rest_object(obj.gradient_accumulation_steps) + if obj.gradient_accumulation_steps is not None + else None + ), + learning_rate=_convert_from_rest_object(obj.learning_rate) if obj.learning_rate is not None else None, + learning_rate_scheduler=( + _convert_from_rest_object(obj.learning_rate_scheduler) + if obj.learning_rate_scheduler is not None + else None + ), + model_name=_convert_from_rest_object(obj.model_name) if obj.model_name is not None else None, + number_of_epochs=( + _convert_from_rest_object(obj.number_of_epochs) if obj.number_of_epochs is not None else None + ), + training_batch_size=( + _convert_from_rest_object(obj.training_batch_size) if obj.training_batch_size is not None else None + ), + validation_batch_size=( + _convert_from_rest_object(obj.validation_batch_size) if obj.validation_batch_size is not None else None + ), + warmup_ratio=_convert_from_rest_object(obj.warmup_ratio) if obj.warmup_ratio is not None else None, + weight_decay=_convert_from_rest_object(obj.weight_decay) if obj.weight_decay is not None else None, + ) + + @classmethod + def _from_search_space_object(cls, obj: SearchSpace) -> "NlpSearchSpace": + return cls( + gradient_accumulation_steps=( + obj.gradient_accumulation_steps if hasattr(obj, "gradient_accumulation_steps") else None + ), + learning_rate=obj.learning_rate if hasattr(obj, "learning_rate") else None, + learning_rate_scheduler=obj.learning_rate_scheduler if hasattr(obj, "learning_rate_scheduler") else None, + model_name=obj.model_name if hasattr(obj, "model_name") else None, + number_of_epochs=obj.number_of_epochs if hasattr(obj, "number_of_epochs") else None, + training_batch_size=obj.training_batch_size if hasattr(obj, "training_batch_size") else None, + validation_batch_size=obj.validation_batch_size if hasattr(obj, "validation_batch_size") else None, + warmup_ratio=obj.warmup_ratio if hasattr(obj, "warmup_ratio") else None, + weight_decay=obj.weight_decay if hasattr(obj, "weight_decay") else None, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, NlpSearchSpace): + return NotImplemented + + return ( + self.gradient_accumulation_steps == other.gradient_accumulation_steps + and self.learning_rate == other.learning_rate + and self.learning_rate_scheduler == other.learning_rate_scheduler + and self.model_name == other.model_name + and self.number_of_epochs == other.number_of_epochs + and self.training_batch_size == other.training_batch_size + and self.validation_batch_size == other.validation_batch_size + and self.warmup_ratio == other.warmup_ratio + and self.weight_decay == other.weight_decay + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_sweep_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_sweep_settings.py new file mode 100644 index 00000000..e446a30c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_sweep_settings.py @@ -0,0 +1,65 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
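A sketch of one `NlpSearchSpace` subspace using the public sweep distributions; the `azure.ai.ml.sweep` import path and the chosen values are assumptions. String values such as "linear" pass through unchanged, while `NlpModels` and `NlpLearningRateScheduler` enum members are unwrapped to their string form by the constructor above.

```python
# Minimal sketch: one parameter subspace mixing fixed values, choices, and a distribution.
from azure.ai.ml.automl import NlpSearchSpace
from azure.ai.ml.sweep import Choice, Uniform

subspace = NlpSearchSpace(
    model_name=Choice(values=["bert-base-cased", "roberta-base"]),
    learning_rate=Uniform(min_value=1e-5, max_value=5e-5),
    learning_rate_scheduler="linear",
    number_of_epochs=Choice(values=[3, 4]),
    warmup_ratio=0.1,
)
```

The same fields can also be supplied as plain dictionaries to the `search_space` setter shown earlier, which converts each entry with `cast_to_specific_search_space`.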
+# --------------------------------------------------------- + +from typing import Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpSweepSettings as RestNlpSweepSettings +from azure.ai.ml._restclient.v2023_04_01_preview.models import SamplingAlgorithmType +from azure.ai.ml.entities._job.sweep.early_termination_policy import EarlyTerminationPolicy +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +# pylint: disable=protected-access +class NlpSweepSettings(RestTranslatableMixin): + """Sweep settings for all AutoML NLP tasks. + + :param sampling_algorithm: Required. Specifies type of hyperparameter sampling algorithm. + Possible values include: "Grid", "Random", and "Bayesian". + :type sampling_algorithm: Union[str, ~azure.ai.ml.automl.SamplingAlgorithmType] + :param early_termination: Early termination policy to end poorly performing training candidates, + defaults to None. + :type early_termination: Optional[~azure.mgmt.machinelearningservices.models.EarlyTerminationPolicy] + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.nlp_sweep_settings] + :end-before: [END automl.nlp_sweep_settings] + :language: python + :dedent: 8 + :caption: creating an nlp sweep settings + """ + + def __init__( + self, + *, + sampling_algorithm: Union[str, SamplingAlgorithmType], + early_termination: Optional[EarlyTerminationPolicy] = None, + ): + self.sampling_algorithm = sampling_algorithm + self.early_termination = early_termination + + def _to_rest_object(self) -> RestNlpSweepSettings: + return RestNlpSweepSettings( + sampling_algorithm=self.sampling_algorithm, + early_termination=self.early_termination._to_rest_object() if self.early_termination else None, + ) + + @classmethod + def _from_rest_object(cls, obj: RestNlpSweepSettings) -> "NlpSweepSettings": + return cls( + sampling_algorithm=obj.sampling_algorithm, + early_termination=( + EarlyTerminationPolicy._from_rest_object(obj.early_termination) if obj.early_termination else None + ), + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, NlpSweepSettings): + return NotImplemented + + return self.sampling_algorithm == other.sampling_algorithm and self.early_termination == other.early_termination + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_job.py new file mode 100644 index 00000000..290f4f70 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_job.py @@ -0,0 +1,248 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
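`NlpSweepSettings` mirrors the image variant earlier in the diff. A short sketch with a median-stopping policy; the policy values are assumptions.

```python
# Minimal sketch: Bayesian sampling with early termination of poorly performing candidates.
from azure.ai.ml.automl import NlpSweepSettings
from azure.ai.ml.sweep import MedianStoppingPolicy

sweep = NlpSweepSettings(
    sampling_algorithm="Bayesian",
    early_termination=MedianStoppingPolicy(delay_evaluation=5, evaluation_interval=1),
)
```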
+# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import TYPE_CHECKING, Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase, TaskType +from azure.ai.ml._restclient.v2023_04_01_preview.models._azure_machine_learning_workspaces_enums import ( + ClassificationPrimaryMetrics, +) +from azure.ai.ml._restclient.v2024_01_01_preview.models import TextClassification as RestTextClassification +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.nlp.automl_nlp_job import AutoMLNLPJob +from azure.ai.ml.entities._job.automl.nlp.nlp_featurization_settings import NlpFeaturizationSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_fixed_parameters import NlpFixedParameters +from azure.ai.ml.entities._job.automl.nlp.nlp_limit_settings import NlpLimitSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_sweep_settings import NlpSweepSettings +from azure.ai.ml.entities._system_data import SystemData +from azure.ai.ml.entities._util import load_from_dict + +# avoid circular import error +if TYPE_CHECKING: + from azure.ai.ml.entities._component.component import Component + + +class TextClassificationJob(AutoMLNLPJob): + """Configuration for AutoML Text Classification Job. + + :param target_column_name: The name of the target column, defaults to None + :type target_column_name: Optional[str] + :param training_data: Training data to be used for training, defaults to None + :type training_data: Optional[~azure.ai.ml.Input] + :param validation_data: Validation data to be used for evaluating the trained model, defaults to None + :type validation_data: Optional[~azure.ai.ml.Input] + :param primary_metric: The primary metric to be displayed, defaults to None + :type primary_metric: Optional[~azure.ai.ml.automl.ClassificationPrimaryMetrics] + :param log_verbosity: Log verbosity level, defaults to None + :type log_verbosity: Optional[str] + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.automl_nlp_job.text_classification_job] + :end-before: [END automl.automl_nlp_job.text_classification_job] + :language: python + :dedent: 8 + :caption: creating an automl text classification job + """ + + _DEFAULT_PRIMARY_METRIC = ClassificationPrimaryMetrics.ACCURACY + + def __init__( + self, + *, + target_column_name: Optional[str] = None, + training_data: Optional[Input] = None, + validation_data: Optional[Input] = None, + primary_metric: Optional[ClassificationPrimaryMetrics] = None, + log_verbosity: Optional[str] = None, + **kwargs: Any + ): + super().__init__( + task_type=TaskType.TEXT_CLASSIFICATION, + primary_metric=primary_metric or TextClassificationJob._DEFAULT_PRIMARY_METRIC, + target_column_name=target_column_name, + training_data=training_data, + validation_data=validation_data, + log_verbosity=log_verbosity, + **kwargs, + ) + + @property + def primary_metric(self) -> Union[str, ClassificationPrimaryMetrics]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ClassificationPrimaryMetrics]) -> None: + """setter for primary metric + + :param value: _description_ + :type value: Union[str, ClassificationPrimaryMetrics] + """ + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + + self._primary_metric = ( + TextClassificationJob._DEFAULT_PRIMARY_METRIC + if value is None + else ClassificationPrimaryMetrics[camel_to_snake(value).upper()] + ) + + def _to_rest_object(self) -> JobBase: + text_classification = RestTextClassification( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + limit_settings=self._limits._to_rest_object() if self._limits else None, + sweep_settings=self._sweep._to_rest_object() if self._sweep else None, + fixed_parameters=self._training_parameters._to_rest_object() if self._training_parameters else None, + search_space=( + [entry._to_rest_object() for entry in self._search_space if entry is not None] + if self._search_space is not None + else None + ), + featurization_settings=self._featurization._to_rest_object() if self._featurization else None, + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + # resolve data inputs in rest object + self._resolve_data_inputs(text_classification) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=text_classification, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "TextClassificationJob": + properties: RestAutoMLJob = obj.properties + task_details: RestTextClassification = properties.task_details + assert isinstance(task_details, RestTextClassification) + limits = ( + NlpLimitSettings._from_rest_object(task_details.limit_settings) if task_details.limit_settings else None + ) + featurization = ( + NlpFeaturizationSettings._from_rest_object(task_details.featurization_settings) + 
if task_details.featurization_settings + else None + ) + sweep = NlpSweepSettings._from_rest_object(task_details.sweep_settings) if task_details.sweep_settings else None + training_parameters = ( + NlpFixedParameters._from_rest_object(task_details.fixed_parameters) + if task_details.fixed_parameters + else None + ) + + text_classification_job = cls( + # ----- job specific params + id=obj.id, + name=obj.name, + description=properties.description, + tags=properties.tags, + properties=properties.properties, + experiment_name=properties.experiment_name, + services=properties.services, + status=properties.status, + creation_context=SystemData._from_rest_object(obj.system_data) if obj.system_data else None, + display_name=properties.display_name, + compute=properties.compute_id, + outputs=from_rest_data_outputs(properties.outputs), + resources=properties.resources, + # ----- task specific params + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + limits=limits, + sweep=sweep, + training_parameters=training_parameters, + search_space=cls._get_search_space_from_str(task_details.search_space), + featurization=featurization, + identity=( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + queue_settings=properties.queue_settings, + ) + + text_classification_job._restore_data_inputs() + + return text_classification_job + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> "Component": + raise NotImplementedError() + + @classmethod + def _load_from_dict( + cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any + ) -> "TextClassificationJob": + from azure.ai.ml._schema.automl.nlp_vertical.text_classification import TextClassificationSchema + + if kwargs.pop("inside_pipeline", False): + from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextClassificationNode + + loaded_data = load_from_dict( + AutoMLTextClassificationNode, + data, + context, + additional_message, + **kwargs, + ) + else: + loaded_data = load_from_dict(TextClassificationSchema, data, context, additional_message, **kwargs) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "TextClassificationJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + return TextClassificationJob(**loaded_data) + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.nlp_vertical.text_classification import TextClassificationSchema + from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextClassificationNode + + if inside_pipeline: + res_autoML: dict = AutoMLTextClassificationNode(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res_autoML + + res: dict = TextClassificationSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + def __eq__(self, other: object) -> bool: + if not isinstance(other, TextClassificationJob): + return NotImplemented + + if not super(TextClassificationJob, self).__eq__(other): + return False + + return self.primary_metric == other.primary_metric + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_multilabel_job.py 
b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_multilabel_job.py new file mode 100644 index 00000000..ac19b451 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_multilabel_job.py @@ -0,0 +1,252 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import TYPE_CHECKING, Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationMultilabelPrimaryMetrics, JobBase, TaskType +from azure.ai.ml._restclient.v2024_01_01_preview.models import ( + TextClassificationMultilabel as RestTextClassificationMultilabel, +) +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.nlp.automl_nlp_job import AutoMLNLPJob +from azure.ai.ml.entities._job.automl.nlp.nlp_featurization_settings import NlpFeaturizationSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_fixed_parameters import NlpFixedParameters +from azure.ai.ml.entities._job.automl.nlp.nlp_limit_settings import NlpLimitSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_sweep_settings import NlpSweepSettings +from azure.ai.ml.entities._system_data import SystemData +from azure.ai.ml.entities._util import load_from_dict + +# avoid circular import error +if TYPE_CHECKING: + from azure.ai.ml.entities._component.component import Component + + +class TextClassificationMultilabelJob(AutoMLNLPJob): + """Configuration for AutoML Text Classification Multilabel Job. + + :param target_column_name: The name of the target column, defaults to None + :type target_column_name: Optional[str] + :param training_data: Training data to be used for training, defaults to None + :type training_data: Optional[~azure.ai.ml.Input] + :param validation_data: Validation data to be used for evaluating the trained model, defaults to None + :type validation_data: Optional[~azure.ai.ml.Input] + :param primary_metric: The primary metric to be displayed., defaults to None + :type primary_metric: Optional[str] + :param log_verbosity: Log verbosity level, defaults to None + :type log_verbosity: Optional[str] + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.text_classification_multilabel_job] + :end-before: [END automl.text_classification_multilabel_job] + :language: python + :dedent: 8 + :caption: creating an automl text classification multilabel job + """ + + _DEFAULT_PRIMARY_METRIC = ClassificationMultilabelPrimaryMetrics.ACCURACY + + def __init__( + self, + *, + target_column_name: Optional[str] = None, + training_data: Optional[Input] = None, + validation_data: Optional[Input] = None, + primary_metric: Optional[str] = None, + log_verbosity: Optional[str] = None, + **kwargs: Any + ): + super().__init__( + task_type=TaskType.TEXT_CLASSIFICATION_MULTILABEL, + primary_metric=primary_metric or TextClassificationMultilabelJob._DEFAULT_PRIMARY_METRIC, + target_column_name=target_column_name, + training_data=training_data, + validation_data=validation_data, + log_verbosity=log_verbosity, + **kwargs, + ) + + @property + def primary_metric(self) -> Union[str, ClassificationMultilabelPrimaryMetrics]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ClassificationMultilabelPrimaryMetrics]) -> None: + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + + self._primary_metric = ( + TextClassificationMultilabelJob._DEFAULT_PRIMARY_METRIC + if value is None + else ClassificationMultilabelPrimaryMetrics[camel_to_snake(value).upper()] + ) + + def _to_rest_object(self) -> JobBase: + text_classification_multilabel = RestTextClassificationMultilabel( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + limit_settings=self._limits._to_rest_object() if self._limits else None, + sweep_settings=self._sweep._to_rest_object() if self._sweep else None, + fixed_parameters=self._training_parameters._to_rest_object() if self._training_parameters else None, + search_space=( + [entry._to_rest_object() for entry in self._search_space if entry is not None] + if self._search_space is not None + else None + ), + featurization_settings=self._featurization._to_rest_object() if self._featurization else None, + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + # resolve data inputs in rest object + self._resolve_data_inputs(text_classification_multilabel) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=text_classification_multilabel, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "TextClassificationMultilabelJob": + properties: RestAutoMLJob = obj.properties + task_details: RestTextClassificationMultilabel = properties.task_details + assert isinstance(task_details, RestTextClassificationMultilabel) + limits = ( + NlpLimitSettings._from_rest_object(task_details.limit_settings) if task_details.limit_settings else None + ) + featurization = ( + NlpFeaturizationSettings._from_rest_object(task_details.featurization_settings) + if 
task_details.featurization_settings + else None + ) + sweep = NlpSweepSettings._from_rest_object(task_details.sweep_settings) if task_details.sweep_settings else None + training_parameters = ( + NlpFixedParameters._from_rest_object(task_details.fixed_parameters) + if task_details.fixed_parameters + else None + ) + + text_classification_multilabel_job = cls( + # ----- job specific params + id=obj.id, + name=obj.name, + description=properties.description, + tags=properties.tags, + properties=properties.properties, + experiment_name=properties.experiment_name, + services=properties.services, + status=properties.status, + creation_context=SystemData._from_rest_object(obj.system_data) if obj.system_data else None, + display_name=properties.display_name, + compute=properties.compute_id, + outputs=from_rest_data_outputs(properties.outputs), + resources=properties.resources, + # ----- task specific params + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + limits=limits, + sweep=sweep, + training_parameters=training_parameters, + search_space=cls._get_search_space_from_str(task_details.search_space), + featurization=featurization, + identity=( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + queue_settings=properties.queue_settings, + ) + + text_classification_multilabel_job._restore_data_inputs() + + return text_classification_multilabel_job + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> "Component": + raise NotImplementedError() + + @classmethod + def _load_from_dict( + cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any + ) -> "TextClassificationMultilabelJob": + from azure.ai.ml._schema.automl.nlp_vertical.text_classification_multilabel import ( + TextClassificationMultilabelSchema, + ) + + if kwargs.pop("inside_pipeline", False): + from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextClassificationMultilabelNode + + loaded_data = load_from_dict( + AutoMLTextClassificationMultilabelNode, + data, + context, + additional_message, + **kwargs, + ) + else: + loaded_data = load_from_dict( + TextClassificationMultilabelSchema, + data, + context, + additional_message, + **kwargs, + ) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "TextClassificationMultilabelJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + return TextClassificationMultilabelJob(**loaded_data) + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.nlp_vertical.text_classification_multilabel import ( + TextClassificationMultilabelSchema, + ) + from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextClassificationMultilabelNode + + if inside_pipeline: + res_autoML: dict = AutoMLTextClassificationMultilabelNode(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res_autoML + + res: dict = TextClassificationMultilabelSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + def __eq__(self, other: object) -> bool: + if not isinstance(other, TextClassificationMultilabelJob): + return NotImplemented + + if not super(TextClassificationMultilabelJob, self).__eq__(other): + return False + + return self.primary_metric == 
other.primary_metric + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_ner_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_ner_job.py new file mode 100644 index 00000000..a87965f1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_ner_job.py @@ -0,0 +1,231 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import TYPE_CHECKING, Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase, TaskType +from azure.ai.ml._restclient.v2023_04_01_preview.models._azure_machine_learning_workspaces_enums import ( + ClassificationPrimaryMetrics, +) +from azure.ai.ml._restclient.v2024_01_01_preview.models import TextNer as RestTextNER +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.nlp.automl_nlp_job import AutoMLNLPJob +from azure.ai.ml.entities._job.automl.nlp.nlp_featurization_settings import NlpFeaturizationSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_fixed_parameters import NlpFixedParameters +from azure.ai.ml.entities._job.automl.nlp.nlp_limit_settings import NlpLimitSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_sweep_settings import NlpSweepSettings +from azure.ai.ml.entities._system_data import SystemData +from azure.ai.ml.entities._util import load_from_dict + +# avoid circular import error +if TYPE_CHECKING: + from azure.ai.ml.entities._component.component import Component + + +class TextNerJob(AutoMLNLPJob): + """Configuration for AutoML Text NER Job. + + :param training_data: Training data to be used for training, defaults to None + :type training_data: Optional[~azure.ai.ml.Input] + :param validation_data: Validation data to be used for evaluating the trained model, + defaults to None + :type validation_data: Optional[~azure.ai.ml.Input] + :param primary_metric: The primary metric to be displayed, defaults to None + :type primary_metric: Optional[str] + :param log_verbosity: Log verbosity level, defaults to None + :type log_verbosity: Optional[str] + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.text_ner_job] + :end-before: [END automl.text_ner_job] + :language: python + :dedent: 8 + :caption: creating an automl text ner job + """ + + _DEFAULT_PRIMARY_METRIC = ClassificationPrimaryMetrics.ACCURACY + + def __init__( + self, + *, + training_data: Optional[Input] = None, + validation_data: Optional[Input] = None, + primary_metric: Optional[str] = None, + log_verbosity: Optional[str] = None, + **kwargs: Any + ): + super(TextNerJob, self).__init__( + task_type=TaskType.TEXT_NER, + primary_metric=primary_metric or TextNerJob._DEFAULT_PRIMARY_METRIC, + training_data=training_data, + validation_data=validation_data, + log_verbosity=log_verbosity, + **kwargs, + ) + + @property + def primary_metric(self) -> Union[str, ClassificationPrimaryMetrics]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ClassificationPrimaryMetrics]) -> None: + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + + self._primary_metric = ( + TextNerJob._DEFAULT_PRIMARY_METRIC + if value is None + else ClassificationPrimaryMetrics[camel_to_snake(value).upper()] + ) + + def _to_rest_object(self) -> JobBase: + text_ner = RestTextNER( + training_data=self.training_data, + validation_data=self.validation_data, + limit_settings=self._limits._to_rest_object() if self._limits else None, + sweep_settings=self._sweep._to_rest_object() if self._sweep else None, + fixed_parameters=self._training_parameters._to_rest_object() if self._training_parameters else None, + search_space=( + [entry._to_rest_object() for entry in self._search_space if entry is not None] + if self._search_space is not None + else None + ), + featurization_settings=self._featurization._to_rest_object() if self._featurization else None, + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + # resolve data inputs in rest object + self._resolve_data_inputs(text_ner) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=text_ner, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "TextNerJob": + properties: RestAutoMLJob = obj.properties + task_details: RestTextNER = properties.task_details + assert isinstance(task_details, RestTextNER) + limits = ( + NlpLimitSettings._from_rest_object(task_details.limit_settings) if task_details.limit_settings else None + ) + featurization = ( + NlpFeaturizationSettings._from_rest_object(task_details.featurization_settings) + if task_details.featurization_settings + else None + ) + sweep = NlpSweepSettings._from_rest_object(task_details.sweep_settings) if task_details.sweep_settings else None + training_parameters = ( + NlpFixedParameters._from_rest_object(task_details.fixed_parameters) + if task_details.fixed_parameters + else None + ) + + text_ner_job = cls( + # ----- job specific params + id=obj.id, + name=obj.name, + description=properties.description, 
+ tags=properties.tags, + properties=properties.properties, + experiment_name=properties.experiment_name, + services=properties.services, + status=properties.status, + creation_context=SystemData._from_rest_object(obj.system_data) if obj.system_data else None, + display_name=properties.display_name, + compute=properties.compute_id, + outputs=from_rest_data_outputs(properties.outputs), + resources=properties.resources, + # ----- task specific params + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + limits=limits, + sweep=sweep, + training_parameters=training_parameters, + search_space=cls._get_search_space_from_str(task_details.search_space), + featurization=featurization, + identity=( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + queue_settings=properties.queue_settings, + ) + + text_ner_job._restore_data_inputs() + + return text_ner_job + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> "Component": + raise NotImplementedError() + + @classmethod + def _load_from_dict(cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any) -> "TextNerJob": + from azure.ai.ml._schema.automl.nlp_vertical.text_ner import TextNerSchema + + if kwargs.pop("inside_pipeline", False): + from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextNerNode + + loaded_data = load_from_dict(AutoMLTextNerNode, data, context, additional_message, **kwargs) + else: + loaded_data = load_from_dict(TextNerSchema, data, context, additional_message, **kwargs) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "TextNerJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + return TextNerJob(**loaded_data) + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.nlp_vertical.text_ner import TextNerSchema + from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextNerNode + + if inside_pipeline: + res_autoML: dict = AutoMLTextNerNode(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res_autoML + + res: dict = TextNerSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + def __eq__(self, other: object) -> bool: + if not isinstance(other, TextNerJob): + return NotImplemented + + if not super(TextNerJob, self).__eq__(other): + return False + + return self.primary_metric == other.primary_metric + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/search_space.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/search_space.py new file mode 100644 index 00000000..a958de56 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/search_space.py @@ -0,0 +1,14 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
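The NLP job classes in this package (text classification, its multilabel variant, and the TextNerJob just above) are constructed the same way: MLTable inputs plus an optional primary metric that falls back to accuracy. A minimal sketch, assuming these classes are re-exported from the public azure.ai.ml.automl namespace and that the data paths and column name are placeholders:

from azure.ai.ml import Input
from azure.ai.ml.automl import TextClassificationJob, TextNerJob
from azure.ai.ml.constants import AssetTypes

train = Input(type=AssetTypes.MLTABLE, path="./data/train")   # hypothetical MLTable folders
valid = Input(type=AssetTypes.MLTABLE, path="./data/valid")

# Classification needs the label column; NER reads labels from the CoNLL-formatted data itself.
clf_job = TextClassificationJob(
    target_column_name="sentiment",
    training_data=train,
    validation_data=valid,
)
ner_job = TextNerJob(training_data=train, validation_data=valid)

# primary_metric falls back to the class default (accuracy) when not supplied.
print(clf_job.primary_metric)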
+# --------------------------------------------------------- + + +from typing import Any + + +class SearchSpace: + """SearchSpace class for AutoML verticals.""" + + def __init__(self, **kwargs: Any) -> None: + for k, v in kwargs.items(): + self.__setattr__(k, v) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/search_space_utils.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/search_space_utils.py new file mode 100644 index 00000000..732030d4 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/search_space_utils.py @@ -0,0 +1,276 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +import re +from typing import Any, List, Union + +from marshmallow import fields + +from azure.ai.ml._schema._sweep.search_space import ( + ChoiceSchema, + NormalSchema, + QNormalSchema, + QUniformSchema, + RandintSchema, + UniformSchema, +) +from azure.ai.ml._schema.core.fields import DumpableIntegerField, DumpableStringField, NestedField, UnionField +from azure.ai.ml._utils.utils import float_to_str +from azure.ai.ml.constants._job.sweep import SearchSpace +from azure.ai.ml.entities._job.sweep.search_space import ( + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + SweepDistribution, + Uniform, +) +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +def _convert_to_rest_object(sweep_distribution: Union[bool, int, float, str, SweepDistribution]) -> str: + if isinstance(sweep_distribution, float): + # Float requires some special handling for small values that get auto-represented with scientific notation. + res: str = float_to_str(sweep_distribution) + return res + if not isinstance(sweep_distribution, SweepDistribution): + # Convert [bool, float, str] types to str + return str(sweep_distribution) + + rest_object = sweep_distribution._to_rest_object() + if not isinstance(rest_object, list): + msg = "Rest Object for sweep distribution should be a list." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + if len(rest_object) <= 1: + msg = "Rest object for sweep distribution should contain at least two elements." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + sweep_distribution_type = rest_object[0] + sweep_distribution_args = [] + + if not isinstance(rest_object[1], list): + msg = "The second element of Rest object for sweep distribution should be a list." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + if sweep_distribution_type == SearchSpace.CHOICE: + # Rest objects for choice distribution are of format ["choice", [[0, 1, 2]]] + if not isinstance(rest_object[1][0], list): + msg = "The second element of Rest object for choice distribution should be a list of list." 
+ raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + for value in rest_object[1][0]: + if isinstance(value, str): + sweep_distribution_args.append("'" + value + "'") + elif isinstance(value, float): + sweep_distribution_args.append(float_to_str(value)) + else: + sweep_distribution_args.append(str(value)) + else: + for value in rest_object[1]: + if isinstance(value, float): + sweep_distribution_args.append(float_to_str(value)) + else: + sweep_distribution_args.append(str(value)) + + sweep_distribution_str: str = sweep_distribution_type + "(" + sweep_distribution_str += ",".join(sweep_distribution_args) + sweep_distribution_str += ")" + return sweep_distribution_str + + +def _is_int(value: str) -> bool: + try: + int(value) + return True + except ValueError: + return False + + +def _is_float(value: str) -> bool: + try: + float(value) + return True + except ValueError: + return False + + +def _get_type_inferred_value(value: str) -> Union[bool, int, float, str]: + value = value.strip() + if _is_int(value): + # Int + return int(value) + if _is_float(value): + # Float + return float(value) + if value in ["True", "False"]: + # Convert "True", "False" to python boolean literals + return value == "True" + # string value. Remove quotes before returning. + return value.strip("'\"") + + +def _convert_from_rest_object( + sweep_distribution_str: str, +) -> Any: + # sweep_distribution_str can be a distribution like "choice('vitb16r224', 'vits16r224')" or + # a single value like "True", "1", "1.0567", "vitb16r224" + + sweep_distribution_str = sweep_distribution_str.strip() + # Filter by the delimiters and remove splits that are empty strings + sweep_distribution_separated = list(filter(None, re.split("[ ,()]+", sweep_distribution_str))) + + if len(sweep_distribution_separated) == 1: + # Single value. 
+ return _get_type_inferred_value(sweep_distribution_separated[0]) + + # Distribution string + sweep_distribution_type = sweep_distribution_separated[0].strip().lower() + sweep_distribution_args: List = [] + for value in sweep_distribution_separated[1:]: + sweep_distribution_args.append(_get_type_inferred_value(value)) + + if sweep_distribution_type == SearchSpace.CHOICE: + sweep_distribution_args = [sweep_distribution_args] # Choice values are list of lists + + sweep_distribution = SweepDistribution._from_rest_object([sweep_distribution_type, sweep_distribution_args]) + return sweep_distribution + + +def _convert_sweep_dist_dict_to_str_dict(sweep_distribution: dict) -> dict: + for k, sweep_dist_dict in sweep_distribution.items(): + if sweep_dist_dict is not None: + sweep_distribution[k] = _convert_sweep_dist_dict_item_to_str(sweep_dist_dict) + return sweep_distribution + + +class ChoicePlusSchema(ChoiceSchema): + """Choice schema that allows boolean values also""" + + values = fields.List( + UnionField( + [ + DumpableIntegerField(strict=True), + DumpableStringField(), + fields.Float(), + fields.Dict( + keys=fields.Str(), + values=UnionField( + [ + NestedField("ChoicePlusSchema"), + NestedField(NormalSchema()), + NestedField(QNormalSchema()), + NestedField(RandintSchema()), + NestedField(UniformSchema()), + NestedField(QUniformSchema()), + DumpableIntegerField(strict=True), + fields.Float(), + fields.Str(), + fields.Boolean(), + ] + ), + ), + fields.Boolean(), + ] + ) + ) + + +def _convert_sweep_dist_dict_item_to_str(sweep_distribution: Union[bool, int, float, str, dict]) -> str: + # Convert a Sweep Distribution dict to Sweep Distribution string + # Eg. {type: 'choice', values: ['vitb16r224','vits16r224']} => "Choice('vitb16r224','vits16r224')" + if isinstance(sweep_distribution, dict): + sweep_dist_type = sweep_distribution["type"] + if sweep_dist_type == SearchSpace.CHOICE: + sweep_dist_obj = ChoicePlusSchema().load(sweep_distribution) # pylint: disable=no-member + elif sweep_dist_type in SearchSpace.UNIFORM_LOGUNIFORM: + sweep_dist_obj = UniformSchema().load(sweep_distribution) # pylint: disable=no-member + elif sweep_dist_type in SearchSpace.NORMAL_LOGNORMAL: + sweep_dist_obj = NormalSchema().load(sweep_distribution) # pylint: disable=no-member + elif sweep_dist_type in SearchSpace.QUNIFORM_QLOGUNIFORM: + sweep_dist_obj = QUniformSchema().load(sweep_distribution) # pylint: disable=no-member + elif sweep_dist_type in SearchSpace.QNORMAL_QLOGNORMAL: + sweep_dist_obj = QNormalSchema().load(sweep_distribution) # pylint: disable=no-member + elif sweep_dist_type in SearchSpace.RANDINT: + sweep_dist_obj = RandintSchema().load(sweep_distribution) # pylint: disable=no-member + else: + msg = f"Unsupported sweep distribution type {sweep_dist_type}" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + else: # Case for other primitive types + sweep_dist_obj = sweep_distribution + + sweep_dist_str = _convert_to_rest_object(sweep_dist_obj) + return sweep_dist_str + + +def _convert_sweep_dist_str_to_dict(sweep_dist_str_list: dict) -> dict: + for k, val in sweep_dist_str_list.items(): + if isinstance(val, str): + sweep_dist_str_list[k] = _convert_sweep_dist_str_item_to_dict(val) + return sweep_dist_str_list + + +def _convert_sweep_dist_str_item_to_dict( + sweep_distribution_str: str, +) -> Union[bool, int, float, str, dict]: + # sweep_distribution_str can be a distribution like "choice('vitb16r224', 
'vits16r224')" + # return type is {type: 'choice', values: ['vitb16r224', 'vits16r224']} + sweep_dist_obj = _convert_from_rest_object(sweep_distribution_str) + sweep_dist: Union[bool, int, float, str, dict] = "" + if isinstance(sweep_dist_obj, SweepDistribution): + if isinstance(sweep_dist_obj, Choice): + sweep_dist = ChoicePlusSchema().dump(sweep_dist_obj) # pylint: disable=no-member + elif isinstance(sweep_dist_obj, (QNormal, QLogNormal)): + sweep_dist = QNormalSchema().dump(sweep_dist_obj) # pylint: disable=no-member + elif isinstance(sweep_dist_obj, (QUniform, QLogUniform)): + sweep_dist = QUniformSchema().dump(sweep_dist_obj) # pylint: disable=no-member + elif isinstance(sweep_dist_obj, (Uniform, LogUniform)): + sweep_dist = UniformSchema().dump(sweep_dist_obj) # pylint: disable=no-member + elif isinstance(sweep_dist_obj, (Normal, LogNormal)): + sweep_dist = NormalSchema().dump(sweep_dist_obj) # pylint: disable=no-member + elif isinstance(sweep_dist_obj, Randint): + sweep_dist = RandintSchema().dump(sweep_dist_obj) # pylint: disable=no-member + else: + msg = "Invalid sweep distribution {}" + raise ValidationException( + message=msg.format(sweep_distribution_str), + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + else: # Case for other primitive types + sweep_dist = sweep_dist_obj + + return sweep_dist diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/stack_ensemble_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/stack_ensemble_settings.py new file mode 100644 index 00000000..c17fa7e3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/stack_ensemble_settings.py @@ -0,0 +1,70 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from typing import Any, Optional + +from azure.ai.ml._restclient.v2023_04_01_preview.models import StackEnsembleSettings as RestStackEnsembleSettings +from azure.ai.ml._restclient.v2023_04_01_preview.models import StackMetaLearnerType +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class StackEnsembleSettings(RestTranslatableMixin): + """Advance setting to customize StackEnsemble run.""" + + def __init__( + self, + *, + stack_meta_learner_k_wargs: Optional[Any] = None, + stack_meta_learner_train_percentage: float = 0.2, + stack_meta_learner_type: Optional[StackMetaLearnerType] = None, + **kwargs: Any + ): + """ + :param stack_meta_learner_k_wargs: Optional parameters to pass to the initializer of the + meta-learner. + :type stack_meta_learner_k_wargs: any + :param stack_meta_learner_train_percentage: Specifies the proportion of the training set + (when choosing train and validation type of training) to be reserved for training the + meta-learner. Default value is 0.2. + :type stack_meta_learner_train_percentage: float + :param stack_meta_learner_type: The meta-learner is a model trained on the output of the + individual heterogeneous models. Possible values include: "None", "LogisticRegression", + "LogisticRegressionCV", "LightGBMClassifier", "ElasticNet", "ElasticNetCV", + "LightGBMRegressor", "LinearRegression". 
+ :type stack_meta_learner_type: str or + ~azure.mgmt.machinelearningservices.models.StackMetaLearnerType + """ + super(StackEnsembleSettings, self).__init__(**kwargs) + self.stack_meta_learner_k_wargs = stack_meta_learner_k_wargs + self.stack_meta_learner_train_percentage = stack_meta_learner_train_percentage + self.stack_meta_learner_type = stack_meta_learner_type + + def _to_rest_object(self) -> RestStackEnsembleSettings: + return RestStackEnsembleSettings( + stack_meta_learner_k_wargs=self.stack_meta_learner_k_wargs, + stack_meta_learner_train_percentage=self.stack_meta_learner_train_percentage, + stack_meta_learner_type=self.stack_meta_learner_type, + ) + + @classmethod + def _from_rest_object(cls, obj: RestStackEnsembleSettings) -> "StackEnsembleSettings": + return cls( + stack_meta_learner_k_wargs=obj.stack_meta_learner_k_wargs, + stack_meta_learner_train_percentage=obj.stack_meta_learner_train_percentage, + stack_meta_learner_type=obj.stack_meta_learner_type, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, StackEnsembleSettings): + return NotImplemented + + return ( + super().__eq__(other) + and self.stack_meta_learner_k_wargs == other.stack_meta_learner_k_wargs + and self.stack_meta_learner_train_percentage == other.stack_meta_learner_train_percentage + and self.stack_meta_learner_type == other.stack_meta_learner_type + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/__init__.py new file mode 100644 index 00000000..c0373010 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/__init__.py @@ -0,0 +1,22 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from .automl_tabular import AutoMLTabular +from .classification_job import ClassificationJob +from .featurization_settings import ColumnTransformer, TabularFeaturizationSettings +from .forecasting_job import ForecastingJob +from .forecasting_settings import ForecastingSettings +from .limit_settings import TabularLimitSettings +from .regression_job import RegressionJob + +__all__ = [ + "AutoMLTabular", + "ClassificationJob", + "ColumnTransformer", + "ForecastingJob", + "ForecastingSettings", + "RegressionJob", + "TabularFeaturizationSettings", + "TabularLimitSettings", +] diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/automl_tabular.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/automl_tabular.py new file mode 100644 index 00000000..5f4ed22b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/automl_tabular.py @@ -0,0 +1,607 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
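StackEnsembleSettings is not used on its own; it is handed to the tabular training settings through the stack_ensemble_settings keyword of set_training (defined further down in automl_tabular.py). A small sketch, assuming the class is re-exported from the public azure.ai.ml.automl namespace and using one of the meta-learner names listed above:

from azure.ai.ml.automl import StackEnsembleSettings

# Reserve 30% of the training data for fitting the meta-learner and pick it explicitly.
stack = StackEnsembleSettings(
    stack_meta_learner_type="LogisticRegressionCV",
    stack_meta_learner_train_percentage=0.3,
)

# Later: some_tabular_job.set_training(enable_stack_ensemble=True, stack_ensemble_settings=stack)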
+# --------------------------------------------------------- + +# pylint: disable=too-many-instance-attributes + +from abc import ABC +from typing import Any, Dict, List, Optional, Union + +from azure.ai.ml._restclient.v2024_01_01_preview.models import ( + AutoNCrossValidations, + BlockedTransformers, + CustomNCrossValidations, + LogVerbosity, +) +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants import TabularTrainingMode +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job.automl.automl_vertical import AutoMLVertical +from azure.ai.ml.entities._job.automl.stack_ensemble_settings import StackEnsembleSettings +from azure.ai.ml.entities._job.automl.tabular.featurization_settings import ( + ColumnTransformer, + TabularFeaturizationSettings, +) +from azure.ai.ml.entities._job.automl.tabular.limit_settings import TabularLimitSettings +from azure.ai.ml.entities._job.automl.training_settings import TrainingSettings +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +class AutoMLTabular(AutoMLVertical, ABC): + """Initialize an AutoML job entity for tabular data. + + Constructor for AutoMLTabular. + + :keyword task_type: The type of task to run. Possible values include: "classification", "regression" + , "forecasting". + :paramtype task_type: str + :keyword featurization: featurization settings. Defaults to None. + :paramtype featurization: typing.Optional[TabularFeaturizationSettings] + :keyword limits: limits settings. Defaults to None. + :paramtype limits: typing.Optional[TabularLimitSettings] + :keyword training: training settings. Defaults to None. + :paramtype training: typing.Optional[TrainingSettings] + :keyword log_verbosity: Verbosity of logging. Possible values include: "debug", "info", "warning", "error", + "critical". Defaults to "info". + :paramtype log_verbosity: str + :keyword target_column_name: The name of the target column. Defaults to None. + :paramtype target_column_name: typing.Optional[str] + :keyword weight_column_name: The name of the weight column. Defaults to None. + :paramtype weight_column_name: typing.Optional[str] + :keyword validation_data_size: The size of the validation data. Defaults to None. + :paramtype validation_data_size: typing.Optional[float] + :keyword cv_split_column_names: The names of the columns to use for cross validation. Defaults to None. + :paramtype cv_split_column_names: typing.Optional[List[str]] + :keyword n_cross_validations: The number of cross validations to run. Defaults to None. + :paramtype n_cross_validations: typing.Optional[int] + :keyword test_data_size: The size of the test data. Defaults to None. + :paramtype test_data_size: typing.Optional[float] + :keyword training_data: The training data. Defaults to None. + :paramtype training_data: typing.Optional[azure.ai.ml.entities._inputs_outputs.Input] + :keyword validation_data: The validation data. Defaults to None. + :paramtype validation_data: typing.Optional[azure.ai.ml.entities._inputs_outputs.Input] + :keyword test_data: The test data. Defaults to None. + :paramtype test_data: typing.Optional[azure.ai.ml.entities._inputs_outputs.Input] + """ + + def __init__( + self, + *, + task_type: str, + featurization: Optional[TabularFeaturizationSettings] = None, + limits: Optional[TabularLimitSettings] = None, + training: Optional[Any] = None, + **kwargs: Any, + ) -> None: + """Initialize an AutoML job entity for tabular data. 
+ + Constructor for AutoMLTabular. + + :keyword task_type: The type of task to run. Possible values include: "classification", "regression" + , "forecasting". + :paramtype task_type: str + :keyword featurization: featurization settings. Defaults to None. + :paramtype featurization: typing.Optional[TabularFeaturizationSettings] + :keyword limits: limits settings. Defaults to None. + :paramtype limits: typing.Optional[TabularLimitSettings] + :keyword training: training settings. Defaults to None. + :paramtype training: typing.Optional[TrainingSettings] + :keyword log_verbosity: Verbosity of logging. Possible values include: "debug", "info", "warning", "error", + "critical". Defaults to "info". + :paramtype log_verbosity: str + :keyword target_column_name: The name of the target column. Defaults to None. + :paramtype target_column_name: typing.Optional[str] + :keyword weight_column_name: The name of the weight column. Defaults to None. + :paramtype weight_column_name: typing.Optional[str] + :keyword validation_data_size: The size of the validation data. Defaults to None. + :paramtype validation_data_size: typing.Optional[float] + :keyword cv_split_column_names: The names of the columns to use for cross validation. Defaults to None. + :paramtype cv_split_column_names: typing.Optional[List[str]] + :keyword n_cross_validations: The number of cross validations to run. Defaults to None. + :paramtype n_cross_validations: typing.Optional[int] + :keyword test_data_size: The size of the test data. Defaults to None. + :paramtype test_data_size: typing.Optional[float] + :keyword training_data: The training data. Defaults to None. + :paramtype training_data: typing.Optional[azure.ai.ml.entities._inputs_outputs.Input] + :keyword validation_data: The validation data. Defaults to None. + :paramtype validation_data: typing.Optional[azure.ai.ml.entities._inputs_outputs.Input] + :keyword test_data: The test data. Defaults to None. + :paramtype test_data: typing.Optional[azure.ai.ml.entities._inputs_outputs.Input] + :raises: :class:`azure.ai.ml.exceptions.ValidationException` + """ + self.log_verbosity = kwargs.pop("log_verbosity", LogVerbosity.INFO) + + self.target_column_name = kwargs.pop("target_column_name", None) + self.weight_column_name = kwargs.pop("weight_column_name", None) + self.validation_data_size = kwargs.pop("validation_data_size", None) + self.cv_split_column_names = kwargs.pop("cv_split_column_names", None) + self.n_cross_validations = kwargs.pop("n_cross_validations", None) + self.test_data_size = kwargs.pop("test_data_size", None) + + super().__init__( + task_type=task_type, + training_data=kwargs.pop("training_data", None), + validation_data=kwargs.pop("validation_data", None), + test_data=kwargs.pop("test_data", None), + **kwargs, + ) + + self._featurization = featurization + self._limits = limits + self._training = training + + @property + def log_verbosity(self) -> LogVerbosity: + """Get the log verbosity for the AutoML job. + + :return: log verbosity for the AutoML job + :rtype: LogVerbosity + """ + return self._log_verbosity + + @log_verbosity.setter + def log_verbosity(self, value: Union[str, LogVerbosity]) -> None: + """Set the log verbosity for the AutoML job. + + :param value: str or LogVerbosity + :type value: typing.Union[str, LogVerbosity] + """ + self._log_verbosity = None if value is None else LogVerbosity[camel_to_snake(value).upper()] + + @property + def limits(self) -> Optional[TabularLimitSettings]: + """Get the tabular limits for the AutoML job. 
+ + :return: Tabular limits for the AutoML job + :rtype: TabularLimitSettings + """ + return self._limits + + @limits.setter + def limits(self, value: Union[Dict, TabularLimitSettings]) -> None: + """Set the limits for the AutoML job. + + :param value: typing.Dict or TabularLimitSettings + :type value: typing.Union[typing.Dict, TabularLimitSettings] + :raises ValidationException: Expected a dictionary for limit settings. + """ + if isinstance(value, TabularLimitSettings): + self._limits = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for limit settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_limits(**value) + + @property + def training(self) -> Any: + """Get the training settings for the AutoML job. + + :return: Training settings for the AutoML job. + :rtype: TrainingSettings + """ + return self._training + + @training.setter + def training(self, value: Union[Dict, TrainingSettings]) -> None: + """Set the training settings for the AutoML job. + + :param value: typing.Dict or TrainingSettings + :type value: typing.Union[typing.Dict, TrainingSettings] + :raises ValidationException: Expected a dictionary for training settings. + """ + if isinstance(value, TrainingSettings): + self._training = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for training settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_training(**value) + + @property + def featurization(self) -> Optional[TabularFeaturizationSettings]: + """Get the tabular featurization settings for the AutoML job. + + :return: Tabular featurization settings for the AutoML job + :rtype: TabularFeaturizationSettings + """ + return self._featurization + + @featurization.setter + def featurization(self, value: Union[Dict, TabularFeaturizationSettings]) -> None: + """Set the featurization settings for the AutoML job. + + :param value: typing.Dict or TabularFeaturizationSettings + :type value: typing.Union[typing.Dict, TabularFeaturizationSettings] + :raises ValidationException: Expected a dictionary for featurization settings + """ + if isinstance(value, TabularFeaturizationSettings): + self._featurization = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for featurization settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_featurization(**value) + + def set_limits( + self, + *, + enable_early_termination: Optional[bool] = None, + exit_score: Optional[float] = None, + max_concurrent_trials: Optional[int] = None, + max_cores_per_trial: Optional[int] = None, + max_nodes: Optional[int] = None, + max_trials: Optional[int] = None, + timeout_minutes: Optional[int] = None, + trial_timeout_minutes: Optional[int] = None, + ) -> None: + """Set limits for the job. + + :keyword enable_early_termination: Whether to enable early termination if the score is not improving in the + short term, defaults to None. + + Early stopping logic: + + * No early stopping for first 20 iterations (landmarks). + * Early stopping window starts on the 21st iteration and looks for early_stopping_n_iters iterations + (currently set to 10). This means that the first iteration where stopping can occur is the 31st. 
+ * AutoML still schedules 2 ensemble iterations AFTER early stopping, which might result in higher scores. + * Early stopping is triggered if the absolute value of best score calculated is the same for past + early_stopping_n_iters iterations, that is, if there is no improvement in score for + early_stopping_n_iters iterations. + + :paramtype enable_early_termination: typing.Optional[bool] + :keyword exit_score: Target score for experiment. The experiment terminates after this score is reached. + If not specified (no criteria), the experiment runs until no further progress is made + on the primary metric. For for more information on exit criteria, see this `article + <https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#exit-criteria>`_ + , defaults to None + :paramtype exit_score: typing.Optional[float] + :keyword max_concurrent_trials: This is the maximum number of iterations that would be executed in parallel. + The default value is 1. + + * AmlCompute clusters support one iteration running per node. For multiple AutoML experiment parent runs + executed in parallel on a single AmlCompute cluster, the sum of the ``max_concurrent_trials`` values + for all experiments should be less than or equal to the maximum number of nodes. Otherwise, runs + will be queued until nodes are available. + + * DSVM supports multiple iterations per node. ``max_concurrent_trials`` should + be less than or equal to the number of cores on the DSVM. For multiple experiments + run in parallel on a single DSVM, the sum of the ``max_concurrent_trials`` values for all + experiments should be less than or equal to the maximum number of nodes. + + * Databricks - ``max_concurrent_trials`` should be less than or equal to the number of + worker nodes on Databricks. + + ``max_concurrent_trials`` does not apply to local runs. Formerly, this parameter + was named ``concurrent_iterations``. + :paramtype max_concurrent_trials: typing.Optional[int] + :keyword max_cores_per_trial: The maximum number of threads to use for a given training iteration. + Acceptable values: + + * Greater than 1 and less than or equal to the maximum number of cores on the compute target. + + * Equal to -1, which means to use all the possible cores per iteration per child-run. + + * Equal to 1, the default. + + :paramtype max_cores_per_trial: typing.Optional[int] + :keyword max_nodes: [Experimental] The maximum number of nodes to use for distributed training. + + * For forecasting, each model is trained using max(2, int(max_nodes / max_concurrent_trials)) nodes. + + * For classification/regression, each model is trained using max_nodes nodes. + + Note- This parameter is in public preview and might change in future. + :paramtype max_nodes: typing.Optional[int] + :keyword max_trials: The total number of different algorithm and parameter combinations to test during an + automated ML experiment. If not specified, the default is 1000 iterations. + :paramtype max_trials: typing.Optional[int] + :keyword timeout_minutes: Maximum amount of time in minutes that all iterations combined can take before the + experiment terminates. If not specified, the default experiment timeout is 6 days. To specify a timeout + less than or equal to 1 hour, make sure your dataset's size is not greater than + 10,000,000 (rows times column) or an error results, defaults to None + :paramtype timeout_minutes: typing.Optional[int] + :keyword trial_timeout_minutes: Maximum time in minutes that each iteration can run for before it terminates. 
+ If not specified, a value of 1 month or 43200 minutes is used, defaults to None + :paramtype trial_timeout_minutes: typing.Optional[int] + """ + self._limits = self._limits or TabularLimitSettings() + self._limits.enable_early_termination = ( + enable_early_termination if enable_early_termination is not None else self._limits.enable_early_termination + ) + self._limits.exit_score = exit_score if exit_score is not None else self._limits.exit_score + self._limits.max_concurrent_trials = ( + max_concurrent_trials if max_concurrent_trials is not None else self._limits.max_concurrent_trials + ) + self._limits.max_cores_per_trial = ( + max_cores_per_trial if max_cores_per_trial is not None else self._limits.max_cores_per_trial + ) + self._limits.max_nodes = max_nodes if max_nodes is not None else self._limits.max_nodes + self._limits.max_trials = max_trials if max_trials is not None else self._limits.max_trials + self._limits.timeout_minutes = timeout_minutes if timeout_minutes is not None else self._limits.timeout_minutes + self._limits.trial_timeout_minutes = ( + trial_timeout_minutes if trial_timeout_minutes is not None else self._limits.trial_timeout_minutes + ) + + def set_training( + self, + *, + enable_onnx_compatible_models: Optional[bool] = None, + enable_dnn_training: Optional[bool] = None, + enable_model_explainability: Optional[bool] = None, + enable_stack_ensemble: Optional[bool] = None, + enable_vote_ensemble: Optional[bool] = None, + stack_ensemble_settings: Optional[StackEnsembleSettings] = None, + ensemble_model_download_timeout: Optional[int] = None, + allowed_training_algorithms: Optional[List[str]] = None, + blocked_training_algorithms: Optional[List[str]] = None, + training_mode: Optional[Union[str, TabularTrainingMode]] = None, + ) -> None: + """The method to configure training related settings. + + :keyword enable_onnx_compatible_models: Whether to enable or disable enforcing the ONNX-compatible models. + The default is False. For more information about Open Neural Network Exchange (ONNX) and Azure Machine + Learning,see this `article <https://learn.microsoft.com/azure/machine-learning/concept-onnx>`__. + :paramtype enable_onnx_compatible_models: typing.Optional[bool] + :keyword enable_dnn_training: Whether to include DNN based models during model selection. + However, the default is True for DNN NLP tasks, and it's False for all other AutoML tasks. + :paramtype enable_dnn_training: typing.Optional[bool] + :keyword enable_model_explainability: Whether to enable explaining the best AutoML model at the end of all + AutoML training iterations. For more information, see + `Interpretability: model explanations in automated machine learning + <https://learn.microsoft.com/azure/machine-learning/how-to-machine-learning-interpretability-automl>`__. + , defaults to None + :paramtype enable_model_explainability: typing.Optional[bool] + :keyword enable_stack_ensemble: Whether to enable/disable StackEnsemble iteration. + If `enable_onnx_compatible_models` flag is being set, then StackEnsemble iteration will be disabled. + Similarly, for Timeseries tasks, StackEnsemble iteration will be disabled by default, to avoid risks of + overfitting due to small training set used in fitting the meta learner. 
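Every limit in set_limits above is optional and only overwrites what is explicitly passed, so the method can be called incrementally. A sketch against a hypothetical tabular classification job (data path and column name are placeholders):

from azure.ai.ml import Input, automl
from azure.ai.ml.constants import AssetTypes

# Any AutoMLTabular subclass exposes set_limits; classification() is the public factory.
job = automl.classification(
    training_data=Input(type=AssetTypes.MLTABLE, path="./data/train"),
    target_column_name="label",
)

job.set_limits(
    timeout_minutes=120,        # budget for the whole experiment
    trial_timeout_minutes=20,   # budget per iteration
    max_trials=40,
    max_concurrent_trials=4,    # keep <= the number of AmlCompute nodes
    enable_early_termination=True,
)

# A later call only overwrites the fields it names; earlier values are kept.
job.set_limits(exit_score=0.95)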
+ For more information about ensembles, see `Ensemble configuration + <https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#ensemble>`__ + , defaults to None + :paramtype enable_stack_ensemble: typing.Optional[bool] + :keyword enable_vote_ensemble: Whether to enable/disable VotingEnsemble iteration. + For more information about ensembles, see `Ensemble configuration + <https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#ensemble>`__ + , defaults to None + :paramtype enable_vote_ensemble: typing.Optional[bool] + :keyword stack_ensemble_settings: Settings for StackEnsemble iteration, defaults to None + :paramtype stack_ensemble_settings: typing.Optional[StackEnsembleSettings] + :keyword ensemble_model_download_timeout: During VotingEnsemble and StackEnsemble model generation, + multiple fitted models from the previous child runs are downloaded. Configure this parameter with a + higher value than 300 secs, if more time is needed, defaults to None + :paramtype ensemble_model_download_timeout: typing.Optional[int] + :keyword allowed_training_algorithms: A list of model names to search for an experiment. If not specified, + then all models supported for the task are used minus any specified in ``blocked_training_algorithms`` + or deprecated TensorFlow models, defaults to None + :paramtype allowed_training_algorithms: typing.Optional[List[str]] + :keyword blocked_training_algorithms: A list of algorithms to ignore for an experiment, defaults to None + :paramtype blocked_training_algorithms: typing.Optional[List[str]] + :keyword training_mode: [Experimental] The training mode to use. + The possible values are- + + * distributed- enables distributed training for supported algorithms. + + * non_distributed- disables distributed training. + + * auto- Currently, it is same as non_distributed. In future, this might change. + + Note: This parameter is in public preview and may change in future. 
+ :paramtype training_mode: typing.Optional[typing.Union[str, azure.ai.ml.constants.TabularTrainingMode]] + """ + # get training object by calling training getter of respective tabular task + self._training = self.training + if self._training is not None: + self._training.enable_onnx_compatible_models = ( + enable_onnx_compatible_models + if enable_onnx_compatible_models is not None + else self._training.enable_onnx_compatible_models + ) + self._training.enable_dnn_training = ( + enable_dnn_training if enable_dnn_training is not None else self._training.enable_dnn_training + ) + self._training.enable_model_explainability = ( + enable_model_explainability + if enable_model_explainability is not None + else self._training.enable_model_explainability + ) + self._training.enable_stack_ensemble = ( + enable_stack_ensemble if enable_stack_ensemble is not None else self._training.enable_stack_ensemble + ) + self._training.enable_vote_ensemble = ( + enable_vote_ensemble if enable_vote_ensemble is not None else self._training.enable_vote_ensemble + ) + self._training.stack_ensemble_settings = ( + stack_ensemble_settings + if stack_ensemble_settings is not None + else self._training.stack_ensemble_settings + ) + self._training.ensemble_model_download_timeout = ( + ensemble_model_download_timeout + if ensemble_model_download_timeout is not None + else self._training.ensemble_model_download_timeout + ) + + self._training.allowed_training_algorithms = allowed_training_algorithms + self._training.blocked_training_algorithms = blocked_training_algorithms + self._training.training_mode = training_mode if training_mode is not None else self._training.training_mode + + def set_featurization( + self, + *, + blocked_transformers: Optional[List[Union[BlockedTransformers, str]]] = None, + column_name_and_types: Optional[Dict[str, str]] = None, + dataset_language: Optional[str] = None, + transformer_params: Optional[Dict[str, List[ColumnTransformer]]] = None, + mode: Optional[str] = None, + enable_dnn_featurization: Optional[bool] = None, + ) -> None: + """Define feature engineering configuration. + + :keyword blocked_transformers: A list of transformer names to be blocked during featurization, defaults to None + :paramtype blocked_transformers: Optional[List[Union[BlockedTransformers, str]]] + :keyword column_name_and_types: A dictionary of column names and feature types used to update column purpose + , defaults to None + :paramtype column_name_and_types: Optional[Dict[str, str]] + :keyword dataset_language: Three character ISO 639-3 code for the language(s) contained in the dataset. + Languages other than English are only supported if you use GPU-enabled compute. The language_code + 'mul' should be used if the dataset contains multiple languages. 
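set_training follows the same overwrite-only-what-is-passed pattern, with one exception visible in the code above: the allowed and blocked algorithm lists are always reassigned, even to None. Continuing the job from the previous sketch (the algorithm names are illustrative):

job.set_training(
    enable_model_explainability=True,
    enable_vote_ensemble=True,
    enable_stack_ensemble=False,
    allowed_training_algorithms=["LightGBM", "XGBoostClassifier"],
)

# allowed/blocked algorithm lists are assigned exactly as passed (including None),
# so repeat them on any later call that should keep them.
job.set_training(
    enable_dnn_training=False,
    allowed_training_algorithms=["LightGBM", "XGBoostClassifier"],
)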
To find ISO 639-3 codes for different
+ languages, please refer to https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes, defaults to None
+ :paramtype dataset_language: Optional[str]
+ :keyword transformer_params: A dictionary of transformers and their corresponding customization parameters
+ , defaults to None
+ :paramtype transformer_params: Optional[Dict[str, List[ColumnTransformer]]]
+ :keyword mode: Featurization mode, "off" or "auto" (the service default is "auto"), defaults to None
+ :paramtype mode: Optional[str]
+ :keyword enable_dnn_featurization: Whether to include DNN based feature engineering methods, defaults to None
+ :paramtype enable_dnn_featurization: Optional[bool]
+ """
+ self._featurization = self._featurization or TabularFeaturizationSettings()
+ self._featurization.blocked_transformers = (
+ blocked_transformers if blocked_transformers is not None else self._featurization.blocked_transformers
+ )
+ self._featurization.column_name_and_types = (
+ column_name_and_types if column_name_and_types is not None else self._featurization.column_name_and_types
+ )
+ self._featurization.dataset_language = (
+ dataset_language if dataset_language is not None else self._featurization.dataset_language
+ )
+ self._featurization.transformer_params = (
+ transformer_params if transformer_params is not None else self._featurization.transformer_params
+ )
+ self._featurization.mode = mode or self._featurization.mode
+ self._featurization.enable_dnn_featurization = (
+ enable_dnn_featurization
+ if enable_dnn_featurization is not None
+ else self._featurization.enable_dnn_featurization
+ )
+
+ def set_data(
+ self,
+ *,
+ training_data: Input,
+ target_column_name: str,
+ weight_column_name: Optional[str] = None,
+ validation_data: Optional[Input] = None,
+ validation_data_size: Optional[float] = None,
+ n_cross_validations: Optional[Union[str, int]] = None,
+ cv_split_column_names: Optional[List[str]] = None,
+ test_data: Optional[Input] = None,
+ test_data_size: Optional[float] = None,
+ ) -> None:
+ """Define data configuration.
+
+ :keyword training_data: Training data.
+ :paramtype training_data: Input
+ :keyword target_column_name: Column name of the target column.
+ :paramtype target_column_name: str + :keyword weight_column_name: Weight column name, defaults to None + :paramtype weight_column_name: typing.Optional[str] + :keyword validation_data: Validation data, defaults to None + :paramtype validation_data: typing.Optional[Input] + :keyword validation_data_size: Validation data size, defaults to None + :paramtype validation_data_size: typing.Optional[float] + :keyword n_cross_validations: n_cross_validations, defaults to None + :paramtype n_cross_validations: typing.Optional[typing.Union[str, int]] + :keyword cv_split_column_names: cv_split_column_names, defaults to None + :paramtype cv_split_column_names: typing.Optional[typing.List[str]] + :keyword test_data: Test data, defaults to None + :paramtype test_data: typing.Optional[Input] + :keyword test_data_size: Test data size, defaults to None + :paramtype test_data_size: typing.Optional[float] + """ + self.target_column_name = target_column_name if target_column_name is not None else self.target_column_name + self.weight_column_name = weight_column_name if weight_column_name is not None else self.weight_column_name + self.training_data = training_data if training_data is not None else self.training_data + self.validation_data = validation_data if validation_data is not None else self.validation_data + self.validation_data_size = ( + validation_data_size if validation_data_size is not None else self.validation_data_size + ) + self.cv_split_column_names = ( + cv_split_column_names if cv_split_column_names is not None else self.cv_split_column_names + ) + self.n_cross_validations = n_cross_validations if n_cross_validations is not None else self.n_cross_validations + self.test_data = test_data if test_data is not None else self.test_data + self.test_data_size = test_data_size if test_data_size is not None else self.test_data_size + + def _validation_data_to_rest(self, rest_obj: "AutoMLTabular") -> None: + """Validation data serialization. + + :param rest_obj: Serialized object + :type rest_obj: AutoMLTabular + """ + if rest_obj.n_cross_validations: + n_cross_val = rest_obj.n_cross_validations + # Convert n_cross_validations int value to CustomNCrossValidations + if isinstance(n_cross_val, int) and n_cross_val > 1: + rest_obj.n_cross_validations = CustomNCrossValidations(value=n_cross_val) + # Convert n_cross_validations str value to AutoNCrossValidations + elif isinstance(n_cross_val, str): + rest_obj.n_cross_validations = AutoNCrossValidations() + + def _validation_data_from_rest(self) -> None: + """Validation data deserialization.""" + if self.n_cross_validations: + n_cross_val = self.n_cross_validations + # Convert n_cross_validations CustomNCrossValidations back into int value + if isinstance(n_cross_val, CustomNCrossValidations): + self.n_cross_validations = n_cross_val.value + # Convert n_cross_validations AutoNCrossValidations to str value + elif isinstance(n_cross_val, AutoNCrossValidations): + self.n_cross_validations = AutoMLConstants.AUTO + + def __eq__(self, other: object) -> bool: + """Return True if both instances have the same values. + + This method check instances equality and returns True if both of + the instances have the same attributes with the same values. 
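The classmethods later in this package (for example ClassificationJob._create_instance_from_schema_dict) build the job first and then call set_data; a rough sketch of the same pattern from user code, with import paths, the MLTable path, and column names as assumptions:

    # Sketch only: import paths, data path, and column names are placeholders.
    from azure.ai.ml import Input
    from azure.ai.ml.automl import ClassificationJob, ColumnTransformer
    from azure.ai.ml.constants import AssetTypes

    job = ClassificationJob()
    job.set_data(
        training_data=Input(type=AssetTypes.MLTABLE, path="./train-mltable-folder"),
        target_column_name="label",
        n_cross_validations=5,
    )
    job.set_featurization(
        blocked_transformers=["LabelEncoder"],
        column_name_and_types={"age": "Categorical"},
        transformer_params={
            "Imputer": [ColumnTransformer(fields=["age"], parameters={"strategy": "constant", "fill_value": 0})]
        },
    )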
+ + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + if not isinstance(other, AutoMLTabular): + return NotImplemented + + return ( + self.target_column_name == other.target_column_name + and self.weight_column_name == other.weight_column_name + and self.training_data == other.training_data + and self.validation_data == other.validation_data + and self.validation_data_size == other.validation_data_size + and self.cv_split_column_names == other.cv_split_column_names + and self.n_cross_validations == other.n_cross_validations + and self.test_data == other.test_data + and self.test_data_size == other.test_data_size + and self._featurization == other._featurization + and self._limits == other._limits + and self._training == other._training + ) + + def __ne__(self, other: object) -> bool: + """Check inequality between two AutoMLTabular objects. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/classification_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/classification_job.py new file mode 100644 index 00000000..6f5ab271 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/classification_job.py @@ -0,0 +1,352 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access +from typing import Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import Classification as RestClassification +from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationPrimaryMetrics, JobBase, TaskType +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.tabular.automl_tabular import AutoMLTabular +from azure.ai.ml.entities._job.automl.tabular.featurization_settings import TabularFeaturizationSettings +from azure.ai.ml.entities._job.automl.tabular.limit_settings import TabularLimitSettings +from azure.ai.ml.entities._job.automl.training_settings import ( # noqa: F401 # pylint: disable=unused-import + ClassificationTrainingSettings, + TrainingSettings, +) +from azure.ai.ml.entities._util import load_from_dict + + +class ClassificationJob(AutoMLTabular): + """Configuration for AutoML Classification Job. + + :keyword primary_metric: The primary metric to use for optimization, defaults to None + :paramtype primary_metric: typing.Optional[str] + :keyword positive_label: Positive label for binary metrics calculation, defaults to None + :paramtype positive_label: typing.Optional[str] + :keyword featurization: Featurization settings. Defaults to None. + :paramtype featurization: typing.Optional[TabularFeaturizationSettings] + :keyword limits: Limits settings. Defaults to None. 
+ :paramtype limits: typing.Optional[TabularLimitSettings] + :keyword training: Training settings. Defaults to None. + :paramtype training: typing.Optional[TrainingSettings] + :return: An instance of ClassificationJob object. + :rtype: ~azure.ai.ml.entities.automl.ClassificationJob + :raises ValueError: If primary_metric is not a valid primary metric + :raises ValueError: If positive_label is not a valid positive label + :raises ValueError: If featurization is not a valid featurization settings + :raises ValueError: If limits is not a valid limits settings + :raises ValueError: If training is not a valid training settings + """ + + _DEFAULT_PRIMARY_METRIC = ClassificationPrimaryMetrics.ACCURACY + + def __init__( + self, + *, + primary_metric: Optional[str] = None, + positive_label: Optional[str] = None, + **kwargs: Any, + ) -> None: + """Initialize a new AutoML Classification task. + + :keyword primary_metric: The primary metric to use for optimization, defaults to None + :paramtype primary_metric: typing.Optional[str] + :keyword positive_label: Positive label for binary metrics calculation, defaults to None + :paramtype positive_label: typing.Optional[str] + :keyword featurization: featurization settings. Defaults to None. + :paramtype featurization: typing.Optional[TabularFeaturizationSettings] + :keyword limits: limits settings. Defaults to None. + :paramtype limits: typing.Optional[TabularLimitSettings] + :keyword training: training settings. Defaults to None. + :paramtype training: typing.Optional[TrainingSettings] + :raises ValueError: If primary_metric is not a valid primary metric + :raises ValueError: If positive_label is not a valid positive label + :raises ValueError: If featurization is not a valid featurization settings + :raises ValueError: If limits is not a valid limits settings + :raises ValueError: If training is not a valid training settings + """ + # Extract any task specific settings + featurization = kwargs.pop("featurization", None) + limits = kwargs.pop("limits", None) + training = kwargs.pop("training", None) + + super().__init__( + task_type=TaskType.CLASSIFICATION, + featurization=featurization, + limits=limits, + training=training, + **kwargs, + ) + + self.primary_metric = primary_metric or ClassificationJob._DEFAULT_PRIMARY_METRIC + self.positive_label = positive_label + + @property + def primary_metric(self) -> Union[str, ClassificationPrimaryMetrics]: + """The primary metric to use for optimization. + + :return: The primary metric to use for optimization. + :rtype: typing.Union[str, ClassificationPrimaryMetrics] + """ + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ClassificationPrimaryMetrics]) -> None: + """The primary metric to use for optimization setter. + + :param value: Primary metric to use for optimization. + :type value: typing.Union[str, ClassificationPrimaryMetrics] + """ + # TODO: better way to do this + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + self._primary_metric = ( + ClassificationJob._DEFAULT_PRIMARY_METRIC + if value is None + else ClassificationPrimaryMetrics[camel_to_snake(value).upper()] + ) + + @property # type: ignore + def training(self) -> ClassificationTrainingSettings: + """Training Settings for AutoML Classification Job. + + :return: Training settings used for AutoML Classification Job. 
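A short aside on the primary_metric setter above: string values are normalized through camel_to_snake(value).upper() into the ClassificationPrimaryMetrics enum, and the class-level default is ACCURACY. A sketch, with the import path assumed:

    # Sketch only: the azure.ai.ml.automl import path is an assumption.
    from azure.ai.ml.automl import ClassificationJob

    job = ClassificationJob(primary_metric="AUC_weighted", positive_label="yes")
    metric = job.primary_metric                   # ClassificationPrimaryMetrics.AUC_WEIGHTED

    job_default = ClassificationJob()             # no metric supplied
    default_metric = job_default.primary_metric   # ClassificationPrimaryMetrics.ACCURACY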
+ :rtype: ClassificationTrainingSettings + """ + return self._training or ClassificationTrainingSettings() + + @training.setter + def training(self, value: Union[Dict, ClassificationTrainingSettings]) -> None: # pylint: disable=unused-argument + ... + + def _to_rest_object(self) -> JobBase: + """Convert ClassificationJob object to a REST object. + + :return: REST object representation of this object. + :rtype: JobBase + """ + classification_task = RestClassification( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + validation_data_size=self.validation_data_size, + weight_column_name=self.weight_column_name, + cv_split_column_names=self.cv_split_column_names, + n_cross_validations=self.n_cross_validations, + test_data=self.test_data, + test_data_size=self.test_data_size, + featurization_settings=self._featurization._to_rest_object() if self._featurization else None, + limit_settings=self._limits._to_rest_object() if self._limits else None, + training_settings=self._training._to_rest_object() if self._training else None, + primary_metric=self.primary_metric, + positive_label=self.positive_label, + log_verbosity=self.log_verbosity, + ) + self._resolve_data_inputs(classification_task) + self._validation_data_to_rest(classification_task) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=classification_task, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "ClassificationJob": + """Convert a REST object to ClassificationJob object. + + :param obj: ClassificationJob in Rest format. + :type obj: JobBase + :return: ClassificationJob objects. 
+ :rtype: ClassificationJob + """ + + properties: RestAutoMLJob = obj.properties + task_details: RestClassification = properties.task_details + + job_args_dict = { + "id": obj.id, + "name": obj.name, + "description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "experiment_name": properties.experiment_name, + "services": properties.services, + "status": properties.status, + "creation_context": obj.system_data, + "display_name": properties.display_name, + "compute": properties.compute_id, + "outputs": from_rest_data_outputs(properties.outputs), + "resources": properties.resources, + "identity": ( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + "queue_settings": properties.queue_settings, + } + + classification_job = cls( + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + validation_data_size=task_details.validation_data_size, + weight_column_name=task_details.weight_column_name, + cv_split_column_names=task_details.cv_split_column_names, + n_cross_validations=task_details.n_cross_validations, + test_data=task_details.test_data, + test_data_size=task_details.test_data_size, + featurization=( + TabularFeaturizationSettings._from_rest_object(task_details.featurization_settings) + if task_details.featurization_settings + else None + ), + limits=( + TabularLimitSettings._from_rest_object(task_details.limit_settings) + if task_details.limit_settings + else None + ), + training=( + ClassificationTrainingSettings._from_rest_object(task_details.training_settings) + if task_details.training_settings + else None + ), + primary_metric=task_details.primary_metric, + positive_label=task_details.positive_label, + log_verbosity=task_details.log_verbosity, + **job_args_dict, + ) + + classification_job._restore_data_inputs() + classification_job._validation_data_from_rest() + + return classification_job + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "ClassificationJob": + """Load from a dictionary. + + :param data: dictionary representation of the object. + :type data: typing.Dict + :param context: dictionary containing the context. + :type context: typing.Dict + :param additional_message: additional message to be added to the error message. + :type additional_message: str + :return: ClassificationJob object. + :rtype: ClassificationJob + """ + from azure.ai.ml._schema.automl.table_vertical.classification import AutoMLClassificationSchema + from azure.ai.ml._schema.pipeline.automl_node import AutoMLClassificationNodeSchema + + if kwargs.pop("inside_pipeline", False): + loaded_data = load_from_dict( + AutoMLClassificationNodeSchema, + data, + context, + additional_message, + **kwargs, + ) + else: + loaded_data = load_from_dict(AutoMLClassificationSchema, data, context, additional_message, **kwargs) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "ClassificationJob": + """Create an instance from a schema dictionary. + + :param loaded_data: dictionary containing the data. + :type loaded_data: typing.Dict + :return: ClassificationJob object. 
+ :rtype: ClassificationJob + """ + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + data_settings = { + "training_data": loaded_data.pop("training_data"), + "target_column_name": loaded_data.pop("target_column_name"), + "weight_column_name": loaded_data.pop("weight_column_name", None), + "validation_data": loaded_data.pop("validation_data", None), + "validation_data_size": loaded_data.pop("validation_data_size", None), + "cv_split_column_names": loaded_data.pop("cv_split_column_names", None), + "n_cross_validations": loaded_data.pop("n_cross_validations", None), + "test_data": loaded_data.pop("test_data", None), + "test_data_size": loaded_data.pop("test_data_size", None), + } + job = ClassificationJob(**loaded_data) + job.set_data(**data_settings) + return job + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + """Convert the object to a dictionary. + + :param inside_pipeline: whether the job is inside a pipeline or not, defaults to False + :type inside_pipeline: bool + :return: dictionary representation of the object. + :rtype: typing.Dict + """ + from azure.ai.ml._schema.automl.table_vertical.classification import AutoMLClassificationSchema + from azure.ai.ml._schema.pipeline.automl_node import AutoMLClassificationNodeSchema + + schema_dict: dict = {} + if inside_pipeline: + schema_dict = AutoMLClassificationNodeSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + else: + schema_dict = AutoMLClassificationSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + + return schema_dict + + def __eq__(self, other: object) -> bool: + """Returns True if both instances have the same values. + + This method check instances equality and returns True if both of + the instances have the same attributes with the same values. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + if not isinstance(other, ClassificationJob): + return NotImplemented + + if not super().__eq__(other): + return False + + return self.primary_metric == other.primary_metric + + def __ne__(self, other: object) -> bool: + """Check inequality between two ImageLimitSettings objects. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/featurization_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/featurization_settings.py new file mode 100644 index 00000000..6ef2332e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/featurization_settings.py @@ -0,0 +1,170 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +# pylint: disable=protected-access + +import logging +from typing import Dict, List, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import BlockedTransformers +from azure.ai.ml._restclient.v2023_04_01_preview.models import ColumnTransformer as RestColumnTransformer +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + TableVerticalFeaturizationSettings as RestTabularFeaturizationSettings, +) +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants._job.automl import AutoMLTransformerParameterKeys +from azure.ai.ml.entities._job.automl.featurization_settings import FeaturizationSettings, FeaturizationSettingsType +from azure.ai.ml.entities._mixins import RestTranslatableMixin + +module_logger = logging.getLogger(__name__) + + +class ColumnTransformer(RestTranslatableMixin): + """Column transformer settings. + + :param fields: The fields on which to perform custom featurization + :type field: List[str] + :param parameters: parameters used for custom featurization + :type parameters: Dict[str, Optional[str, float]] + """ + + def __init__( + self, + *, + fields: Optional[List[str]] = None, + parameters: Optional[Dict[str, Union[str, float]]] = None, + ): + self.fields = fields + self.parameters = parameters + + def _to_rest_object(self) -> RestColumnTransformer: + return RestColumnTransformer(fields=self.fields, parameters=self.parameters) + + @classmethod + def _from_rest_object(cls, obj: RestColumnTransformer) -> Optional["ColumnTransformer"]: + if obj: + fields = obj.fields + parameters = obj.parameters + return ColumnTransformer(fields=fields, parameters=parameters) + return None + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ColumnTransformer): + return NotImplemented + return self.fields == other.fields and self.parameters == other.parameters + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) + + +class TabularFeaturizationSettings(FeaturizationSettings): + """Featurization settings for an AutoML Job.""" + + def __init__( + self, + *, + blocked_transformers: Optional[List[Union[BlockedTransformers, str]]] = None, + column_name_and_types: Optional[Dict[str, str]] = None, + dataset_language: Optional[str] = None, + transformer_params: Optional[Dict[str, List[ColumnTransformer]]] = None, + mode: Optional[str] = None, + enable_dnn_featurization: Optional[bool] = None, + ): + """ + :param blocked_transformers: A list of transformers to ignore when featurizing. + :type blocked_transformers: List[Union[BlockedTransformers, str]] + :param column_name_and_types: A dictionary of column names and feature types used to update column purpose. + :type column_name_and_types: Dict[str, str] + :param dataset_language: The language of the dataset. + :type dataset_language: str + :param transformer_params: A dictionary of transformers and their parameters. + :type transformer_params: Dict[str, List[ColumnTransformer]] + :param mode: The mode of the featurization. + :type mode: str + :param enable_dnn_featurization: Whether to enable DNN featurization. + :type enable_dnn_featurization: bool + :ivar type: Specifies the type of FeaturizationSettings. Set automatically to "Tabular" for this class. 
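An illustrative sketch of how the two classes above compose (the import path is an assumption, and the example relies on "Imputer" and "WordEmbedding" being valid members of the corresponding enums, which is also assumed here):

    # Sketch only: import path, column names, and enum member names are assumptions.
    from azure.ai.ml.automl import ColumnTransformer, TabularFeaturizationSettings

    settings = TabularFeaturizationSettings(
        dataset_language="eng",
        blocked_transformers=["WordEmbedding"],
        transformer_params={
            "Imputer": [ColumnTransformer(fields=["age"], parameters={"strategy": "constant", "fill_value": 0})]
        },
    )
    # The property setters normalize the inputs: transformer_params keys are mapped through
    # AutoMLTransformerParameterKeys and blocked_transformers entries through the
    # BlockedTransformers enum, so plain strings are accepted.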
+ :vartype type: str + """ + super().__init__(dataset_language=dataset_language) + self.blocked_transformers = blocked_transformers + self.column_name_and_types = column_name_and_types + self.transformer_params = transformer_params + self.mode = mode + self.enable_dnn_featurization = enable_dnn_featurization + self.type = FeaturizationSettingsType.TABULAR + + @property + def transformer_params(self) -> Optional[Dict[str, List[ColumnTransformer]]]: + """A dictionary of transformers and their parameters.""" + return self._transformer_params + + @transformer_params.setter + def transformer_params(self, value: Dict[str, List[ColumnTransformer]]) -> None: + self._transformer_params = ( + None + if not value + else {(AutoMLTransformerParameterKeys[camel_to_snake(k).upper()].value): v for k, v in value.items()} + ) + + @property + def blocked_transformers(self) -> Optional[List[Union[BlockedTransformers, str]]]: + """A list of transformers to ignore when featurizing.""" + return self._blocked_transformers + + @blocked_transformers.setter + def blocked_transformers(self, blocked_transformers_list: List[Union[BlockedTransformers, str]]) -> None: + self._blocked_transformers = ( + None + if blocked_transformers_list is None + else [BlockedTransformers[camel_to_snake(o)] for o in blocked_transformers_list] + ) + + def _to_rest_object(self) -> RestTabularFeaturizationSettings: + transformer_dict = {} + if self.transformer_params: + for key, settings in self.transformer_params.items(): + transformer_dict[key] = [o._to_rest_object() for o in settings] + return RestTabularFeaturizationSettings( + blocked_transformers=self.blocked_transformers, + column_name_and_types=self.column_name_and_types, + dataset_language=self.dataset_language, + mode=self.mode, + transformer_params=transformer_dict, + enable_dnn_featurization=self.enable_dnn_featurization, + ) + + @classmethod + def _from_rest_object(cls, obj: RestTabularFeaturizationSettings) -> "TabularFeaturizationSettings": + rest_transformers_params = obj.transformer_params + transformer_dict: Optional[Dict] = None + if rest_transformers_params: + transformer_dict = {} + for key, settings in rest_transformers_params.items(): + transformer_dict[key] = [ColumnTransformer._from_rest_object(o) for o in settings] + transformer_params = transformer_dict + + return TabularFeaturizationSettings( + blocked_transformers=obj.blocked_transformers, + column_name_and_types=obj.column_name_and_types, + dataset_language=obj.dataset_language, + transformer_params=transformer_params, + mode=obj.mode, + enable_dnn_featurization=obj.enable_dnn_featurization, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, TabularFeaturizationSettings): + return NotImplemented + return ( + super().__eq__(other) + and self.blocked_transformers == other.blocked_transformers + and self.column_name_and_types == other.column_name_and_types + and self.transformer_params == other.transformer_params + and self.mode == other.mode + and self.enable_dnn_featurization == other.enable_dnn_featurization + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/forecasting_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/forecasting_job.py new file mode 100644 index 00000000..9bd10b19 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/forecasting_job.py @@ -0,0 +1,686 @@ +# 
--------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Any, Dict, List, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import Forecasting as RestForecasting +from azure.ai.ml._restclient.v2023_04_01_preview.models import ForecastingPrimaryMetrics, JobBase, TaskType +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants import TabularTrainingMode +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.stack_ensemble_settings import StackEnsembleSettings +from azure.ai.ml.entities._job.automl.tabular.automl_tabular import AutoMLTabular +from azure.ai.ml.entities._job.automl.tabular.featurization_settings import TabularFeaturizationSettings +from azure.ai.ml.entities._job.automl.tabular.forecasting_settings import ForecastingSettings +from azure.ai.ml.entities._job.automl.tabular.limit_settings import TabularLimitSettings +from azure.ai.ml.entities._job.automl.training_settings import ForecastingTrainingSettings +from azure.ai.ml.entities._util import load_from_dict + + +class ForecastingJob(AutoMLTabular): + """ + Configuration for AutoML Forecasting Task. + + :param primary_metric: The primary metric to use for model selection. + :type primary_metric: Optional[str] + :param forecasting_settings: The settings for the forecasting task. + :type forecasting_settings: + Optional[~azure.ai.ml.automl.ForecastingSettings] + :param kwargs: Job-specific arguments + :type kwargs: Dict[str, Any] + """ + + _DEFAULT_PRIMARY_METRIC = ForecastingPrimaryMetrics.NORMALIZED_ROOT_MEAN_SQUARED_ERROR + + def __init__( + self, + *, + primary_metric: Optional[str] = None, + forecasting_settings: Optional[ForecastingSettings] = None, + **kwargs: Any, + ) -> None: + """Initialize a new AutoML Forecasting task.""" + # Extract any task specific settings + featurization = kwargs.pop("featurization", None) + limits = kwargs.pop("limits", None) + training = kwargs.pop("training", None) + + super().__init__( + task_type=TaskType.FORECASTING, + featurization=featurization, + limits=limits, + training=training, + **kwargs, + ) + + self.primary_metric = primary_metric or ForecastingJob._DEFAULT_PRIMARY_METRIC + self._forecasting_settings = forecasting_settings + + @property + def primary_metric(self) -> Optional[str]: + """ + Return the primary metric to use for model selection. + + :return: The primary metric for model selection. + :rtype: Optional[str] + """ + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ForecastingPrimaryMetrics]) -> None: + """ + Set the primary metric to use for model selection. + + :param value: The primary metric for model selection. 
+ :type: Union[str, ~azure.ai.ml.automl.ForecastingPrimaryMetrics] + """ + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + self._primary_metric = ( + ForecastingJob._DEFAULT_PRIMARY_METRIC + if value is None + else ForecastingPrimaryMetrics[camel_to_snake(value).upper()] + ) + + @property # type: ignore + def training(self) -> ForecastingTrainingSettings: + """ + Return the forecast training settings. + + :return: training settings. + :rtype: ~azure.ai.ml.automl.ForecastingTrainingSettings + """ + return self._training or ForecastingTrainingSettings() + + @training.setter + def training(self, value: Union[Dict, ForecastingTrainingSettings]) -> None: # pylint: disable=unused-argument + ... + + @property + def forecasting_settings(self) -> Optional[ForecastingSettings]: + """ + Return the forecast settings. + + :return: forecast settings. + :rtype: ~azure.ai.ml.automl.ForecastingSettings + """ + return self._forecasting_settings + + def set_forecast_settings( + self, + *, + time_column_name: Optional[str] = None, + forecast_horizon: Optional[Union[str, int]] = None, + time_series_id_column_names: Optional[Union[str, List[str]]] = None, + target_lags: Optional[Union[str, int, List[int]]] = None, + feature_lags: Optional[str] = None, + target_rolling_window_size: Optional[Union[str, int]] = None, + country_or_region_for_holidays: Optional[str] = None, + use_stl: Optional[str] = None, + seasonality: Optional[Union[str, int]] = None, + short_series_handling_config: Optional[str] = None, + frequency: Optional[str] = None, + target_aggregate_function: Optional[str] = None, + cv_step_size: Optional[int] = None, + features_unknown_at_forecast_time: Optional[Union[str, List[str]]] = None, + ) -> None: + """Manage parameters used by forecasting tasks. + + :keyword time_column_name: + The name of the time column. This parameter is required when forecasting to specify the datetime + column in the input data used for building the time series and inferring its frequency. + :paramtype time_column_name: Optional[str] + :keyword forecast_horizon: + The desired maximum forecast horizon in units of time-series frequency. The default value is 1. + + Units are based on the time interval of your training data, e.g., monthly, weekly that the forecaster + should predict out. When task type is forecasting, this parameter is required. For more information on + setting forecasting parameters, see `Auto-train a time-series forecast model <https://learn.microsoft.com/ + azure/machine-learning/how-to-auto-train-forecast>`_. + :type forecast_horizon: Optional[Union[int, str]] + :keyword time_series_id_column_names: + The names of columns used to group a time series. + It can be used to create multiple series. If time series id column names is not defined or + the identifier columns specified do not identify all the series in the dataset, the time series identifiers + will be automatically created for your data set. + :paramtype time_series_id_column_names: Optional[Union[str, List[str]]] + :keyword target_lags: The number of past periods to lag from the target column. By default the lags are turned + off. + + When forecasting, this parameter represents the number of rows to lag the target values based + on the frequency of the data. This is represented as a list or single integer. Lag should be used + when the relationship between the independent variables and dependent variable do not match up or + correlate by default. 
For example, when trying to forecast demand for a product, the demand in any
+ month may depend on the price of specific commodities 3 months prior. In this example, you may want
+ to lag the target (demand) negatively by 3 months so that the model is training on the correct
+ relationship. For more information, see `Auto-train a time-series forecast model
+ <https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-forecast>`_.
+
+ **Note on auto detection of target lags and rolling window size.
+ Please see the corresponding comments in the rolling window section.**
+ We use the following algorithm to detect the optimal target lag and rolling window size.
+
+ #. Estimate the maximum lag order for the look back feature selection. In our case it is the number of
+ periods till the next date frequency granularity, i.e. if frequency is daily, it will be a week (7),
+ if it is weekly, it will be a month (4). Those values multiplied by two give the largest
+ possible values of lags/rolling windows. In our examples, we consider maximum lag
+ orders of 14 and 8 respectively.
+ #. Create a de-seasonalized series by adding trend and residual components. This will be used
+ in the next step.
+ #. Estimate the PACF - Partial Auto Correlation Function - on the data from (2)
+ and search for points where the autocorrelation is significant, i.e. its absolute
+ value is more than 1.96/square_root(maximal lag value), which corresponds to a significance of 95%.
+ #. If all points are significant, we consider it to be strong seasonality
+ and do not create look back features.
+ #. We scan the PACF values from the beginning, and the value before the first insignificant
+ autocorrelation designates the lag. If the first significant element (the value correlated with
+ itself) is followed by an insignificant one, the lag will be 0 and we will not use look back features.
+
+ :type target_lags: Optional[Union[str, int, List[int]]]
+ :keyword feature_lags: Flag for generating lags for the numeric features with 'auto' or None.
+ :paramtype feature_lags: Optional[str]
+ :keyword target_rolling_window_size: The number of past periods used to create a rolling window average of the
+ target column.
+
+ When forecasting, this parameter represents `n` historical periods to use to generate forecasted values,
+ <= training set size. If omitted, `n` is the full training set size. Specify this parameter
+ when you only want to consider a certain amount of history when training the model.
+ If set to 'auto', the rolling window will be estimated as the last
+ value where the PACF is more than the significance threshold. Please see the target_lags section for details.
+ :paramtype target_rolling_window_size: Optional[Union[str, int]]
+ :keyword country_or_region_for_holidays: The country/region used to generate holiday features.
+ These should be ISO 3166 two-letter country/region codes, for example 'US' or 'GB'.
+ :paramtype country_or_region_for_holidays: Optional[str]
+ :keyword use_stl: Configure STL Decomposition of the time-series target column.
+ use_stl can take three values: None (default) - no STL decomposition, 'season' - only generate
+ the season component, and 'season_trend' - generate both season and trend components.
+ :type use_stl: Optional[str]
+ :keyword seasonality: Set time series seasonality as an integer multiple of the series frequency.
+ If seasonality is set to 'auto', it will be inferred.
+ If set to None, the time series is assumed non-seasonal, which is equivalent to seasonality=1.
+ :paramtype seasonality: Optional[Union[int, str] + :keyword short_series_handling_config: + The parameter defining how if AutoML should handle short time series. + + Possible values: 'auto' (default), 'pad', 'drop' and None. + + * **auto** short series will be padded if there are no long series, + otherwise short series will be dropped. + * **pad** all the short series will be padded. + * **drop** all the short series will be dropped". + * **None** the short series will not be modified. + + If set to 'pad', the table will be padded with the zeroes and + empty values for the regressors and random values for target with the mean + equal to target value median for given time series id. If median is more or equal + to zero, the minimal padded value will be clipped by zero: + Input: + + +------------+---------------+----------+--------+ + | Date | numeric_value | string | target | + +============+===============+==========+========+ + | 2020-01-01 | 23 | green | 55 | + +------------+---------------+----------+--------+ + + Output assuming minimal number of values is four: + + +------------+---------------+----------+--------+ + | Date | numeric_value | string | target | + +============+===============+==========+========+ + | 2019-12-29 | 0 | NA | 55.1 | + +------------+---------------+----------+--------+ + | 2019-12-30 | 0 | NA | 55.6 | + +------------+---------------+----------+--------+ + | 2019-12-31 | 0 | NA | 54.5 | + +------------+---------------+----------+--------+ + | 2020-01-01 | 23 | green | 55 | + +------------+---------------+----------+--------+ + + **Note:** We have two parameters short_series_handling_configuration and + legacy short_series_handling. When both parameters are set we are + synchronize them as shown in the table below (short_series_handling_configuration and + short_series_handling for brevity are marked as handling_configuration and handling + respectively). + + +------------+--------------------------+----------------------+-----------------------------+ + | | handling | | handling | | resulting | | resulting | + | | | configuration | | handling | | handling | + | | | | | configuration | + +============+==========================+======================+=============================+ + | True | auto | True | auto | + +------------+--------------------------+----------------------+-----------------------------+ + | True | pad | True | auto | + +------------+--------------------------+----------------------+-----------------------------+ + | True | drop | True | auto | + +------------+--------------------------+----------------------+-----------------------------+ + | True | None | False | None | + +------------+--------------------------+----------------------+-----------------------------+ + | False | auto | False | None | + +------------+--------------------------+----------------------+-----------------------------+ + | False | pad | False | None | + +------------+--------------------------+----------------------+-----------------------------+ + | False | drop | False | None | + +------------+--------------------------+----------------------+-----------------------------+ + | False | None | False | None | + +------------+--------------------------+----------------------+-----------------------------+ + + :type short_series_handling_config: Optional[str] + :keyword frequency: Forecast frequency. + + When forecasting, this parameter represents the period with which the forecast is desired, + for example daily, weekly, yearly, etc. 
The forecast frequency is dataset frequency by default. + You can optionally set it to greater (but not lesser) than dataset frequency. + We'll aggregate the data and generate the results at forecast frequency. For example, + for daily data, you can set the frequency to be daily, weekly or monthly, but not hourly. + The frequency needs to be a pandas offset alias. + Please refer to pandas documentation for more information: + https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects + :type frequency: Optional[str] + :keyword target_aggregate_function: The function to be used to aggregate the time series target + column to conform to a user specified frequency. If the target_aggregation_function is set, + but the freq parameter is not set, the error is raised. The possible target aggregation + functions are: "sum", "max", "min" and "mean". + + * The target column values are aggregated based on the specified operation. + Typically, sum is appropriate for most scenarios. + * Numerical predictor columns in your data are aggregated by sum, mean, minimum value, + and maximum value. As a result, automated ML generates new columns suffixed with the + aggregation function name and applies the selected aggregate operation. + * For categorical predictor columns, the data is aggregated by mode, + the most prominent category in the window. + * Date predictor columns are aggregated by minimum value, maximum value and mode. + + +----------------+-------------------------------+--------------------------------------+ + | | freq | | target_aggregation_function | | Data regularity | + | | | | fixing mechanism | + +================+===============================+======================================+ + | None (Default) | None (Default) | | The aggregation | + | | | | is not applied. | + | | | | If the valid | + | | | | frequency can | + | | | | not be | + | | | | determined | + | | | | the error | + | | | | will be raised. | + +----------------+-------------------------------+--------------------------------------+ + | Some Value | None (Default) | | The aggregation | + | | | | is not applied. | + | | | | If the number | + | | | | of data points | + | | | | compliant to | + | | | | given frequency | + | | | | grid is | + | | | | less then 90% | + | | | | these points | + | | | | will be | + | | | | removed, | + | | | | otherwise | + | | | | the error will | + | | | | be raised. | + +----------------+-------------------------------+--------------------------------------+ + | None (Default) | Aggregation function | | The error about | + | | | | missing | + | | | | frequency | + | | | | parameter is | + | | | | raised. | + +----------------+-------------------------------+--------------------------------------+ + | Some Value | Aggregation function | | Aggregate to | + | | | | frequency using | + | | | | provided | + | | | | aggregation | + | | | | function. | + +----------------+-------------------------------+--------------------------------------+ + + :type target_aggregate_function: Optional[str] + :keyword cv_step_size: Number of periods between the origin_time of one CV fold and the next fold. + For example, if `n_step` = 3 for daily data, the origin time for each fold will be three days apart. + :paramtype cv_step_size: Optional[int] + :keyword features_unknown_at_forecast_time: The feature columns that are available for training but + unknown at the time of forecast/inference. 
If features_unknown_at_forecast_time is set to an empty + list, it is assumed that all the feature columns in the dataset are known at inference time. If this + parameter is not set the support for future features is not enabled. + :paramtype features_unknown_at_forecast_time: Optional[Union[str, List[str]]] + """ + self._forecasting_settings = self._forecasting_settings or ForecastingSettings() + + self._forecasting_settings.country_or_region_for_holidays = ( + country_or_region_for_holidays + if country_or_region_for_holidays is not None + else self._forecasting_settings.country_or_region_for_holidays + ) + self._forecasting_settings.cv_step_size = ( + cv_step_size if cv_step_size is not None else self._forecasting_settings.cv_step_size + ) + self._forecasting_settings.forecast_horizon = ( + forecast_horizon if forecast_horizon is not None else self._forecasting_settings.forecast_horizon + ) + self._forecasting_settings.target_lags = ( + target_lags if target_lags is not None else self._forecasting_settings.target_lags + ) + self._forecasting_settings.target_rolling_window_size = ( + target_rolling_window_size + if target_rolling_window_size is not None + else self._forecasting_settings.target_rolling_window_size + ) + self._forecasting_settings.frequency = ( + frequency if frequency is not None else self._forecasting_settings.frequency + ) + self._forecasting_settings.feature_lags = ( + feature_lags if feature_lags is not None else self._forecasting_settings.feature_lags + ) + self._forecasting_settings.seasonality = ( + seasonality if seasonality is not None else self._forecasting_settings.seasonality + ) + self._forecasting_settings.use_stl = use_stl if use_stl is not None else self._forecasting_settings.use_stl + self._forecasting_settings.short_series_handling_config = ( + short_series_handling_config + if short_series_handling_config is not None + else self._forecasting_settings.short_series_handling_config + ) + self._forecasting_settings.target_aggregate_function = ( + target_aggregate_function + if target_aggregate_function is not None + else self._forecasting_settings.target_aggregate_function + ) + self._forecasting_settings.time_column_name = ( + time_column_name if time_column_name is not None else self._forecasting_settings.time_column_name + ) + self._forecasting_settings.time_series_id_column_names = ( + time_series_id_column_names + if time_series_id_column_names is not None + else self._forecasting_settings.time_series_id_column_names + ) + self._forecasting_settings.features_unknown_at_forecast_time = ( + features_unknown_at_forecast_time + if features_unknown_at_forecast_time is not None + else self._forecasting_settings.features_unknown_at_forecast_time + ) + + # override + def set_training( + self, + *, + enable_onnx_compatible_models: Optional[bool] = None, + enable_dnn_training: Optional[bool] = None, + enable_model_explainability: Optional[bool] = None, + enable_stack_ensemble: Optional[bool] = None, + enable_vote_ensemble: Optional[bool] = None, + stack_ensemble_settings: Optional[StackEnsembleSettings] = None, + ensemble_model_download_timeout: Optional[int] = None, + allowed_training_algorithms: Optional[List[str]] = None, + blocked_training_algorithms: Optional[List[str]] = None, + training_mode: Optional[Union[str, TabularTrainingMode]] = None, + ) -> None: + """ + The method to configure forecast training related settings. + + :keyword enable_onnx_compatible_models: + Whether to enable or disable enforcing the ONNX-compatible models. + The default is False. 
+ For more information about Open Neural Network Exchange (ONNX) and Azure Machine
+ Learning, see this `article <https://learn.microsoft.com/azure/machine-learning/concept-onnx>`__.
+ :type enable_onnx_compatible_models: Optional[bool]
+ :keyword enable_dnn_training:
+ Whether to include DNN based models during model selection.
+ The default is True for DNN NLP tasks and False for all other AutoML tasks.
+ :paramtype enable_dnn_training: Optional[bool]
+ :keyword enable_model_explainability:
+ Whether to enable explaining the best AutoML model at the end of all AutoML training iterations.
+ For more information, see `Interpretability: model explanations in automated machine learning
+ <https://learn.microsoft.com/azure/machine-learning/how-to-machine-learning-interpretability-automl>`__.
+ , defaults to None
+ :type enable_model_explainability: Optional[bool]
+ :keyword enable_stack_ensemble:
+ Whether to enable/disable StackEnsemble iteration.
+ If the `enable_onnx_compatible_models` flag is set, then StackEnsemble iteration will be disabled.
+ Similarly, for timeseries tasks, StackEnsemble iteration is disabled by default, to avoid risks of
+ overfitting due to the small training set used in fitting the meta learner.
+ For more information about ensembles, see `Ensemble configuration
+ <https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#ensemble>`__
+ , defaults to None
+ :type enable_stack_ensemble: Optional[bool]
+ :keyword enable_vote_ensemble:
+ Whether to enable/disable VotingEnsemble iteration.
+ For more information about ensembles, see `Ensemble configuration
+ <https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#ensemble>`__
+ , defaults to None
+ :type enable_vote_ensemble: Optional[bool]
+ :keyword stack_ensemble_settings:
+ Settings for StackEnsemble iteration, defaults to None
+ :paramtype stack_ensemble_settings: Optional[StackEnsembleSettings]
+ :keyword ensemble_model_download_timeout:
+ During VotingEnsemble and StackEnsemble model generation,
+ multiple fitted models from the previous child runs are downloaded. Configure this parameter with a
+ value higher than 300 seconds if more time is needed, defaults to None
+ :paramtype ensemble_model_download_timeout: Optional[int]
+ :keyword allowed_training_algorithms:
+ A list of model names to search for an experiment. If not specified,
+ then all models supported for the task are used minus any specified in ``blocked_training_algorithms``
+ or deprecated TensorFlow models, defaults to None
+ :paramtype allowed_training_algorithms: Optional[List[str]]
+ :keyword blocked_training_algorithms:
+ A list of algorithms to ignore for an experiment, defaults to None
+ :paramtype blocked_training_algorithms: Optional[List[str]]
+ :keyword training_mode:
+ [Experimental] The training mode to use.
+ The possible values are:
+
+ * distributed - enables distributed training for supported algorithms.
+
+ * non_distributed - disables distributed training.
+
+ * auto - currently the same as non_distributed; this may change in the future.
+
+ Note: This parameter is in public preview and may change in the future.
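Pulling the pieces of this class together, a rough end-to-end sketch (import paths, the MLTable path, and column names are assumptions; the keyword names come from set_data, set_forecast_settings, and this set_training override):

    # Sketch only: import paths and data/column names are placeholders.
    from azure.ai.ml import Input
    from azure.ai.ml.automl import ForecastingJob
    from azure.ai.ml.constants import AssetTypes

    job = ForecastingJob()
    job.set_data(
        training_data=Input(type=AssetTypes.MLTABLE, path="./train-mltable-folder"),
        target_column_name="demand",
    )
    job.set_forecast_settings(
        time_column_name="timestamp",
        forecast_horizon=14,                  # 14 periods ahead, in the data's own frequency
        frequency="D",                        # pandas offset alias (daily)
        target_lags="auto",
        target_rolling_window_size="auto",
        country_or_region_for_holidays="US",
    )
    # enable_stack_ensemble is left unset here; the override body below then disables
    # StackEnsemble by default, since it is not supported for forecasting tasks.
    job.set_training(enable_vote_ensemble=True)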
+ :type training_mode: Optional[Union[~azure.ai.ml.constants.TabularTrainingMode, str]] + """ + super().set_training( + enable_onnx_compatible_models=enable_onnx_compatible_models, + enable_dnn_training=enable_dnn_training, + enable_model_explainability=enable_model_explainability, + enable_stack_ensemble=enable_stack_ensemble, + enable_vote_ensemble=enable_vote_ensemble, + stack_ensemble_settings=stack_ensemble_settings, + ensemble_model_download_timeout=ensemble_model_download_timeout, + allowed_training_algorithms=allowed_training_algorithms, + blocked_training_algorithms=blocked_training_algorithms, + training_mode=training_mode, + ) + + # Disable stack ensemble by default, since it is currently not supported for forecasting tasks + if enable_stack_ensemble is None: + if self._training is not None: + self._training.enable_stack_ensemble = False + + def _to_rest_object(self) -> JobBase: + if self._forecasting_settings is not None: + forecasting_task = RestForecasting( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + validation_data_size=self.validation_data_size, + weight_column_name=self.weight_column_name, + cv_split_column_names=self.cv_split_column_names, + n_cross_validations=self.n_cross_validations, + test_data=self.test_data, + test_data_size=self.test_data_size, + featurization_settings=self._featurization._to_rest_object() if self._featurization else None, + limit_settings=self._limits._to_rest_object() if self._limits else None, + training_settings=self._training._to_rest_object() if self._training else None, + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + forecasting_settings=self._forecasting_settings._to_rest_object(), + ) + else: + forecasting_task = RestForecasting( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + validation_data_size=self.validation_data_size, + weight_column_name=self.weight_column_name, + cv_split_column_names=self.cv_split_column_names, + n_cross_validations=self.n_cross_validations, + test_data=self.test_data, + test_data_size=self.test_data_size, + featurization_settings=self._featurization._to_rest_object() if self._featurization else None, + limit_settings=self._limits._to_rest_object() if self._limits else None, + training_settings=self._training._to_rest_object() if self._training else None, + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + forecasting_settings=None, + ) + + self._resolve_data_inputs(forecasting_task) + self._validation_data_to_rest(forecasting_task) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=forecasting_task, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "ForecastingJob": + properties: RestAutoMLJob = obj.properties + task_details: RestForecasting = properties.task_details + + job_args_dict = { + "id": obj.id, + "name": obj.name, + 
"description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "experiment_name": properties.experiment_name, + "services": properties.services, + "status": properties.status, + "creation_context": obj.system_data, + "display_name": properties.display_name, + "compute": properties.compute_id, + "outputs": from_rest_data_outputs(properties.outputs), + "resources": properties.resources, + "identity": ( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + "queue_settings": properties.queue_settings, + } + + forecasting_job = cls( + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + validation_data_size=task_details.validation_data_size, + weight_column_name=task_details.weight_column_name, + cv_split_column_names=task_details.cv_split_column_names, + n_cross_validations=task_details.n_cross_validations, + test_data=task_details.test_data, + test_data_size=task_details.test_data_size, + featurization=( + TabularFeaturizationSettings._from_rest_object(task_details.featurization_settings) + if task_details.featurization_settings + else None + ), + limits=( + TabularLimitSettings._from_rest_object(task_details.limit_settings) + if task_details.limit_settings + else None + ), + training=( + ForecastingTrainingSettings._from_rest_object(task_details.training_settings) + if task_details.training_settings + else None + ), + primary_metric=task_details.primary_metric, + forecasting_settings=( + ForecastingSettings._from_rest_object(task_details.forecasting_settings) + if task_details.forecasting_settings + else None + ), + log_verbosity=task_details.log_verbosity, + **job_args_dict, + ) + + forecasting_job._restore_data_inputs() + forecasting_job._validation_data_from_rest() + + return forecasting_job + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "ForecastingJob": + from azure.ai.ml._schema.automl.table_vertical.forecasting import AutoMLForecastingSchema + from azure.ai.ml._schema.pipeline.automl_node import AutoMLForecastingNodeSchema + + if kwargs.pop("inside_pipeline", False): + loaded_data = load_from_dict(AutoMLForecastingNodeSchema, data, context, additional_message, **kwargs) + else: + loaded_data = load_from_dict(AutoMLForecastingSchema, data, context, additional_message, **kwargs) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "ForecastingJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + data_settings = { + "training_data": loaded_data.pop("training_data"), + "target_column_name": loaded_data.pop("target_column_name"), + "weight_column_name": loaded_data.pop("weight_column_name", None), + "validation_data": loaded_data.pop("validation_data", None), + "validation_data_size": loaded_data.pop("validation_data_size", None), + "cv_split_column_names": loaded_data.pop("cv_split_column_names", None), + "n_cross_validations": loaded_data.pop("n_cross_validations", None), + "test_data": loaded_data.pop("test_data", None), + "test_data_size": loaded_data.pop("test_data_size", None), + } + job = ForecastingJob(**loaded_data) + job.set_data(**data_settings) + return job + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.table_vertical.forecasting 
import AutoMLForecastingSchema + from azure.ai.ml._schema.pipeline.automl_node import AutoMLForecastingNodeSchema + + schema_dict: dict = {} + if inside_pipeline: + schema_dict = AutoMLForecastingNodeSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + else: + schema_dict = AutoMLForecastingSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return schema_dict + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ForecastingJob): + return NotImplemented + + if not super(ForecastingJob, self).__eq__(other): + return False + + return self.primary_metric == other.primary_metric and self._forecasting_settings == other._forecasting_settings + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/forecasting_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/forecasting_settings.py new file mode 100644 index 00000000..09439483 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/forecasting_settings.py @@ -0,0 +1,383 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=too-many-instance-attributes + +from typing import List, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + AutoForecastHorizon, + AutoSeasonality, + AutoTargetLags, + AutoTargetRollingWindowSize, + CustomForecastHorizon, + CustomSeasonality, + CustomTargetLags, + CustomTargetRollingWindowSize, + ForecastHorizonMode, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + ForecastingSettings as RestForecastingSettings, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + SeasonalityMode, + TargetLagsMode, + TargetRollingWindowSizeMode, +) +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class ForecastingSettings(RestTranslatableMixin): + """Forecasting settings for an AutoML Job. + + :param country_or_region_for_holidays: The country/region used to generate holiday features. These should be ISO + 3166 two-letter country/region code, for example 'US' or 'GB'. + :type country_or_region_for_holidays: Optional[str] + :param cv_step_size: + Number of periods between the origin_time of one CV fold and the next fold. For + example, if `n_step` = 3 for daily data, the origin time for each fold will be + three days apart. + :type cv_step_size: Optional[int] + :param forecast_horizon: + The desired maximum forecast horizon in units of time-series frequency. The default value is 1. + + Units are based on the time interval of your training data, e.g., monthly, weekly that the forecaster + should predict out. When task type is forecasting, this parameter is required. For more information on + setting forecasting parameters, see `Auto-train a time-series forecast model <https://learn.microsoft.com/ + azure/machine-learning/how-to-auto-train-forecast>`_. + :type forecast_horizon: Optional[Union[int, str]] + :param target_lags: + The number of past periods to lag from the target column. By default the lags are turned off. + + When forecasting, this parameter represents the number of rows to lag the target values based + on the frequency of the data. This is represented as a list or single integer. 
Lag should be used + when the relationship between the independent variables and the dependent variable does not match up or + correlate by default. For example, when trying to forecast demand for a product, the demand in any + month may depend on the price of specific commodities 3 months prior. In this example, you may want + to lag the target (demand) negatively by 3 months so that the model is training on the correct + relationship. For more information, see `Auto-train a time-series forecast model + <https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-forecast>`_. + + **Note on auto detection of target lags and rolling window size. + Please see the corresponding comments in the rolling window section.** + We use the following algorithm to detect the optimal target lag and rolling window size. + + #. Estimate the maximum lag order for the look-back feature selection. In our case it is the number of + periods till the next date frequency granularity, i.e. if the frequency is daily, it will be a week (7); + if it is weekly, it will be a month (4). That value multiplied by two is the largest + possible value of lags/rolling windows. In our examples, we will consider maximum lag + orders of 14 and 8 respectively. + #. Create a de-seasonalized series by adding trend and residual components. This will be used + in the next step. + #. Estimate the PACF (Partial Auto-Correlation Function) on the data from (2) + and search for points where the autocorrelation is significant, i.e. its absolute + value is more than 1.96/square_root(maximal lag value), which corresponds to a significance of 95%. + #. If all points are significant, we consider it strong seasonality + and do not create look-back features. + #. We scan the PACF values from the beginning, and the value before the first insignificant + autocorrelation will designate the lag. If the first significant element (a value correlated with + itself) is followed by an insignificant one, the lag will be 0 and we will not use look-back features. + :type target_lags: Union[str, int, List[int]] + :param target_rolling_window_size: + The number of past periods used to create a rolling window average of the target column. + + When forecasting, this parameter represents `n` historical periods to use to generate forecasted values, + <= training set size. If omitted, `n` is the full training set size. Specify this parameter + when you only want to consider a certain amount of history when training the model. + If set to 'auto', the rolling window will be estimated as the last + value where the PACF is more than the significance threshold. Please see the target_lags section for details. + :type target_rolling_window_size: Optional[Union[str, int]] + :param frequency: Forecast frequency. + + When forecasting, this parameter represents the period with which the forecast is desired, + for example daily, weekly, yearly, etc. The forecast frequency is the dataset frequency by default. + You can optionally set it to a frequency greater (but not lesser) than the dataset frequency. + We'll aggregate the data and generate the results at the forecast frequency. For example, + for daily data, you can set the frequency to be daily, weekly or monthly, but not hourly. + The frequency needs to be a pandas offset alias. + Please refer to the pandas documentation for more information: + https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects + :type frequency: Optional[str] + :param feature_lags: Flag for generating lags for the numeric features with 'auto' or None.
+ :type feature_lags: Optional[str] + :param seasonality: Set time series seasonality as an integer multiple of the series frequency. + If seasonality is set to 'auto', it will be inferred. + If set to None, the time series is assumed non-seasonal, which is equivalent to seasonality=1. + :type seasonality: Optional[Union[int, str]] + :param use_stl: Configure STL decomposition of the time-series target column. + use_stl can take three values: None (default) - no STL decomposition, 'season' - only generate the + season component, and 'season_trend' - generate both season and trend components. + :type use_stl: Optional[str] + :param short_series_handling_config: + The parameter defining how AutoML should handle short time series. + + Possible values: 'auto' (default), 'pad', 'drop', and None. + * **auto** short series will be padded if there are no long series, + otherwise short series will be dropped. + * **pad** all the short series will be padded. + * **drop** all the short series will be dropped. + * **None** the short series will not be modified. + If set to 'pad', the table will be padded with zeroes and + empty values for the regressors and random values for the target, with the mean + equal to the target value median for the given time series id. If the median is greater than or equal + to zero, the minimal padded value will be clipped by zero. + Input: + + +------------+---------------+----------+--------+ + | Date | numeric_value | string | target | + +============+===============+==========+========+ + | 2020-01-01 | 23 | green | 55 | + +------------+---------------+----------+--------+ + + Output assuming minimal number of values is four: + + +------------+---------------+----------+--------+ + | Date | numeric_value | string | target | + +============+===============+==========+========+ + | 2019-12-29 | 0 | NA | 55.1 | + +------------+---------------+----------+--------+ + | 2019-12-30 | 0 | NA | 55.6 | + +------------+---------------+----------+--------+ + | 2019-12-31 | 0 | NA | 54.5 | + +------------+---------------+----------+--------+ + | 2020-01-01 | 23 | green | 55 | + +------------+---------------+----------+--------+ + + **Note:** We have two parameters, short_series_handling_configuration and the + legacy short_series_handling. When both parameters are set, we + synchronize them as shown in the table below (short_series_handling_configuration and + short_series_handling for brevity are marked as handling_configuration and handling + respectively).
+ + +------------+--------------------------+----------------------+-----------------------------+ + | | handling | | handling configuration | | resulting handling | | resulting handling | + | | | | | configuration | + +============+==========================+======================+=============================+ + | True | auto | True | auto | + +------------+--------------------------+----------------------+-----------------------------+ + | True | pad | True | auto | + +------------+--------------------------+----------------------+-----------------------------+ + | True | drop | True | auto | + +------------+--------------------------+----------------------+-----------------------------+ + | True | None | False | None | + +------------+--------------------------+----------------------+-----------------------------+ + | False | auto | False | None | + +------------+--------------------------+----------------------+-----------------------------+ + | False | pad | False | None | + +------------+--------------------------+----------------------+-----------------------------+ + | False | drop | False | None | + +------------+--------------------------+----------------------+-----------------------------+ + | False | None | False | None | + +------------+--------------------------+----------------------+-----------------------------+ + + :type short_series_handling_config: Optional[str] + :param target_aggregate_function: The function to be used to aggregate the time series target + column to conform to a user specified frequency. If the + target_aggregation_function is set, but the freq parameter + is not set, the error is raised. The possible target + aggregation functions are: "sum", "max", "min" and "mean". + + * The target column values are aggregated based on the specified operation. + Typically, sum is appropriate for most scenarios. + * Numerical predictor columns in your data are aggregated by sum, mean, minimum value, + and maximum value. As a result, automated ML generates new columns suffixed with the + aggregation function name and applies the selected aggregate operation. + * For categorical predictor columns, the data is aggregated by mode, + the most prominent category in the window. + * Date predictor columns are aggregated by minimum value, maximum value and mode. + + +----------------+-------------------------------+--------------------------------------+ + | | freq | | target_aggregation_function | | Data regularity | + | | | | fixing mechanism | + +================+===============================+======================================+ + | None (Default) | None (Default) | | The aggregation is not | + | | | | applied. If the valid | + | | | | frequency can not be | + | | | | determined the error will | + | | | | be raised. | + +----------------+-------------------------------+--------------------------------------+ + | Some Value | None (Default) | | The aggregation is not | + | | | | applied. If the number | + | | | | of data points compliant | + | | | | to given frequency grid | + | | | | is less then 90% these points | + | | | | will be removed, otherwise | + | | | | the error will be raised. | + +----------------+-------------------------------+--------------------------------------+ + | None (Default) | Aggregation function | | The error about missing | + | | | | frequency parameter | + | | | | is raised. 
| + +----------------+-------------------------------+--------------------------------------+ + | Some Value | Aggregation function | | Aggregate to frequency using | + | | | | provided aggregation function. | + +----------------+-------------------------------+--------------------------------------+ + :type target_aggregate_function: str + :param time_column_name: + The name of the time column. This parameter is required when forecasting to specify the datetime + column in the input data used for building the time series and inferring its frequency. + :type time_column_name: Optional[str] + :param time_series_id_column_names: + The names of columns used to group a timeseries. + It can be used to create multiple series. If time series id column names is not defined or + the identifier columns specified do not identify all the series in the dataset, the time series identifiers + will be automatically created for your dataset. + :type time_series_id_column_names: Union[str, List[str]] + :param features_unknown_at_forecast_time: + The feature columns that are available for training but unknown at the time of forecast/inference. + If features_unknown_at_forecast_time is set to an empty list, it is assumed that + all the feature columns in the dataset are known at inference time. If this parameter is not set + the support for future features is not enabled. + :type features_unknown_at_forecast_time: Optional[Union[str, List[str]]] + """ + + def __init__( + self, + *, + country_or_region_for_holidays: Optional[str] = None, + cv_step_size: Optional[int] = None, + forecast_horizon: Optional[Union[str, int]] = None, + target_lags: Optional[Union[str, int, List[int]]] = None, + target_rolling_window_size: Optional[Union[str, int]] = None, + frequency: Optional[str] = None, + feature_lags: Optional[str] = None, + seasonality: Optional[Union[str, int]] = None, + use_stl: Optional[str] = None, + short_series_handling_config: Optional[str] = None, + target_aggregate_function: Optional[str] = None, + time_column_name: Optional[str] = None, + time_series_id_column_names: Optional[Union[str, List[str]]] = None, + features_unknown_at_forecast_time: Optional[Union[str, List[str]]] = None, + ): + self.country_or_region_for_holidays = country_or_region_for_holidays + self.cv_step_size = cv_step_size + self.forecast_horizon = forecast_horizon + self.target_lags = target_lags + self.target_rolling_window_size = target_rolling_window_size + self.frequency = frequency + self.feature_lags = feature_lags + self.seasonality = seasonality + self.use_stl = use_stl + self.short_series_handling_config = short_series_handling_config + self.target_aggregate_function = target_aggregate_function + self.time_column_name = time_column_name + self.time_series_id_column_names = time_series_id_column_names + self.features_unknown_at_forecast_time = features_unknown_at_forecast_time + + def _to_rest_object(self) -> RestForecastingSettings: + forecast_horizon = None + if isinstance(self.forecast_horizon, str): + forecast_horizon = AutoForecastHorizon() + elif self.forecast_horizon: + forecast_horizon = CustomForecastHorizon(value=self.forecast_horizon) + + target_lags = None + if isinstance(self.target_lags, str): + target_lags = AutoTargetLags() + elif self.target_lags: + lags = [self.target_lags] if not isinstance(self.target_lags, list) else self.target_lags + target_lags = CustomTargetLags(values=lags) + + target_rolling_window_size = None + if isinstance(self.target_rolling_window_size, str): + target_rolling_window_size = 
AutoTargetRollingWindowSize() + elif self.target_rolling_window_size: + target_rolling_window_size = CustomTargetRollingWindowSize(value=self.target_rolling_window_size) + + seasonality = None + if isinstance(self.seasonality, str): + seasonality = AutoSeasonality() + elif self.seasonality: + seasonality = CustomSeasonality(value=self.seasonality) + + time_series_id_column_names = self.time_series_id_column_names + if isinstance(self.time_series_id_column_names, str) and self.time_series_id_column_names: + time_series_id_column_names = [self.time_series_id_column_names] + + features_unknown_at_forecast_time = self.features_unknown_at_forecast_time + if isinstance(self.features_unknown_at_forecast_time, str) and self.features_unknown_at_forecast_time: + features_unknown_at_forecast_time = [self.features_unknown_at_forecast_time] + + return RestForecastingSettings( + country_or_region_for_holidays=self.country_or_region_for_holidays, + cv_step_size=self.cv_step_size, + forecast_horizon=forecast_horizon, + time_column_name=self.time_column_name, + target_lags=target_lags, + target_rolling_window_size=target_rolling_window_size, + seasonality=seasonality, + frequency=self.frequency, + feature_lags=self.feature_lags, + use_stl=self.use_stl, + short_series_handling_config=self.short_series_handling_config, + target_aggregate_function=self.target_aggregate_function, + time_series_id_column_names=time_series_id_column_names, + features_unknown_at_forecast_time=features_unknown_at_forecast_time, + ) + + @classmethod + def _from_rest_object(cls, obj: RestForecastingSettings) -> "ForecastingSettings": + forecast_horizon = None + if obj.forecast_horizon and obj.forecast_horizon.mode == ForecastHorizonMode.AUTO: + forecast_horizon = obj.forecast_horizon.mode.lower() + elif obj.forecast_horizon: + forecast_horizon = obj.forecast_horizon.value + + rest_target_lags = obj.target_lags + target_lags = None + if rest_target_lags and rest_target_lags.mode == TargetLagsMode.AUTO: + target_lags = rest_target_lags.mode.lower() + elif rest_target_lags: + target_lags = rest_target_lags.values + + target_rolling_window_size = None + if obj.target_rolling_window_size and obj.target_rolling_window_size.mode == TargetRollingWindowSizeMode.AUTO: + target_rolling_window_size = obj.target_rolling_window_size.mode.lower() + elif obj.target_rolling_window_size: + target_rolling_window_size = obj.target_rolling_window_size.value + + seasonality = None + if obj.seasonality and obj.seasonality.mode == SeasonalityMode.AUTO: + seasonality = obj.seasonality.mode.lower() + elif obj.seasonality: + seasonality = obj.seasonality.value + + return cls( + country_or_region_for_holidays=obj.country_or_region_for_holidays, + cv_step_size=obj.cv_step_size, + forecast_horizon=forecast_horizon, + target_lags=target_lags, + target_rolling_window_size=target_rolling_window_size, + frequency=obj.frequency, + feature_lags=obj.feature_lags, + seasonality=seasonality, + use_stl=obj.use_stl, + short_series_handling_config=obj.short_series_handling_config, + target_aggregate_function=obj.target_aggregate_function, + time_column_name=obj.time_column_name, + time_series_id_column_names=obj.time_series_id_column_names, + features_unknown_at_forecast_time=obj.features_unknown_at_forecast_time, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ForecastingSettings): + return NotImplemented + return ( + self.country_or_region_for_holidays == other.country_or_region_for_holidays + and self.cv_step_size == other.cv_step_size + and 
self.forecast_horizon == other.forecast_horizon + and self.target_lags == other.target_lags + and self.target_rolling_window_size == other.target_rolling_window_size + and self.frequency == other.frequency + and self.feature_lags == other.feature_lags + and self.seasonality == other.seasonality + and self.use_stl == other.use_stl + and self.short_series_handling_config == other.short_series_handling_config + and self.target_aggregate_function == other.target_aggregate_function + and self.time_column_name == other.time_column_name + and self.time_series_id_column_names == other.time_series_id_column_names + and self.features_unknown_at_forecast_time == other.features_unknown_at_forecast_time + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/limit_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/limit_settings.py new file mode 100644 index 00000000..1024f504 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/limit_settings.py @@ -0,0 +1,101 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from typing import Optional + +from azure.ai.ml._restclient.v2023_04_01_preview.models import TableVerticalLimitSettings as RestTabularLimitSettings +from azure.ai.ml._utils.utils import from_iso_duration_format_mins, to_iso_duration_format_mins +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class TabularLimitSettings(RestTranslatableMixin): + """Limit settings for a AutoML Table Verticals. + + :param enable_early_termination: Whether to enable early termination if the score is not improving in + the short term. The default is True. + :type enable_early_termination: bool + :param exit_score: Target score for experiment. The experiment terminates after this score is reached. + :type exit_score: float + :param max_concurrent_trials: Maximum number of concurrent AutoML iterations. + :type max_concurrent_trials: int + :param max_cores_per_trial: The maximum number of threads to use for a given training iteration. + :type max_cores_per_trial: int + :param max_nodes: [Experimental] The maximum number of nodes to use for distributed training. + + * For forecasting, each model is trained using max(2, int(max_nodes / max_concurrent_trials)) nodes. + + * For classification/regression, each model is trained using max_nodes nodes. + + Note- This parameter is in public preview and might change in future. + :type max_nodes: int + :param max_trials: Maximum number of AutoML iterations. + :type max_trials: int + :param timeout_minutes: AutoML job timeout. + :type timeout_minutes: int + :param trial_timeout_minutes: AutoML job timeout. 
+ :type trial_timeout_minutes: int + """ + + def __init__( + self, + *, + enable_early_termination: Optional[bool] = None, + exit_score: Optional[float] = None, + max_concurrent_trials: Optional[int] = None, + max_cores_per_trial: Optional[int] = None, + max_nodes: Optional[int] = None, + max_trials: Optional[int] = None, + timeout_minutes: Optional[int] = None, + trial_timeout_minutes: Optional[int] = None, + ): + self.enable_early_termination = enable_early_termination + self.exit_score = exit_score + self.max_concurrent_trials = max_concurrent_trials + self.max_cores_per_trial = max_cores_per_trial + self.max_nodes = max_nodes + self.max_trials = max_trials + self.timeout_minutes = timeout_minutes + self.trial_timeout_minutes = trial_timeout_minutes + + def _to_rest_object(self) -> RestTabularLimitSettings: + return RestTabularLimitSettings( + enable_early_termination=self.enable_early_termination, + exit_score=self.exit_score, + max_concurrent_trials=self.max_concurrent_trials, + max_cores_per_trial=self.max_cores_per_trial, + max_nodes=self.max_nodes, + max_trials=self.max_trials, + timeout=to_iso_duration_format_mins(self.timeout_minutes), + trial_timeout=to_iso_duration_format_mins(self.trial_timeout_minutes), + ) + + @classmethod + def _from_rest_object(cls, obj: RestTabularLimitSettings) -> "TabularLimitSettings": + return cls( + enable_early_termination=obj.enable_early_termination, + exit_score=obj.exit_score, + max_concurrent_trials=obj.max_concurrent_trials, + max_cores_per_trial=obj.max_cores_per_trial, + max_nodes=obj.max_nodes, + max_trials=obj.max_trials, + timeout_minutes=from_iso_duration_format_mins(obj.timeout), + trial_timeout_minutes=from_iso_duration_format_mins(obj.trial_timeout), + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, TabularLimitSettings): + return NotImplemented + return ( + self.enable_early_termination == other.enable_early_termination + and self.exit_score == other.exit_score + and self.max_concurrent_trials == other.max_concurrent_trials + and self.max_cores_per_trial == other.max_cores_per_trial + and self.max_nodes == other.max_nodes + and self.max_trials == other.max_trials + and self.timeout_minutes == other.timeout_minutes + and self.trial_timeout_minutes == other.trial_timeout_minutes + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/regression_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/regression_job.py new file mode 100644 index 00000000..3531e52c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/regression_job.py @@ -0,0 +1,239 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase +from azure.ai.ml._restclient.v2023_04_01_preview.models import Regression as RestRegression +from azure.ai.ml._restclient.v2023_04_01_preview.models import RegressionPrimaryMetrics, TaskType +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.tabular import AutoMLTabular, TabularFeaturizationSettings, TabularLimitSettings +from azure.ai.ml.entities._job.automl.training_settings import RegressionTrainingSettings +from azure.ai.ml.entities._util import load_from_dict + + +class RegressionJob(AutoMLTabular): + """Configuration for AutoML Regression Job.""" + + _DEFAULT_PRIMARY_METRIC = RegressionPrimaryMetrics.NORMALIZED_ROOT_MEAN_SQUARED_ERROR + + def __init__( + self, + *, + primary_metric: Optional[str] = None, + **kwargs: Any, + ) -> None: + """Initialize a new AutoML Regression task. + + :param primary_metric: The primary metric to use for optimization + :type primary_metric: str + :param kwargs: Job-specific arguments + :type kwargs: dict + """ + # Extract any task specific settings + featurization = kwargs.pop("featurization", None) + limits = kwargs.pop("limits", None) + training = kwargs.pop("training", None) + + super().__init__( + task_type=TaskType.REGRESSION, + featurization=featurization, + limits=limits, + training=training, + **kwargs, + ) + + self.primary_metric = primary_metric or RegressionJob._DEFAULT_PRIMARY_METRIC + + @property + def primary_metric(self) -> Union[str, RegressionPrimaryMetrics]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, RegressionPrimaryMetrics]) -> None: + # TODO: better way to do this + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + self._primary_metric = ( + RegressionJob._DEFAULT_PRIMARY_METRIC + if value is None + else RegressionPrimaryMetrics[camel_to_snake(value).upper()] + ) + + @property + def training(self) -> RegressionTrainingSettings: + return self._training or RegressionTrainingSettings() + + @training.setter + def training(self, value: Union[Dict, RegressionTrainingSettings]) -> None: # pylint: disable=unused-argument + ... 
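# A minimal usage sketch (illustrative only, not part of this module): constructing a
# RegressionJob directly and wiring up its data via set_data(), mirroring the pattern used in
# _create_instance_from_schema_dict further below. The MLTable path and the target column name
# are hypothetical placeholders; TabularLimitSettings is imported at the top of this module,
# and Input/AssetTypes come from the public azure.ai.ml namespace.
#
#     from azure.ai.ml import Input
#     from azure.ai.ml.constants import AssetTypes
#
#     job = RegressionJob(
#         primary_metric="normalized_root_mean_squared_error",
#         limits=TabularLimitSettings(max_trials=20, timeout_minutes=60),
#     )
#     job.set_data(
#         training_data=Input(type=AssetTypes.MLTABLE, path="./train-mltable-folder"),  # hypothetical path
#         target_column_name="price",  # hypothetical column name
#         n_cross_validations=5,
#     )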
+ + def _to_rest_object(self) -> JobBase: + regression_task = RestRegression( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + validation_data_size=self.validation_data_size, + weight_column_name=self.weight_column_name, + cv_split_column_names=self.cv_split_column_names, + n_cross_validations=self.n_cross_validations, + test_data=self.test_data, + test_data_size=self.test_data_size, + featurization_settings=self._featurization._to_rest_object() if self._featurization else None, + limit_settings=self._limits._to_rest_object() if self._limits else None, + training_settings=self._training._to_rest_object() if self._training else None, + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + self._resolve_data_inputs(regression_task) + self._validation_data_to_rest(regression_task) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=regression_task, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "RegressionJob": + properties: RestAutoMLJob = obj.properties + task_details: RestRegression = properties.task_details + + job_args_dict = { + "id": obj.id, + "name": obj.name, + "description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "experiment_name": properties.experiment_name, + "services": properties.services, + "status": properties.status, + "creation_context": obj.system_data, + "display_name": properties.display_name, + "compute": properties.compute_id, + "outputs": from_rest_data_outputs(properties.outputs), + "resources": properties.resources, + "identity": ( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + "queue_settings": properties.queue_settings, + } + + regression_job = cls( + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + validation_data_size=task_details.validation_data_size, + weight_column_name=task_details.weight_column_name, + cv_split_column_names=task_details.cv_split_column_names, + n_cross_validations=task_details.n_cross_validations, + test_data=task_details.test_data, + test_data_size=task_details.test_data_size, + featurization=( + TabularFeaturizationSettings._from_rest_object(task_details.featurization_settings) + if task_details.featurization_settings + else None + ), + limits=( + TabularLimitSettings._from_rest_object(task_details.limit_settings) + if task_details.limit_settings + else None + ), + training=( + RegressionTrainingSettings._from_rest_object(task_details.training_settings) + if task_details.training_settings + else None + ), + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + **job_args_dict, + ) + + regression_job._restore_data_inputs() + regression_job._validation_data_from_rest() + + return regression_job + + @classmethod + def 
_load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "RegressionJob": + from azure.ai.ml._schema.automl.table_vertical.regression import AutoMLRegressionSchema + from azure.ai.ml._schema.pipeline.automl_node import AutoMLRegressionNodeSchema + + if kwargs.pop("inside_pipeline", False): + loaded_data = load_from_dict(AutoMLRegressionNodeSchema, data, context, additional_message, **kwargs) + else: + loaded_data = load_from_dict(AutoMLRegressionSchema, data, context, additional_message, **kwargs) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "RegressionJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + data_settings = { + "training_data": loaded_data.pop("training_data"), + "target_column_name": loaded_data.pop("target_column_name"), + "weight_column_name": loaded_data.pop("weight_column_name", None), + "validation_data": loaded_data.pop("validation_data", None), + "validation_data_size": loaded_data.pop("validation_data_size", None), + "cv_split_column_names": loaded_data.pop("cv_split_column_names", None), + "n_cross_validations": loaded_data.pop("n_cross_validations", None), + "test_data": loaded_data.pop("test_data", None), + "test_data_size": loaded_data.pop("test_data_size", None), + } + job = RegressionJob(**loaded_data) + job.set_data(**data_settings) + return job + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.table_vertical.regression import AutoMLRegressionSchema + from azure.ai.ml._schema.pipeline.automl_node import AutoMLRegressionNodeSchema + + schema_dict: dict = {} + if inside_pipeline: + schema_dict = AutoMLRegressionNodeSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + else: + schema_dict = AutoMLRegressionSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + + return schema_dict + + def __eq__(self, other: object) -> bool: + if not isinstance(other, RegressionJob): + return NotImplemented + + if not super(RegressionJob, self).__eq__(other): + return False + + return self.primary_metric == other.primary_metric + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/training_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/training_settings.py new file mode 100644 index 00000000..97bc7e17 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/training_settings.py @@ -0,0 +1,357 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +# pylint: disable=R0902,protected-access + +from typing import Any, List, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationModels +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + ClassificationTrainingSettings as RestClassificationTrainingSettings, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ForecastingModels +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + ForecastingTrainingSettings as RestForecastingTrainingSettings, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import RegressionModels +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + RegressionTrainingSettings as RestRegressionTrainingSettings, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import TrainingSettings as RestTrainingSettings +from azure.ai.ml._utils.utils import camel_to_snake, from_iso_duration_format_mins, to_iso_duration_format_mins +from azure.ai.ml.constants import TabularTrainingMode +from azure.ai.ml.entities._job.automl.stack_ensemble_settings import StackEnsembleSettings +from azure.ai.ml.entities._mixins import RestTranslatableMixin +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +class TrainingSettings(RestTranslatableMixin): + """TrainingSettings class for Azure Machine Learning.""" + + def __init__( + self, + *, + enable_onnx_compatible_models: Optional[bool] = None, + enable_dnn_training: Optional[bool] = None, + enable_model_explainability: Optional[bool] = None, + enable_stack_ensemble: Optional[bool] = None, + enable_vote_ensemble: Optional[bool] = None, + stack_ensemble_settings: Optional[StackEnsembleSettings] = None, + ensemble_model_download_timeout: Optional[int] = None, + allowed_training_algorithms: Optional[List[str]] = None, + blocked_training_algorithms: Optional[List[str]] = None, + training_mode: Optional[Union[str, TabularTrainingMode]] = None, + ): + """TrainingSettings class for Azure Machine Learning. + + :param enable_onnx_compatible_models: If set to True, the model will be trained to be compatible with ONNX + :type enable_onnx_compatible_models: typing.Optional[bool] + :param enable_dnn_training: If set to True,the model will use DNN training + :type enable_dnn_training: typing.Optional[bool] + :param enable_model_explainability: If set to True, the model will be trained to be explainable + :type enable_model_explainability: typing.Optional[bool] + :param enable_stack_ensemble: If set to True, a final ensemble model will be created using a stack of models + :type enable_stack_ensemble: typing.Optional[bool] + :param enable_vote_ensemble: If set to True, a final ensemble model will be created using a voting ensemble + :type enable_vote_ensemble: typing.Optional[bool] + :param stack_ensemble_settings: Settings for stack ensemble + :type stack_ensemble_settings: typing.Optional[azure.ai.ml.automl.StackEnsembleSettings] + :param ensemble_model_download_timeout: Timeout for downloading ensemble models + :type ensemble_model_download_timeout: typing.Optional[typing.List[int]] + :param allowed_training_algorithms: Models to train + :type allowed_training_algorithms: typing.Optional[typing.List[str]] + :param blocked_training_algorithms: Models that will not be considered for training + :type blocked_training_algorithms: typing.Optional[typing.List[str]] + :param training_mode: [Experimental] The training mode to use. 
+ The possible values are- + + * distributed- enables distributed training for supported algorithms. + + * non_distributed- disables distributed training. + + * auto- Currently, it is same as non_distributed. In future, this might change. + + Note: This parameter is in public preview and may change in future. + :type training_mode: typing.Optional[typing.Union[str, azure.ai.ml.constants.TabularTrainingMode]] + """ + self.enable_onnx_compatible_models = enable_onnx_compatible_models + self.enable_dnn_training = enable_dnn_training + self.enable_model_explainability = enable_model_explainability + self.enable_stack_ensemble = enable_stack_ensemble + self.enable_vote_ensemble = enable_vote_ensemble + self.stack_ensemble_settings = stack_ensemble_settings + self.ensemble_model_download_timeout = ensemble_model_download_timeout + self.allowed_training_algorithms = allowed_training_algorithms + self.blocked_training_algorithms = blocked_training_algorithms + self.training_mode = training_mode + + @property + def training_mode(self) -> Optional[TabularTrainingMode]: + return self._training_mode + + @training_mode.setter + def training_mode(self, value: Optional[Union[str, TabularTrainingMode]]) -> None: + if value is None or value is TabularTrainingMode: + self._training_mode = value + elif hasattr(TabularTrainingMode, camel_to_snake(value).upper()): + self._training_mode = TabularTrainingMode[camel_to_snake(value).upper()] + else: + supported_values = ", ".join([f'"{camel_to_snake(mode.value)}"' for mode in TabularTrainingMode]) + msg = ( + f"Unsupported training mode: {value}. Supported values are- {supported_values}. " + "Or you can use azure.ai.ml.constants.TabularTrainingMode enum." + ) + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + @property + def allowed_training_algorithms(self) -> Optional[List[str]]: + return self._allowed_training_algorithms + + @allowed_training_algorithms.setter + def allowed_training_algorithms(self, value: Optional[List[str]]) -> None: + self._allowed_training_algorithms = value + + @property + def blocked_training_algorithms(self) -> Optional[List[str]]: + return self._blocked_training_algorithms + + @blocked_training_algorithms.setter + def blocked_training_algorithms(self, value: Optional[List[str]]) -> None: + self._blocked_training_algorithms = value + + def _to_rest_object(self) -> RestTrainingSettings: + return RestTrainingSettings( + enable_dnn_training=self.enable_dnn_training, + enable_onnx_compatible_models=self.enable_onnx_compatible_models, + enable_model_explainability=self.enable_model_explainability, + enable_stack_ensemble=self.enable_stack_ensemble, + enable_vote_ensemble=self.enable_vote_ensemble, + stack_ensemble_settings=( + self.stack_ensemble_settings._to_rest_object() if self.stack_ensemble_settings else None + ), + ensemble_model_download_timeout=to_iso_duration_format_mins(self.ensemble_model_download_timeout), + training_mode=self.training_mode, + ) + + @classmethod + def _from_rest_object(cls, obj: RestTrainingSettings) -> "TrainingSettings": + return cls( + enable_dnn_training=obj.enable_dnn_training, + enable_onnx_compatible_models=obj.enable_onnx_compatible_models, + enable_model_explainability=obj.enable_model_explainability, + enable_stack_ensemble=obj.enable_stack_ensemble, + enable_vote_ensemble=obj.enable_vote_ensemble, + ensemble_model_download_timeout=from_iso_duration_format_mins(obj.ensemble_model_download_timeout), + 
stack_ensemble_settings=( + StackEnsembleSettings._from_rest_object(obj.stack_ensemble_settings) + if obj.stack_ensemble_settings + else None + ), + training_mode=obj.training_mode, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, TrainingSettings): + return NotImplemented + return ( + self.enable_dnn_training == other.enable_dnn_training + and self.enable_onnx_compatible_models == other.enable_onnx_compatible_models + and self.enable_model_explainability == other.enable_model_explainability + and self.enable_stack_ensemble == other.enable_stack_ensemble + and self.enable_vote_ensemble == other.enable_vote_ensemble + and self.ensemble_model_download_timeout == other.ensemble_model_download_timeout + and self.stack_ensemble_settings == other.stack_ensemble_settings + and self.allowed_training_algorithms == other.allowed_training_algorithms + and self.blocked_training_algorithms == other.blocked_training_algorithms + and self.training_mode == other.training_mode + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) + + +class ClassificationTrainingSettings(TrainingSettings): + """Classification TrainingSettings class for Azure Machine Learning.""" + + def __init__( + self, + **kwargs: Any, + ): + super().__init__(**kwargs) + + @property + def allowed_training_algorithms(self) -> Optional[List]: + return self._allowed_training_algorithms + + @allowed_training_algorithms.setter + def allowed_training_algorithms(self, allowed_model_list: Union[List[str], List[ClassificationModels]]) -> None: + self._allowed_training_algorithms = ( + None + if allowed_model_list is None + else [ClassificationModels[camel_to_snake(o)] for o in allowed_model_list] + ) + + @property + def blocked_training_algorithms(self) -> Optional[List]: + return self._blocked_training_algorithms + + @blocked_training_algorithms.setter + def blocked_training_algorithms(self, blocked_model_list: Union[List[str], List[ClassificationModels]]) -> None: + self._blocked_training_algorithms = ( + None + if blocked_model_list is None + else [ClassificationModels[camel_to_snake(o)] for o in blocked_model_list] + ) + + def _to_rest_object(self) -> RestClassificationTrainingSettings: + return RestClassificationTrainingSettings( + enable_dnn_training=self.enable_dnn_training, + enable_onnx_compatible_models=self.enable_onnx_compatible_models, + enable_model_explainability=self.enable_model_explainability, + enable_stack_ensemble=self.enable_stack_ensemble, + enable_vote_ensemble=self.enable_vote_ensemble, + stack_ensemble_settings=self.stack_ensemble_settings, + ensemble_model_download_timeout=to_iso_duration_format_mins(self.ensemble_model_download_timeout), + allowed_training_algorithms=self.allowed_training_algorithms, + blocked_training_algorithms=self.blocked_training_algorithms, + training_mode=self.training_mode, + ) + + @classmethod + def _from_rest_object(cls, obj: RestClassificationTrainingSettings) -> "ClassificationTrainingSettings": + return cls( + enable_dnn_training=obj.enable_dnn_training, + enable_onnx_compatible_models=obj.enable_onnx_compatible_models, + enable_model_explainability=obj.enable_model_explainability, + enable_stack_ensemble=obj.enable_stack_ensemble, + enable_vote_ensemble=obj.enable_vote_ensemble, + ensemble_model_download_timeout=from_iso_duration_format_mins(obj.ensemble_model_download_timeout), + stack_ensemble_settings=obj.stack_ensemble_settings, + allowed_training_algorithms=obj.allowed_training_algorithms, + 
blocked_training_algorithms=obj.blocked_training_algorithms, + training_mode=obj.training_mode, + ) + + +class ForecastingTrainingSettings(TrainingSettings): + """Forecasting TrainingSettings class for Azure Machine Learning.""" + + def __init__( + self, + **kwargs: Any, + ): + super().__init__(**kwargs) + + @property + def allowed_training_algorithms(self) -> Optional[List]: + return self._allowed_training_algorithms + + @allowed_training_algorithms.setter + def allowed_training_algorithms(self, allowed_model_list: Union[List[str], List[ForecastingModels]]) -> None: + self._allowed_training_algorithms = ( + None if allowed_model_list is None else [ForecastingModels[camel_to_snake(o)] for o in allowed_model_list] + ) + + @property + def blocked_training_algorithms(self) -> Optional[List]: + return self._blocked_training_algorithms + + @blocked_training_algorithms.setter + def blocked_training_algorithms(self, blocked_model_list: Union[List[str], List[ForecastingModels]]) -> None: + self._blocked_training_algorithms = ( + None if blocked_model_list is None else [ForecastingModels[camel_to_snake(o)] for o in blocked_model_list] + ) + + def _to_rest_object(self) -> RestForecastingTrainingSettings: + return RestForecastingTrainingSettings( + enable_dnn_training=self.enable_dnn_training, + enable_onnx_compatible_models=self.enable_onnx_compatible_models, + enable_model_explainability=self.enable_model_explainability, + enable_stack_ensemble=self.enable_stack_ensemble, + enable_vote_ensemble=self.enable_vote_ensemble, + stack_ensemble_settings=self.stack_ensemble_settings, + ensemble_model_download_timeout=to_iso_duration_format_mins(self.ensemble_model_download_timeout), + allowed_training_algorithms=self.allowed_training_algorithms, + blocked_training_algorithms=self.blocked_training_algorithms, + training_mode=self.training_mode, + ) + + @classmethod + def _from_rest_object(cls, obj: RestForecastingTrainingSettings) -> "ForecastingTrainingSettings": + return cls( + enable_dnn_training=obj.enable_dnn_training, + enable_onnx_compatible_models=obj.enable_onnx_compatible_models, + enable_model_explainability=obj.enable_model_explainability, + enable_stack_ensemble=obj.enable_stack_ensemble, + enable_vote_ensemble=obj.enable_vote_ensemble, + ensemble_model_download_timeout=from_iso_duration_format_mins(obj.ensemble_model_download_timeout), + stack_ensemble_settings=obj.stack_ensemble_settings, + allowed_training_algorithms=obj.allowed_training_algorithms, + blocked_training_algorithms=obj.blocked_training_algorithms, + training_mode=obj.training_mode, + ) + + +class RegressionTrainingSettings(TrainingSettings): + """Regression TrainingSettings class for Azure Machine Learning.""" + + def __init__( + self, + **kwargs: Any, + ): + super().__init__(**kwargs) + + @property + def allowed_training_algorithms(self) -> Optional[List]: + return self._allowed_training_algorithms + + @allowed_training_algorithms.setter + def allowed_training_algorithms(self, allowed_model_list: Union[List[str], List[ForecastingModels]]) -> None: + self._allowed_training_algorithms = ( + None if allowed_model_list is None else [RegressionModels[camel_to_snake(o)] for o in allowed_model_list] + ) + + @property + def blocked_training_algorithms(self) -> Optional[List]: + return self._blocked_training_algorithms + + @blocked_training_algorithms.setter + def blocked_training_algorithms(self, blocked_model_list: Union[List[str], List[ForecastingModels]]) -> None: + self._blocked_training_algorithms = ( + None if blocked_model_list 
is None else [RegressionModels[camel_to_snake(o)] for o in blocked_model_list] + ) + + def _to_rest_object(self) -> RestRegressionTrainingSettings: + return RestRegressionTrainingSettings( + enable_dnn_training=self.enable_dnn_training, + enable_onnx_compatible_models=self.enable_onnx_compatible_models, + enable_model_explainability=self.enable_model_explainability, + enable_stack_ensemble=self.enable_stack_ensemble, + enable_vote_ensemble=self.enable_vote_ensemble, + stack_ensemble_settings=self.stack_ensemble_settings, + ensemble_model_download_timeout=to_iso_duration_format_mins(self.ensemble_model_download_timeout), + allowed_training_algorithms=self.allowed_training_algorithms, + blocked_training_algorithms=self.blocked_training_algorithms, + training_mode=self.training_mode, + ) + + @classmethod + def _from_rest_object(cls, obj: RestRegressionTrainingSettings) -> "RegressionTrainingSettings": + return cls( + enable_dnn_training=obj.enable_dnn_training, + enable_onnx_compatible_models=obj.enable_onnx_compatible_models, + enable_model_explainability=obj.enable_model_explainability, + enable_stack_ensemble=obj.enable_stack_ensemble, + enable_vote_ensemble=obj.enable_vote_ensemble, + ensemble_model_download_timeout=from_iso_duration_format_mins(obj.ensemble_model_download_timeout), + stack_ensemble_settings=obj.stack_ensemble_settings, + allowed_training_algorithms=obj.allowed_training_algorithms, + blocked_training_algorithms=obj.blocked_training_algorithms, + training_mode=obj.training_mode, + ) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/utils.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/utils.py new file mode 100644 index 00000000..08521d7e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/utils.py @@ -0,0 +1,47 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from typing import TYPE_CHECKING, Dict, Type, Union + +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + +if TYPE_CHECKING: + from azure.ai.ml.entities._job.automl.image.image_classification_search_space import ImageClassificationSearchSpace + from azure.ai.ml.entities._job.automl.image.image_object_detection_search_space import ( + ImageObjectDetectionSearchSpace, + ) + from azure.ai.ml.entities._job.automl.nlp.nlp_search_space import NlpSearchSpace + from azure.ai.ml.entities._job.automl.search_space import SearchSpace + + +def cast_to_specific_search_space( + input: Union[Dict, "SearchSpace"], # pylint: disable=redefined-builtin + class_name: Union[ + Type["ImageClassificationSearchSpace"], Type["ImageObjectDetectionSearchSpace"], Type["NlpSearchSpace"] + ], + task_type: str, +) -> Union["ImageClassificationSearchSpace", "ImageObjectDetectionSearchSpace", "NlpSearchSpace"]: + def validate_searchspace_args(input_dict: dict) -> None: + searchspace = class_name() + for key in input_dict: + if not hasattr(searchspace, key): + msg = f"Received unsupported search space parameter for {task_type} Job." 
+ raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + if isinstance(input, dict): + validate_searchspace_args(input) + specific_search_space = class_name(**input) + else: + validate_searchspace_args(input.__dict__) + specific_search_space = class_name._from_search_space_object(input) # pylint: disable=protected-access + + res: Union["ImageClassificationSearchSpace", "ImageObjectDetectionSearchSpace", "NlpSearchSpace"] = ( + specific_search_space + ) + return res diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/base_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/base_job.py new file mode 100644 index 00000000..72b464e5 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/base_job.py @@ -0,0 +1,85 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +import logging +from typing import Any, Dict + +from azure.ai.ml._restclient.runhistory.models import Run +from azure.ai.ml._schema.job import BaseJobSchema +from azure.ai.ml.constants import JobType +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, TYPE +from azure.ai.ml.entities._system_data import SystemData +from azure.ai.ml.entities._util import load_from_dict + +from .job import Job + +module_logger = logging.getLogger(__name__) + +""" +TODO[Joe]: This class is temporarily created to handle "Base" job type from the service. + We will be working on a more granular job type for pipeline child jobs in the future. + Spec Ref: https://github.com/Azure/azureml_run_specification/pull/340 + MFE PR: https://msdata.visualstudio.com/DefaultCollection/Vienna/_workitems/edit/1167303/ +""" + + +class _BaseJob(Job): + """Base Job, only used in pipeline child jobs. + + :param name: Name of the resource. + :type name: str + :param description: Description of the resource. + :type description: str + :param tags: Tag dictionary. Tags can be added, removed, and updated. + :type tags: dict[str, str] + :param properties: The asset property dictionary. + :type properties: dict[str, str] + :param experiment_name: Name of the experiment the job will be created under, + if None is provided, default will be set to current directory name. + :type experiment_name: str + :param services: Information on services associated with the job, readonly. + :type services: dict[str, JobService] + :param compute: The compute target the job runs on. + :type compute: str + :param kwargs: A dictionary of additional configuration parameters. 
+ :type kwargs: dict + """ + + def __init__(self, **kwargs: Any): + kwargs[TYPE] = JobType.BASE + + super().__init__(**kwargs) + + def _to_dict(self) -> Dict: + res: dict = BaseJobSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + @classmethod + def _load_from_dict(cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any) -> "_BaseJob": + loaded_data = load_from_dict(BaseJobSchema, data, context, additional_message, **kwargs) + return _BaseJob(**loaded_data) + + @classmethod + def _load_from_rest(cls, obj: Run) -> "_BaseJob": + creation_context = SystemData( + created_by=obj.created_by, + created_by_type=obj.created_from, + created_at=obj.created_utc, + last_modified_by=obj.last_modified_by, + last_modified_at=obj.last_modified_utc, + ) + base_job = _BaseJob( + name=obj.run_id, + display_name=obj.display_name, + description=obj.description, + tags=obj.tags, + properties=obj.properties, + experiment_name=obj.experiment_id, + services=obj.services, + status=obj.status, + creation_context=creation_context, + compute=f"{obj.compute.target}" if obj.compute else None, + ) + + return base_job diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/command_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/command_job.py new file mode 100644 index 00000000..0a0c7e82 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/command_job.py @@ -0,0 +1,314 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +import copy +import logging +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2025_01_01_preview.models import CommandJob as RestCommandJob +from azure.ai.ml._restclient.v2025_01_01_preview.models import JobBase +from azure.ai.ml._schema.job.command_job import CommandJobSchema +from azure.ai.ml._utils.utils import map_single_brackets_and_warn +from azure.ai.ml.constants import JobType +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, LOCAL_COMPUTE_PROPERTY, LOCAL_COMPUTE_TARGET, TYPE +from azure.ai.ml.entities import Environment +from azure.ai.ml.entities._credentials import ( + AmlTokenConfiguration, + ManagedIdentityConfiguration, + UserIdentityConfiguration, + _BaseJobIdentityConfiguration, +) +from azure.ai.ml.entities._inputs_outputs import Input, Output +from azure.ai.ml.entities._job._input_output_helpers import ( + from_rest_data_outputs, + from_rest_inputs_to_dataset_literal, + to_rest_data_outputs, + to_rest_dataset_literal_inputs, + validate_inputs_for_command, +) +from azure.ai.ml.entities._job.distribution import DistributionConfiguration +from azure.ai.ml.entities._job.job_service import ( + JobService, + JobServiceBase, + JupyterLabJobService, + SshJobService, + TensorBoardJobService, + VsCodeJobService, +) +from azure.ai.ml.entities._system_data import SystemData +from azure.ai.ml.entities._util import load_from_dict +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationErrorType, ValidationException + +from .job import Job +from .job_io_mixin import JobIOMixin +from .job_limits import CommandJobLimits +from .job_resource_configuration import JobResourceConfiguration +from .parameterized_command import ParameterizedCommand +from .queue_settings import QueueSettings + +# avoid circular import error +if 
TYPE_CHECKING: + from azure.ai.ml.entities import CommandComponent + from azure.ai.ml.entities._builders import Command + +module_logger = logging.getLogger(__name__) + + +class CommandJob(Job, ParameterizedCommand, JobIOMixin): + """Command job. + + .. note:: + For sweep jobs, inputs, outputs, and parameters are accessible as environment variables using the prefix + ``AZUREML_PARAMETER_``. For example, if you have a parameter named "input_data", you can access it as + ``AZUREML_PARAMETER_input_data``. + + :keyword services: Read-only information on services associated with the job. + :paramtype services: Optional[dict[str, ~azure.ai.ml.entities.JobService]] + :keyword inputs: Mapping of output data bindings used in the command. + :paramtype inputs: Optional[dict[str, Union[~azure.ai.ml.Input, str, bool, int, float]]] + :keyword outputs: Mapping of output data bindings used in the job. + :paramtype outputs: Optional[dict[str, ~azure.ai.ml.Output]] + :keyword identity: The identity that the job will use while running on compute. + :paramtype identity: Optional[Union[~azure.ai.ml.ManagedIdentityConfiguration, ~azure.ai.ml.AmlTokenConfiguration, + ~azure.ai.ml.UserIdentityConfiguration]] + :keyword limits: The limits for the job. + :paramtype limits: Optional[~azure.ai.ml.entities.CommandJobLimits] + :keyword parent_job_name: parent job id for command job + :paramtype parent_job_name: Optional[str] + :keyword kwargs: A dictionary of additional configuration parameters. + :paramtype kwargs: dict + + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_command_configurations.py + :start-after: [START command_job_definition] + :end-before: [END command_job_definition] + :language: python + :dedent: 8 + :caption: Configuring a CommandJob. + """ + + def __init__( + self, + *, + inputs: Optional[Dict[str, Union[Input, str, bool, int, float]]] = None, + outputs: Optional[Dict[str, Output]] = None, + limits: Optional[CommandJobLimits] = None, + identity: Optional[ + Union[Dict, ManagedIdentityConfiguration, AmlTokenConfiguration, UserIdentityConfiguration] + ] = None, + services: Optional[ + Dict[str, Union[JobService, JupyterLabJobService, SshJobService, TensorBoardJobService, VsCodeJobService]] + ] = None, + parent_job_name: Optional[str] = None, + **kwargs: Any, + ) -> None: + kwargs[TYPE] = JobType.COMMAND + self._parameters: dict = kwargs.pop("parameters", {}) + self.parent_job_name = parent_job_name + + super().__init__(**kwargs) + + self.outputs = outputs # type: ignore[assignment] + self.inputs = inputs # type: ignore[assignment] + self.limits = limits + self.identity = identity + self.services = services + + @property + def parameters(self) -> Dict[str, str]: + """MLFlow parameters. + + :return: MLFlow parameters logged in job. 
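For orientation alongside the CommandJob docstring and `__init__` above, here is a minimal construction sketch. The asset names, paths, compute target, and timeout are assumptions for illustration, not values taken from this package.

```python
from azure.ai.ml import Input, Output
from azure.ai.ml.entities import CommandJob, CommandJobLimits

# A minimal sketch of a CommandJob using the keyword-only parameters shown above.
job = CommandJob(
    display_name="train-model",                                   # assumed name
    command="python train.py --data ${{inputs.training_data}}",
    environment="azureml:AzureML-sklearn-1.0:1",                  # assumed environment asset
    compute="cpu-cluster",                                        # assumed compute target
    inputs={
        "training_data": Input(type="uri_folder", path="azureml://datastores/blobstore/paths/data/"),
    },
    outputs={"model": Output(type="uri_folder")},
    limits=CommandJobLimits(timeout=3600),                        # assumed timeout in seconds
)
```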
+ :rtype: dict[str, str] + """ + return self._parameters + + def _to_dict(self) -> Dict: + res: dict = CommandJobSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + def _to_rest_object(self) -> JobBase: + self._validate() + self.command = map_single_brackets_and_warn(self.command) + modified_properties = copy.deepcopy(self.properties) + # Remove any properties set on the service as read-only + modified_properties.pop("_azureml.ComputeTargetType", None) + # Handle local compute case + compute = self.compute + resources = self.resources + if self.compute == LOCAL_COMPUTE_TARGET: + compute = None + if resources is None: + resources = JobResourceConfiguration() + if not isinstance(resources, Dict): + if resources.properties is None: + resources.properties = {} + # This is the format of the October Api response. We need to match it exactly + resources.properties[LOCAL_COMPUTE_PROPERTY] = {LOCAL_COMPUTE_PROPERTY: True} + + properties = RestCommandJob( + display_name=self.display_name, + description=self.description, + command=self.command, + code_id=self.code, + compute_id=compute, + properties=modified_properties, + experiment_name=self.experiment_name, + inputs=to_rest_dataset_literal_inputs(self.inputs, job_type=self.type), + outputs=to_rest_data_outputs(self.outputs), + environment_id=self.environment, + distribution=( + self.distribution._to_rest_object() + if self.distribution and not isinstance(self.distribution, Dict) + else None + ), + tags=self.tags, + identity=( + self.identity._to_job_rest_object() if self.identity and not isinstance(self.identity, Dict) else None + ), + environment_variables=self.environment_variables, + resources=resources._to_rest_object() if resources and not isinstance(resources, Dict) else None, + limits=self.limits._to_rest_object() if self.limits else None, + services=JobServiceBase._to_rest_job_services(self.services), + queue_settings=self.queue_settings._to_rest_object() if self.queue_settings else None, + parent_job_name=self.parent_job_name, + ) + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _load_from_dict(cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any) -> "CommandJob": + loaded_data = load_from_dict(CommandJobSchema, data, context, additional_message, **kwargs) + return CommandJob(base_path=context[BASE_PATH_CONTEXT_KEY], **loaded_data) + + @classmethod + def _load_from_rest(cls, obj: JobBase) -> "CommandJob": + rest_command_job: RestCommandJob = obj.properties + command_job = CommandJob( + name=obj.name, + id=obj.id, + display_name=rest_command_job.display_name, + description=rest_command_job.description, + tags=rest_command_job.tags, + properties=rest_command_job.properties, + command=rest_command_job.command, + experiment_name=rest_command_job.experiment_name, + services=JobServiceBase._from_rest_job_services(rest_command_job.services), + status=rest_command_job.status, + creation_context=SystemData._from_rest_object(obj.system_data) if obj.system_data else None, + code=rest_command_job.code_id, + compute=rest_command_job.compute_id, + environment=rest_command_job.environment_id, + distribution=DistributionConfiguration._from_rest_object(rest_command_job.distribution), + parameters=rest_command_job.parameters, + # pylint: disable=protected-access + identity=( + _BaseJobIdentityConfiguration._from_rest_object(rest_command_job.identity) + if rest_command_job.identity + else None + ), + environment_variables=rest_command_job.environment_variables, 
+ resources=JobResourceConfiguration._from_rest_object(rest_command_job.resources), + limits=CommandJobLimits._from_rest_object(rest_command_job.limits), + inputs=from_rest_inputs_to_dataset_literal(rest_command_job.inputs), + outputs=from_rest_data_outputs(rest_command_job.outputs), + queue_settings=QueueSettings._from_rest_object(rest_command_job.queue_settings), + parent_job_name=rest_command_job.parent_job_name, + ) + # Handle special case of local job + if ( + command_job.resources is not None + and not isinstance(command_job.resources, Dict) + and command_job.resources.properties is not None + and command_job.resources.properties.get(LOCAL_COMPUTE_PROPERTY, None) + ): + command_job.compute = LOCAL_COMPUTE_TARGET + command_job.resources.properties.pop(LOCAL_COMPUTE_PROPERTY) + return command_job + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> "CommandComponent": + """Translate a command job to component. + + :param context: Context of command job YAML file. + :type context: dict + :return: Translated command component. + :rtype: CommandComponent + """ + from azure.ai.ml.entities import CommandComponent + + pipeline_job_dict = kwargs.get("pipeline_job_dict", {}) + context = context or {BASE_PATH_CONTEXT_KEY: Path("./")} + + # Create anonymous command component with default version as 1 + return CommandComponent( + tags=self.tags, + is_anonymous=True, + base_path=context[BASE_PATH_CONTEXT_KEY], + code=self.code, + command=self.command, + environment=self.environment, + description=self.description, + inputs=self._to_inputs(inputs=self.inputs, pipeline_job_dict=pipeline_job_dict), + outputs=self._to_outputs(outputs=self.outputs, pipeline_job_dict=pipeline_job_dict), + resources=self.resources if self.resources else None, + distribution=self.distribution if self.distribution else None, + ) + + def _to_node(self, context: Optional[Dict] = None, **kwargs: Any) -> "Command": + """Translate a command job to a pipeline node. + + :param context: Context of command job YAML file. + :type context: dict + :return: Translated command component. + :rtype: Command + """ + from azure.ai.ml.entities._builders import Command + + component = self._to_component(context, **kwargs) + + return Command( + component=component, + compute=self.compute, + # Need to supply the inputs with double curly. 
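As a hedged illustration of the local-compute handling in `_to_rest_object` and `_load_from_rest` above: when `compute` is the local target string `"local"`, the compute id is dropped from the REST payload and a marker is written into the resource properties instead, which the load path later uses to restore the local target. The environment name below is an assumption.

```python
from azure.ai.ml.entities import CommandJob

local_job = CommandJob(
    command="python score.py",
    environment="azureml:AzureML-sklearn-1.0:1",  # assumed environment asset
    compute="local",
)
rest_job = local_job._to_rest_object()
# rest_job.properties.compute_id is cleared; the local-compute marker lives in
# rest_job.properties.resources.properties and lets _load_from_rest restore compute="local".
```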
+ inputs=self.inputs, # type: ignore[arg-type] + outputs=self.outputs, # type: ignore[arg-type] + environment_variables=self.environment_variables, + description=self.description, + tags=self.tags, + display_name=self.display_name, + limits=self.limits, + services=self.services, + properties=self.properties, + identity=self.identity, + queue_settings=self.queue_settings, + ) + + def _validate(self) -> None: + if self.command is None: + msg = "command is required" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.MISSING_FIELD, + ) + if self.environment is None: + msg = "environment is required for non-local runs" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.MISSING_FIELD, + ) + if isinstance(self.environment, Environment): + self.environment.validate() + validate_inputs_for_command(self.command, self.inputs) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/compute_configuration.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/compute_configuration.py new file mode 100644 index 00000000..dcc00825 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/compute_configuration.py @@ -0,0 +1,110 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +import json +import logging +from typing import Any, Dict, Optional + +from azure.ai.ml._restclient.v2020_09_01_dataplanepreview.models import ComputeConfiguration as RestComputeConfiguration +from azure.ai.ml.constants._common import LOCAL_COMPUTE_TARGET +from azure.ai.ml.constants._job.job import JobComputePropertyFields +from azure.ai.ml.entities._mixins import DictMixin, RestTranslatableMixin + +module_logger = logging.getLogger(__name__) + + +class ComputeConfiguration(RestTranslatableMixin, DictMixin): + """Compute resource configuration + + :param target: The compute target. + :type target: Optional[str] + :param instance_count: The number of instances. + :type instance_count: Optional[int] + :param is_local: Specifies if the compute will be on the local machine. + :type is_local: Optional[bool] + :param location: The location of the compute resource. + :type location: Optional[str] + :param properties: The resource properties + :type properties: Optional[Dict[str, Any]] + :param deserialize_properties: Specifies if property bag should be deserialized. Defaults to False. 
+ :type deserialize_properties: bool + """ + + def __init__( + self, + *, + target: Optional[str] = None, + instance_count: Optional[int] = None, + is_local: Optional[bool] = None, + instance_type: Optional[str] = None, + location: Optional[str] = None, + properties: Optional[Dict[str, Any]] = None, + deserialize_properties: bool = False, + ) -> None: + self.instance_count = instance_count + self.target = target or LOCAL_COMPUTE_TARGET + self.is_local = is_local or self.target == LOCAL_COMPUTE_TARGET + self.instance_type = instance_type + self.location = location + self.properties = properties + if deserialize_properties and properties and self.properties is not None: + for key, value in self.properties.items(): + try: + self.properties[key] = json.loads(value) + except Exception: # pylint: disable=W0718 + # keep serialized string if load fails + pass + + def _to_rest_object(self) -> RestComputeConfiguration: + if self.properties: + serialized_properties = {} + for key, value in self.properties.items(): + try: + if key.lower() == JobComputePropertyFields.SINGULARITY.lower(): + # Map Singularity -> AISupercomputer in SDK until MFE does mapping + key = JobComputePropertyFields.AISUPERCOMPUTER + # Ensure keymatch is case invariant + elif key.lower() == JobComputePropertyFields.AISUPERCOMPUTER.lower(): + key = JobComputePropertyFields.AISUPERCOMPUTER + serialized_properties[key] = json.dumps(value) + except Exception: # pylint: disable=W0718 + pass + else: + serialized_properties = None + return RestComputeConfiguration( + target=self.target if not self.is_local else None, + is_local=self.is_local, + instance_count=self.instance_count, + instance_type=self.instance_type, + location=self.location, + properties=serialized_properties, + ) + + @classmethod + def _from_rest_object(cls, obj: RestComputeConfiguration) -> "ComputeConfiguration": + return ComputeConfiguration( + target=obj.target, + is_local=obj.is_local, + instance_count=obj.instance_count, + location=obj.location, + instance_type=obj.instance_type, + properties=obj.properties, + deserialize_properties=True, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ComputeConfiguration): + return NotImplemented + return ( + self.instance_count == other.instance_count + and self.target == other.target + and self.is_local == other.is_local + and self.location == other.location + and self.instance_type == other.instance_type + ) + + def __ne__(self, other: object) -> bool: + if not isinstance(other, ComputeConfiguration): + return NotImplemented + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/data_transfer/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/data_transfer/__init__.py new file mode 100644 index 00000000..fdf8caba --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/data_transfer/__init__.py @@ -0,0 +1,5 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
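A small sketch of the defaulting behaviour in `ComputeConfiguration.__init__` above: with no target the configuration falls back to the local compute target and flags itself as local. The cluster name is an assumption.

```python
from azure.ai.ml.entities._job.compute_configuration import ComputeConfiguration

local_cfg = ComputeConfiguration()            # no target -> local compute target, is_local becomes True
assert local_cfg.is_local is True

remote_cfg = ComputeConfiguration(target="cpu-cluster", instance_count=2)  # assumed cluster name
assert remote_cfg.is_local is False
rest_cfg = remote_cfg._to_rest_object()       # properties, if any, are JSON-serialized on the way out
```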
+# --------------------------------------------------------- + +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/data_transfer/data_transfer_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/data_transfer/data_transfer_job.py new file mode 100644 index 00000000..b510da80 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/data_transfer/data_transfer_job.py @@ -0,0 +1,358 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +import logging +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase +from azure.ai.ml._schema.job.data_transfer_job import ( + DataTransferCopyJobSchema, + DataTransferExportJobSchema, + DataTransferImportJobSchema, +) +from azure.ai.ml.constants import JobType +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, TYPE +from azure.ai.ml.constants._component import DataTransferBuiltinComponentUri, DataTransferTaskType, ExternalDataType +from azure.ai.ml.entities._inputs_outputs import Input, Output +from azure.ai.ml.entities._inputs_outputs.external_data import Database, FileSystem +from azure.ai.ml.entities._util import load_from_dict +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationErrorType, ValidationException + +from ..job import Job +from ..job_io_mixin import JobIOMixin + +# avoid circular import error +if TYPE_CHECKING: + from azure.ai.ml.entities._builders import DataTransferCopy, DataTransferExport, DataTransferImport + from azure.ai.ml.entities._component.datatransfer_component import DataTransferCopyComponent + +module_logger = logging.getLogger(__name__) + + +class DataTransferJob(Job, JobIOMixin): + """DataTransfer job. + + :param name: Name of the job. + :type name: str + :param description: Description of the job. + :type description: str + :param tags: Tag dictionary. Tags can be added, removed, and updated. + :type tags: dict[str, str] + :param display_name: Display name of the job. + :type display_name: str + :param properties: The asset property dictionary. + :type properties: dict[str, str] + :param experiment_name: Name of the experiment the job will be created under. + If None is provided, default will be set to current directory name. + :type experiment_name: str + :param services: Information on services associated with the job, readonly. + :type services: dict[str, JobService] + :param inputs: Inputs to the command. + :type inputs: dict[str, Union[azure.ai.ml.Input, str, bool, int, float]] + :param outputs: Mapping of output data bindings used in the job. + :type outputs: dict[str, azure.ai.ml.Output] + :param compute: The compute target the job runs on. + :type compute: str + :param task: task type in data transfer component, possible value is "copy_data". + :type task: str + :param data_copy_mode: data copy mode in copy task, possible value is "merge_with_overwrite", "fail_if_conflict". + :type data_copy_mode: str + :keyword kwargs: A dictionary of additional configuration parameters. 
+ :paramtype kwargs: dict + """ + + def __init__( + self, + task: str, + **kwargs: Any, + ): + kwargs[TYPE] = JobType.DATA_TRANSFER + self._parameters: Dict = kwargs.pop("parameters", {}) + super().__init__(**kwargs) + self.task = task + + @property + def parameters(self) -> Dict: + """MLFlow parameters. + + :return: MLFlow parameters logged in job. + :rtype: Dict[str, str] + """ + return self._parameters + + def _validate(self) -> None: + if self.compute is None: + msg = "compute is required" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.MISSING_FIELD, + ) + + @classmethod + def _load_from_rest(cls, obj: JobBase) -> "DataTransferJob": + # Todo: need update rest api + raise NotImplementedError("Not support submit standalone job for now") + + def _to_rest_object(self) -> JobBase: + # Todo: need update rest api + raise NotImplementedError("Not support submit standalone job for now") + + @classmethod + def _build_source_sink( + cls, io_dict: Optional[Union[Dict, Database, FileSystem]] + ) -> Optional[Union[(Database, FileSystem)]]: + if io_dict is None: + return io_dict + if isinstance(io_dict, (Database, FileSystem)): + component_io = io_dict + else: + if isinstance(io_dict, dict): + data_type = io_dict.pop("type", None) + if data_type == ExternalDataType.DATABASE: + component_io = Database(**io_dict) + elif data_type == ExternalDataType.FILE_SYSTEM: + component_io = FileSystem(**io_dict) + else: + msg = "Type in source or sink only support {} and {}, currently got {}." + raise ValidationException( + message=msg.format( + ExternalDataType.DATABASE, + ExternalDataType.FILE_SYSTEM, + data_type, + ), + no_personal_data_message=msg.format( + ExternalDataType.DATABASE, + ExternalDataType.FILE_SYSTEM, + "data_type", + ), + target=ErrorTarget.DATA_TRANSFER_JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) + else: + msg = "Source or sink only support dict, Database and FileSystem" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.DATA_TRANSFER_JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) + + return component_io + + +class DataTransferCopyJob(DataTransferJob): + def __init__( + self, + *, + inputs: Optional[Dict[str, Union[Input, str]]] = None, + outputs: Optional[Dict[str, Union[Output]]] = None, + data_copy_mode: Optional[str] = None, + **kwargs: Any, + ): + kwargs["task"] = DataTransferTaskType.COPY_DATA + super().__init__(**kwargs) + + self.outputs = outputs # type: ignore[assignment] + self.inputs = inputs # type: ignore[assignment] + self.data_copy_mode = data_copy_mode + + def _to_dict(self) -> Dict: + res: dict = DataTransferCopyJobSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + @classmethod + def _load_from_dict( + cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any + ) -> "DataTransferCopyJob": + loaded_data = load_from_dict(DataTransferCopyJobSchema, data, context, additional_message, **kwargs) + return DataTransferCopyJob(base_path=context[BASE_PATH_CONTEXT_KEY], **loaded_data) + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> "DataTransferCopyComponent": + """Translate a data transfer copy job to component. + + :param context: Context of data transfer job YAML file. 
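A minimal sketch of constructing the DataTransferCopyJob defined above. Datastore paths, the output name, and the compute target are assumptions; the `data_copy_mode` values come from the class docstring.

```python
from azure.ai.ml import Input, Output
from azure.ai.ml.entities._job.data_transfer.data_transfer_job import DataTransferCopyJob

copy_job = DataTransferCopyJob(
    inputs={"folder1": Input(type="uri_folder", path="azureml://datastores/my_blob/paths/source/")},
    outputs={"output_folder": Output(type="uri_folder")},
    data_copy_mode="merge_with_overwrite",   # documented values: merge_with_overwrite, fail_if_conflict
    compute="adf-compute",                   # assumed compute; _validate requires a compute target
)
```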
+ :type context: dict + :return: Translated data transfer copy component. + :rtype: DataTransferCopyComponent + """ + from azure.ai.ml.entities._component.datatransfer_component import DataTransferCopyComponent + + pipeline_job_dict = kwargs.get("pipeline_job_dict", {}) + context = context or {BASE_PATH_CONTEXT_KEY: Path("./")} + + # Create anonymous command component with default version as 1 + return DataTransferCopyComponent( + tags=self.tags, + is_anonymous=True, + base_path=context[BASE_PATH_CONTEXT_KEY], + description=self.description, + inputs=self._to_inputs(inputs=self.inputs, pipeline_job_dict=pipeline_job_dict), + outputs=self._to_outputs(outputs=self.outputs, pipeline_job_dict=pipeline_job_dict), + data_copy_mode=self.data_copy_mode, + ) + + def _to_node(self, context: Optional[Dict] = None, **kwargs: Any) -> "DataTransferCopy": + """Translate a data transfer copy job to a pipeline node. + + :param context: Context of data transfer job YAML file. + :type context: dict + :return: Translated data transfer component. + :rtype: DataTransferCopy + """ + from azure.ai.ml.entities._builders import DataTransferCopy + + component = self._to_component(context, **kwargs) + + return DataTransferCopy( + component=component, + compute=self.compute, + # Need to supply the inputs with double curly. + inputs=self.inputs, # type: ignore[arg-type] + outputs=self.outputs, # type: ignore[arg-type] + description=self.description, + tags=self.tags, + display_name=self.display_name, + ) + + +class DataTransferImportJob(DataTransferJob): + def __init__( + self, + *, + outputs: Optional[Dict[str, Union[Output]]] = None, + source: Optional[Union[Dict, Database, FileSystem]] = None, + **kwargs: Any, + ): + kwargs["task"] = DataTransferTaskType.IMPORT_DATA + super().__init__(**kwargs) + + self.outputs = outputs # type: ignore[assignment] + self.source = self._build_source_sink(source) + + def _to_dict(self) -> Dict: + res: dict = DataTransferImportJobSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + @classmethod + def _load_from_dict( + cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any + ) -> "DataTransferImportJob": + loaded_data = load_from_dict(DataTransferImportJobSchema, data, context, additional_message, **kwargs) + return DataTransferImportJob(base_path=context[BASE_PATH_CONTEXT_KEY], **loaded_data) + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> str: + """Translate a data transfer import job to component. + + :param context: Context of data transfer job YAML file. + :type context: dict + :return: Translated data transfer import component. + :rtype: str + """ + + component: str = "" + if self.source is not None and self.source.type == ExternalDataType.DATABASE: + component = DataTransferBuiltinComponentUri.IMPORT_DATABASE + else: + component = DataTransferBuiltinComponentUri.IMPORT_FILE_SYSTEM + + return component + + def _to_node(self, context: Optional[Dict] = None, **kwargs: Any) -> "DataTransferImport": + """Translate a data transfer import job to a pipeline node. + + :param context: Context of data transfer job YAML file. + :type context: dict + :return: Translated data transfer import node. 
+ :rtype: DataTransferImport + """ + from azure.ai.ml.entities._builders import DataTransferImport + + component = self._to_component(context, **kwargs) + + return DataTransferImport( + component=component, + compute=self.compute, + source=self.source, + outputs=self.outputs, # type: ignore[arg-type] + description=self.description, + tags=self.tags, + display_name=self.display_name, + properties=self.properties, + ) + + +class DataTransferExportJob(DataTransferJob): + def __init__( + self, + *, + inputs: Optional[Dict[str, Union[Input]]] = None, + sink: Optional[Union[Dict, Database, FileSystem]] = None, + **kwargs: Any, + ): + kwargs["task"] = DataTransferTaskType.EXPORT_DATA + super().__init__(**kwargs) + + self.inputs = inputs # type: ignore[assignment] + self.sink = self._build_source_sink(sink) + + def _to_dict(self) -> Dict: + res: dict = DataTransferExportJobSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + @classmethod + def _load_from_dict( + cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any + ) -> "DataTransferExportJob": + loaded_data = load_from_dict(DataTransferExportJobSchema, data, context, additional_message, **kwargs) + return DataTransferExportJob(base_path=context[BASE_PATH_CONTEXT_KEY], **loaded_data) + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> str: + """Translate a data transfer export job to component. + + :param context: Context of data transfer job YAML file. + :type context: dict + :return: Translated data transfer export component. + :rtype: str + """ + component: str = "" + if self.sink is not None and self.sink.type == ExternalDataType.DATABASE: + component = DataTransferBuiltinComponentUri.EXPORT_DATABASE + else: + msg = "Sink is a required field for export data task and we don't support exporting file system for now." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.DATA_TRANSFER_JOB, + error_type=ValidationErrorType.INVALID_VALUE, + ) + return component + + def _to_node(self, context: Optional[Dict] = None, **kwargs: Any) -> "DataTransferExport": + """Translate a data transfer export job to a pipeline node. + + :param context: Context of data transfer job YAML file. + :type context: dict + :return: Translated data transfer export node. + :rtype: DataTransferExport + """ + from azure.ai.ml.entities._builders import DataTransferExport + + component = self._to_component(context, **kwargs) + + return DataTransferExport( + component=component, + compute=self.compute, + sink=self.sink, + inputs=self.inputs, # type: ignore[arg-type] + description=self.description, + tags=self.tags, + display_name=self.display_name, + properties=self.properties, + ) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/__init__.py new file mode 100644 index 00000000..fdf8caba --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/__init__.py @@ -0,0 +1,5 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
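A sketch of an import job whose source is passed as a plain dict; per `_build_source_sink` above, a dict whose `type` is `ExternalDataType.DATABASE` is normalized into a `Database` object. The query and connection field names, the output type, and the compute target are assumptions.

```python
from azure.ai.ml import Output
from azure.ai.ml.constants._component import ExternalDataType
from azure.ai.ml.entities._job.data_transfer.data_transfer_job import DataTransferImportJob

import_job = DataTransferImportJob(
    source={
        "type": ExternalDataType.DATABASE,
        "query": "SELECT * FROM my_table",            # assumed field name
        "connection": "azureml:my_sql_connection",    # assumed field name
    },
    outputs={"sink": Output(type="mltable")},          # assumed output type
    compute="serverless",                              # assumed compute
)
# import_job.source is now a Database instance rather than the raw dict.
```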
+# --------------------------------------------------------- + +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/constants.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/constants.py new file mode 100644 index 00000000..5084ffbd --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/constants.py @@ -0,0 +1,20 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + + +class AzureMLDistillationProperties: + ENABLE_DISTILLATION = "azureml.enable_distillation" + DATA_GENERATION_TYPE = "azureml.data_generation_type" + DATA_GENERATION_TASK_TYPE = "azureml.data_generation_task_type" + TEACHER_MODEL = "azureml.teacher_model" + INSTANCE_TYPE = "azureml.instance_type" + CONNECTION_INFORMATION = "azureml.connection_information" + + +class EndpointSettings: + VALID_SETTINGS = {"request_batch_size", "min_endpoint_success_ratio"} + + +class PromptSettingKeys: + VALID_SETTINGS = {"enable_chain_of_thought", "enable_chain_of_density", "max_len_summary"} diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/distillation_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/distillation_job.py new file mode 100644 index 00000000..469fde98 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/distillation_job.py @@ -0,0 +1,542 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +import json +from typing import Any, Dict, Optional + +from azure.ai.ml._restclient.v2024_01_01_preview.models import ( + CustomModelFineTuning as RestCustomModelFineTuningVertical, +) +from azure.ai.ml._restclient.v2024_01_01_preview.models import FineTuningJob as RestFineTuningJob +from azure.ai.ml._restclient.v2024_01_01_preview.models import JobBase as RestJobBase +from azure.ai.ml._restclient.v2024_01_01_preview.models import MLFlowModelJobInput, UriFileJobInput +from azure.ai.ml._utils._experimental import experimental +from azure.ai.ml.constants import DataGenerationType, JobType +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, TYPE, AssetTypes +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.distillation.constants import ( + AzureMLDistillationProperties, + EndpointSettings, + PromptSettingKeys, +) +from azure.ai.ml.entities._job.distillation.endpoint_request_settings import EndpointRequestSettings +from azure.ai.ml.entities._job.distillation.prompt_settings import PromptSettings +from azure.ai.ml.entities._job.distillation.teacher_model_settings import TeacherModelSettings +from azure.ai.ml.entities._job.job import Job +from azure.ai.ml.entities._job.job_io_mixin import JobIOMixin +from azure.ai.ml.entities._job.resource_configuration import ResourceConfiguration +from azure.ai.ml.entities._util import load_from_dict +from azure.ai.ml.entities._workspace.connections.workspace_connection import WorkspaceConnection + + +# pylint: disable=too-many-instance-attributes +@experimental +class DistillationJob(Job, JobIOMixin): + def 
__init__( + self, + *, + data_generation_type: str, + data_generation_task_type: str, + teacher_model_endpoint_connection: WorkspaceConnection, + student_model: Input, + training_data: Optional[Input] = None, + validation_data: Optional[Input] = None, + teacher_model_settings: Optional[TeacherModelSettings] = None, + prompt_settings: Optional[PromptSettings] = None, + hyperparameters: Optional[Dict] = None, + resources: Optional[ResourceConfiguration] = None, + **kwargs: Any, + ) -> None: + self._data_generation_type = data_generation_type + self._data_generation_task_type = data_generation_task_type + self._teacher_model_endpoint_connection = teacher_model_endpoint_connection + self._student_model = student_model + self._training_data = training_data + self._validation_data = validation_data + self._teacher_model_settings = teacher_model_settings + self._prompt_settings = prompt_settings + self._hyperparameters = hyperparameters + self._resources = resources + + if self._training_data is None and self._data_generation_type == DataGenerationType.LABEL_GENERATION: + raise ValueError( + f"Training data can not be None when data generation type is set to " + f"{DataGenerationType.LABEL_GENERATION}." + ) + + if self._validation_data is None and self._data_generation_type == DataGenerationType.LABEL_GENERATION: + raise ValueError( + f"Validation data can not be None when data generation type is set to " + f"{DataGenerationType.LABEL_GENERATION}." + ) + + kwargs[TYPE] = JobType.DISTILLATION + self._outputs = kwargs.pop("outputs", None) + super().__init__(**kwargs) + + @property + def data_generation_type(self) -> str: + """Get the type of synthetic data generation to perform. + + :return: str representing the type of synthetic data generation to perform. + :rtype: str + """ + return self._data_generation_type + + @data_generation_type.setter + def data_generation_type(self, task: str) -> None: + """Set the data generation task. + + :param task: The data generation task. Possible values include 'Label_Generation' and 'Data_Generation'. + :type task: str + """ + self._data_generation_type = task + + @property + def data_generation_task_type(self) -> str: + """Get the type of synthetic data to generate. + + :return: str representing the type of synthetic data to generate. + :rtype: str + """ + return self._data_generation_task_type + + @data_generation_task_type.setter + def data_generation_task_type(self, task: str) -> None: + """Set the data generation type. + + :param task: The data generation type. Possible values include 'nli', 'nlu_qa', 'conversational', + 'math', and 'summarization'. + :type task: str + """ + self._data_generation_task_type = task + + @property + def teacher_model_endpoint_connection(self) -> WorkspaceConnection: + """Get the endpoint connection of the teacher model to use for data generation. + + :return: Endpoint connection + :rtype: WorkspaceConnection + """ + return self._teacher_model_endpoint_connection + + @teacher_model_endpoint_connection.setter + def teacher_model_endpoint_connection(self, connection: WorkspaceConnection) -> None: + """Set the endpoint information of the teacher model. 
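A hedged construction sketch for the DistillationJob `__init__` above. The connection details, model and data paths, task type string, and instance type are assumptions; in practice the teacher connection would typically be retrieved from the workspace rather than built inline.

```python
from azure.ai.ml import Input
from azure.ai.ml.constants import AssetTypes, DataGenerationType
from azure.ai.ml.entities import ApiKeyConfiguration, ResourceConfiguration, WorkspaceConnection
from azure.ai.ml.entities._job.distillation.distillation_job import DistillationJob

teacher_connection = WorkspaceConnection(             # assumed connection details
    name="teacher-endpoint",
    type="custom",
    target="https://my-teacher-endpoint.example.com",
    credentials=ApiKeyConfiguration(key="<api-key>"),
)

distillation_job = DistillationJob(
    data_generation_type=DataGenerationType.LABEL_GENERATION,
    data_generation_task_type="nli",                   # one of the values listed in the setter docstring
    teacher_model_endpoint_connection=teacher_connection,
    student_model=Input(type=AssetTypes.MLFLOW_MODEL, path="azureml://registries/azureml/models/student-model/versions/1"),
    training_data=Input(type=AssetTypes.URI_FILE, path="./train.jsonl"),
    validation_data=Input(type=AssetTypes.URI_FILE, path="./valid.jsonl"),
    resources=ResourceConfiguration(instance_type="Standard_D4s_v3"),  # assumed SKU
)
```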
+ + :param connection: Workspace connection + :type connection: WorkspaceConnection + """ + self._teacher_model_endpoint_connection = connection + + @property + def student_model(self) -> Input: + """Get the student model to be trained with synthetic data + + :return: The student model to be finetuned + :rtype: Input + """ + return self._student_model + + @student_model.setter + def student_model(self, model: Input) -> None: + """Set the student model to be trained. + + :param model: The model to use for finetuning + :type model: Input + """ + self._student_model = model + + @property + def training_data(self) -> Optional[Input]: + """Get the training data. + + :return: Training data input + :rtype: typing.Optional[Input] + """ + return self._training_data + + @training_data.setter + def training_data(self, training_data: Optional[Input]) -> None: + """Set the training data. + + :param training_data: Training data input + :type training_data: typing.Optional[Input] + """ + self._training_data = training_data + + @property + def validation_data(self) -> Optional[Input]: + """Get the validation data. + + :return: Validation data input + :rtype: typing.Optional[Input] + """ + return self._validation_data + + @validation_data.setter + def validation_data(self, validation_data: Optional[Input]) -> None: + """Set the validation data. + + :param validation_data: Validation data input + :type validation_data: typing.Optional[Input] + """ + self._validation_data = validation_data + + @property + def teacher_model_settings(self) -> Optional[TeacherModelSettings]: + """Get the teacher model settings. + + :return: The settings for the teacher model to use. + :rtype: typing.Optional[TeacherModelSettings] + """ + return self._teacher_model_settings + + @property + def prompt_settings(self) -> Optional[PromptSettings]: + """Get the settings for the prompt. + + :return: The settings for the prompt. + :rtype: typing.Optional[PromptSettings] + """ + return self._prompt_settings + + @property + def hyperparameters(self) -> Optional[Dict]: + """Get the finetuning hyperparameters. + + :return: The finetuning hyperparameters. + :rtype: typing.Optional[typing.Dict] + """ + return self._hyperparameters + + @property + def resources(self) -> Optional[ResourceConfiguration]: + """Get the resources for data generation. + + :return: The resources for data generation. + :rtype: typing.Optional[ResourceConfiguration] + """ + return self._resources + + @resources.setter + def resources(self, resource: Optional[ResourceConfiguration]) -> None: + """Set the resources for data generation. + + :param resource: The resources for data generation. + :type resource: typing.Optional[ResourceConfiguration] + """ + self._resources = resource + + def set_teacher_model_settings( + self, + inference_parameters: Optional[Dict] = None, + endpoint_request_settings: Optional[EndpointRequestSettings] = None, + ): + """Set settings related to the teacher model. + + :param inference_parameters: Settings the teacher model uses during inferencing. 
+ :type inference_parameters: typing.Optional[typing.Dict] + :param endpoint_request_settings: Settings for inference requests to the endpoint + :type endpoint_request_settings: typing.Optional[EndpointRequestSettings] + """ + self._teacher_model_settings = TeacherModelSettings( + inference_parameters=inference_parameters, endpoint_request_settings=endpoint_request_settings + ) + + def set_prompt_settings(self, prompt_settings: Optional[PromptSettings]): + """Set settings related to the system prompt used for generating data. + + :param prompt_settings: Settings related to the system prompt used for generating data. + :type prompt_settings: typing.Optional[PromptSettings] + """ + self._prompt_settings = prompt_settings if prompt_settings is not None else self._prompt_settings + + def set_finetuning_settings(self, hyperparameters: Optional[Dict]): + """Set the hyperparamters for finetuning. + + :param hyperparameters: The hyperparameters for finetuning. + :type hyperparameters: typing.Optional[typing.Dict] + """ + self._hyperparameters = hyperparameters if hyperparameters is not None else self._hyperparameters + + def _to_dict(self) -> Dict: + """Convert the object to a dictionary. + + :return: dictionary representation of the object. + :rtype: typing.Dict + """ + from azure.ai.ml._schema._distillation.distillation_job import DistillationJobSchema + + schema_dict: dict = {} + schema_dict = DistillationJobSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + + return schema_dict + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "DistillationJob": + """Load from a dictionary. + + :param data: dictionary representation of the object. + :type data: typing.Dict + :param context: dictionary containing the context. + :type context: typing.Dict + :param additional_message: additional message to be added to the error message. + :type additional_message: str + :return: DistillationJob object. + :rtype: DistillationJob + """ + from azure.ai.ml._schema._distillation.distillation_job import DistillationJobSchema + + loaded_data = load_from_dict(DistillationJobSchema, data, context, additional_message, **kwargs) + + training_data = loaded_data.get("training_data", None) + if isinstance(training_data, str): + loaded_data["training_data"] = Input(type="uri_file", path=training_data) + + validation_data = loaded_data.get("validation_data", None) + if isinstance(validation_data, str): + loaded_data["validation_data"] = Input(type="uri_file", path=validation_data) + + student_model = loaded_data.get("student_model", None) + if isinstance(student_model, str): + loaded_data["student_model"] = Input(type=AssetTypes.URI_FILE, path=student_model) + + job_instance = DistillationJob(**loaded_data) + return job_instance + + @classmethod + def _from_rest_object(cls, obj: RestJobBase) -> "DistillationJob": + """Convert a REST object to DistillationJob object. + + :param obj: CustomModelFineTuningJob in Rest format. + :type obj: JobBase + :return: DistillationJob objects. 
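Continuing the sketch, the setters shown above attach teacher-model, prompt, and fine-tuning settings to the job. The parameter names and values here are assumptions.

```python
from azure.ai.ml.entities._job.distillation.endpoint_request_settings import EndpointRequestSettings
from azure.ai.ml.entities._job.distillation.prompt_settings import PromptSettings

distillation_job.set_teacher_model_settings(
    inference_parameters={"temperature": 0.1, "max_tokens": 512},   # assumed inference parameters
    endpoint_request_settings=EndpointRequestSettings(
        request_batch_size=10,
        min_endpoint_success_ratio=0.7,
    ),
)
distillation_job.set_prompt_settings(PromptSettings(enable_chain_of_thought=True))
distillation_job.set_finetuning_settings(hyperparameters={"num_train_epochs": "1", "learning_rate": "0.0002"})
```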
+ :rtype: DistillationJob + """ + properties: RestFineTuningJob = obj.properties + finetuning_details: RestCustomModelFineTuningVertical = properties.fine_tuning_details + + job_kwargs_dict = DistillationJob._filter_properties(properties=properties.properties) + + job_args_dict = { + "id": obj.id, + "name": obj.name, + "description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "experiment_name": properties.experiment_name, + "services": properties.services, + "status": properties.status, + "creation_context": obj.system_data, + "display_name": properties.display_name, + "outputs": from_rest_data_outputs(properties.outputs), + } + + distillation_job = cls( + student_model=finetuning_details.model, + training_data=finetuning_details.training_data, + validation_data=finetuning_details.validation_data, + hyperparameters=finetuning_details.hyper_parameters, + **job_kwargs_dict, + **job_args_dict, + ) + + distillation_job._restore_inputs() + + return distillation_job + + def _to_rest_object(self) -> "RestFineTuningJob": + """Convert DistillationJob object to a RestFineTuningJob object. + + :return: REST object representation of this object. + :rtype: JobBase + """ + distillation = RestCustomModelFineTuningVertical( + task_type="ChatCompletion", + model=self.student_model, + model_provider="Custom", + training_data=self.training_data, + validation_data=self.validation_data, + hyper_parameters=self._hyperparameters, + ) + + if isinstance(distillation.training_data, Input): + distillation.training_data = UriFileJobInput(uri=distillation.training_data.path) + if isinstance(distillation.validation_data, Input): + distillation.validation_data = UriFileJobInput(uri=distillation.validation_data.path) + if isinstance(distillation.model, Input): + distillation.model = MLFlowModelJobInput(uri=distillation.model.path) + + self._add_distillation_properties(self.properties) + + finetuning_job = RestFineTuningJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + services=self.services, + tags=self.tags, + properties=self.properties, + fine_tuning_details=distillation, + outputs=to_rest_data_outputs(self.outputs), + ) + + result = RestJobBase(properties=finetuning_job) + result.name = self.name + + return result + + @classmethod + def _load_from_rest(cls, obj: RestJobBase) -> "DistillationJob": + """Loads the rest object to a dict containing items to init the AutoMLJob objects. + + :param obj: Azure Resource Manager resource envelope. 
+ :type obj: JobBase + :raises ValidationException: task type validation error + :return: A DistillationJob + :rtype: DistillationJob + """ + return DistillationJob._from_rest_object(obj) + + # TODO: Remove once Distillation is added to MFE + def _add_distillation_properties(self, properties: Dict) -> None: + """Adds DistillationJob attributes to properties to pass into the FT Overloaded API property bag + + :param properties: Current distillation properties + :type properties: typing.Dict + """ + properties[AzureMLDistillationProperties.ENABLE_DISTILLATION] = True + properties[AzureMLDistillationProperties.DATA_GENERATION_TASK_TYPE] = self._data_generation_task_type.upper() + properties[f"{AzureMLDistillationProperties.TEACHER_MODEL}.endpoint_name"] = ( + self._teacher_model_endpoint_connection.name + ) + + # Not needed for FT Overload API but additional info needed to convert from REST object to Distillation object + properties[AzureMLDistillationProperties.DATA_GENERATION_TYPE] = self._data_generation_type + properties[AzureMLDistillationProperties.CONNECTION_INFORMATION] = json.dumps( + self._teacher_model_endpoint_connection._to_dict() # pylint: disable=protected-access + ) + + if self._prompt_settings: + for setting, value in self._prompt_settings.items(): + if value is not None: + properties[f"azureml.{setting.strip('_')}"] = value + + if self._teacher_model_settings: + inference_settings = self._teacher_model_settings.inference_parameters + endpoint_settings = self._teacher_model_settings.endpoint_request_settings + + if inference_settings: + for inference_key, value in inference_settings.items(): + if value is not None: + properties[f"{AzureMLDistillationProperties.TEACHER_MODEL}.{inference_key}"] = value + + if endpoint_settings: + for setting, value in endpoint_settings.items(): + if value is not None: + properties[f"azureml.{setting.strip('_')}"] = value + + if self._resources and self._resources.instance_type: + properties[f"{AzureMLDistillationProperties.INSTANCE_TYPE}.data_generation"] = self._resources.instance_type + + # TODO: Remove once Distillation is added to MFE + @classmethod + def _filter_properties(cls, properties: Dict) -> Dict: + """Convert properties from REST object back to their original states. 
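For orientation, the flattened property bag that `_add_distillation_properties` would produce for the sketch above looks roughly like the dict below. The key names follow the constants defined in this package's constants module; the concrete values simply echo the example and are not fixed by the SDK.

```python
from azure.ai.ml.constants import DataGenerationType

# Illustration only; this dict is not something a user builds by hand.
expected_properties = {
    "azureml.enable_distillation": True,
    "azureml.data_generation_type": DataGenerationType.LABEL_GENERATION,
    "azureml.data_generation_task_type": "NLI",                      # task type is upper-cased
    "azureml.teacher_model.endpoint_name": "teacher-endpoint",       # connection name from the example
    "azureml.teacher_model.temperature": 0.1,                        # inference parameter
    "azureml.teacher_model.max_tokens": 512,
    "azureml.request_batch_size": 10,                                # endpoint request setting
    "azureml.min_endpoint_success_ratio": 0.7,
    "azureml.enable_chain_of_thought": True,                         # prompt setting
    "azureml.instance_type.data_generation": "Standard_D4s_v3",      # resources.instance_type
    # plus "azureml.connection_information": the teacher connection serialized as JSON
}
```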
+ + :param properties: Properties from a REST object + :type properties: typing.Dict + :return: A dict that can be used to create a DistillationJob + :rtype: typing.Dict + """ + inference_parameters = {} + endpoint_settings = {} + prompt_settings = {} + resources = {} + teacher_settings = {} + teacher_model_info = "" + for key, val in properties.items(): + param = key.split(".")[-1] + if AzureMLDistillationProperties.TEACHER_MODEL in key and param != "endpoint_name": + inference_parameters[param] = val + elif AzureMLDistillationProperties.INSTANCE_TYPE in key: + resources[key.split(".")[1]] = val + elif AzureMLDistillationProperties.CONNECTION_INFORMATION in key: + teacher_model_info = val + else: + if param in EndpointSettings.VALID_SETTINGS: + endpoint_settings[param] = val + elif param in PromptSettingKeys.VALID_SETTINGS: + prompt_settings[param] = val + + if inference_parameters: + teacher_settings["inference_parameters"] = inference_parameters + if endpoint_settings: + teacher_settings["endpoint_request_settings"] = EndpointRequestSettings(**endpoint_settings) # type: ignore + + return { + "data_generation_task_type": properties.get(AzureMLDistillationProperties.DATA_GENERATION_TASK_TYPE), + "data_generation_type": properties.get(AzureMLDistillationProperties.DATA_GENERATION_TYPE), + "teacher_model_endpoint_connection": WorkspaceConnection._load( # pylint: disable=protected-access + data=json.loads(teacher_model_info) + ), + "teacher_model_settings": ( + TeacherModelSettings(**teacher_settings) if teacher_settings else None # type: ignore + ), + "prompt_settings": PromptSettings(**prompt_settings) if prompt_settings else None, + "resources": ResourceConfiguration(**resources) if resources else None, + } + + def _restore_inputs(self) -> None: + """Restore UriFileJobInputs to JobInputs within data_settings.""" + if isinstance(self.training_data, UriFileJobInput): + self.training_data = Input(type=AssetTypes.URI_FILE, path=self.training_data.uri) + if isinstance(self.validation_data, UriFileJobInput): + self.validation_data = Input(type=AssetTypes.URI_FILE, path=self.validation_data.uri) + if isinstance(self.student_model, MLFlowModelJobInput): + self.student_model = Input(type=AssetTypes.MLFLOW_MODEL, path=self.student_model.uri) + + def __eq__(self, other: object) -> bool: + """Returns True if both instances have the same values. + + This method check instances equality and returns True if both of + the instances have the same attributes with the same values. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + if not isinstance(other, DistillationJob): + return False + return ( + super().__eq__(other) + and self.data_generation_type == other.data_generation_type + and self.data_generation_task_type == other.data_generation_task_type + and self.teacher_model_endpoint_connection.name == other.teacher_model_endpoint_connection.name + and self.student_model == other.student_model + and self.training_data == other.training_data + and self.validation_data == other.validation_data + and self.teacher_model_settings == other.teacher_model_settings + and self.prompt_settings == other.prompt_settings + and self.hyperparameters == other.hyperparameters + and self.resources == other.resources + ) + + def __ne__(self, other: object) -> bool: + """Check inequality between two DistillationJob objects. 
+ + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/endpoint_request_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/endpoint_request_settings.py new file mode 100644 index 00000000..89fb8015 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/endpoint_request_settings.py @@ -0,0 +1,90 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +from typing import Optional + +from azure.ai.ml._utils._experimental import experimental + + +@experimental +class EndpointRequestSettings: + def __init__(self, *, request_batch_size: Optional[int] = None, min_endpoint_success_ratio: Optional[float] = None): + """Initialize EndpointRequestSettings. + + :param request_batch_size: The number of requests to send to the teacher model endpoint as a batch, + defaults to None + :type request_batch_size: typing.Optional[int], optional + :param min_endpoint_success_ratio: The ratio of (successful requests / total requests) needed for the + data generation step to be considered successful. Must be a value between 0 and 1 inclusive, + defaults to None + :type min_endpoint_success_ratio: typing.Optional[float], optional + """ + self._request_batch_size = request_batch_size + self._min_endpoint_success_ratio = min_endpoint_success_ratio + + @property + def request_batch_size(self) -> Optional[int]: + """Get the number of inference requests to send to the teacher model as a batch. + + :return: The number of inference requests to send to the teacher model as a batch. + :rtype: typing.Optional[int] + """ + return self._request_batch_size + + @request_batch_size.setter + def request_batch_size(self, value: Optional[int]) -> None: + """Set the number of inference requests to send to the teacher model as a batch. + + :param value: The number of inference requests to send to the teacher model as a batch. + :type value: typing.Optional[int] + """ + self._request_batch_size = value + + @property + def min_endpoint_success_ratio(self) -> Optional[float]: + """Get the minimum ratio of successful inferencing requests. + + :return: The minimum ratio of successful inferencing requests. + :rtype: typing.Optional[float] + """ + return self._min_endpoint_success_ratio + + @min_endpoint_success_ratio.setter + def min_endpoint_success_ratio(self, ratio: Optional[float]) -> None: + """Set the minimum ratio of successful inferencing requests. + + :param ratio: The minimum ratio of successful inferencing requests. + :type ratio: typing.Optional[float] + """ + self._min_endpoint_success_ratio = ratio + + def items(self): + return self.__dict__.items() + + def __eq__(self, other: object) -> bool: + """Returns True if both instances have the same values. + + This method check instances equality and returns True if both of + the instances have the same attributes with the same values. 
+ + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + if not isinstance(other, EndpointRequestSettings): + return False + return ( + self.request_batch_size == other.request_batch_size + and self.min_endpoint_success_ratio == other.min_endpoint_success_ratio + ) + + def __ne__(self, other: object) -> bool: + """Check inequality between two EndpointRequestSettings objects. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/prompt_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/prompt_settings.py new file mode 100644 index 00000000..d74af748 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/prompt_settings.py @@ -0,0 +1,138 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +from typing import Optional + +from azure.ai.ml._utils._experimental import experimental + + +@experimental +class PromptSettings: + def __init__( + self, + *, + enable_chain_of_thought: bool = False, + enable_chain_of_density: bool = False, + max_len_summary: Optional[int] = None, + # custom_prompt: Optional[str] = None + ): + """Initialize PromptSettings. + + :param enable_chain_of_thought: Whether or not to enable chain of thought which modifies the system prompt + used. Can be used for all `data_generation_task_type` values except `SUMMARIZATION`, defaults to False + :type enable_chain_of_thought: bool, optional + :param enable_chain_of_density: Whether or not to enable chain of density which modifies the system prompt + used. Can only be used for `data_generation_task_type` of `SUMMARIZATION`, defaults to False + :type enable_chain_of_density: bool, optional + :param max_len_summary: The maximum length of the summary generated for data_generation_task_type` of + `SUMMARIZATION`, defaults to None + :type max_len_summary: typing.Optional[int] + """ + self._enable_chain_of_thought = enable_chain_of_thought + self._enable_chain_of_density = enable_chain_of_density + self._max_len_summary = max_len_summary + # self._custom_prompt = custom_prompt + + @property + def enable_chain_of_thought(self) -> bool: + """Get whether or not chain of thought is enabled. + + :return: Whether or not chain of thought is enabled. + :rtype: bool + """ + return self._enable_chain_of_thought + + @enable_chain_of_thought.setter + def enable_chain_of_thought(self, value: bool) -> None: + """Set chain of thought. + + :param value: Whether or not chain of thought is enabled. + :type value: bool + """ + self._enable_chain_of_thought = value + + @property + def enable_chain_of_density(self) -> bool: + """Get whether or not chain of density is enabled. + + :return: Whether or not chain of thought is enabled + :rtype: bool + """ + return self._enable_chain_of_density + + @enable_chain_of_density.setter + def enable_chain_of_density(self, value: bool) -> None: + """Set whether or not chain of thought is enabled. + + :param value: Whether or not chain of thought is enabled + :type value: bool + """ + self._enable_chain_of_density = value + + @property + def max_len_summary(self) -> Optional[int]: + """The number of tokens to use for summarization. 
+ + :return: The number of tokens to use for summarization + :rtype: typing.Optional[int] + """ + return self._max_len_summary + + @max_len_summary.setter + def max_len_summary(self, length: Optional[int]) -> None: + """Set the number of tokens to use for summarization. + + :param length: The number of tokens to use for summarization. + :type length: typing.Optional[int] + """ + self._max_len_summary = length + + # @property + # def custom_prompt(self) -> Optional[str]: + # """Get the custom system prompt to use for inferencing. + # :return: The custom prompt to use for inferencing. + # :rtype: Optional[str] + # """ + # return self._custom_prompt + + # @custom_prompt.setter + # def custom_prompt(self, prompt: Optional[str]) -> None: + # """Set the custom prompt to use for inferencing. + + # :param prompt: The custom prompt to use for inferencing. + # :type prompt: Optional[str] + # """ + # self._custom_prompt = prompt + + def items(self): + return self.__dict__.items() + + def __eq__(self, other: object) -> bool: + """Returns True if both instances have the same values. + + This method check instances equality and returns True if both of + the instances have the same attributes with the same values. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + if not isinstance(other, PromptSettings): + return False + return ( + self.enable_chain_of_thought == other.enable_chain_of_thought + and self.enable_chain_of_density == other.enable_chain_of_density + and self.max_len_summary == other.max_len_summary + # self.custom_prompt == other.custom_prompt + ) + + def __ne__(self, other: object) -> bool: + """Check inequality between two PromptSettings objects. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/teacher_model_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/teacher_model_settings.py new file mode 100644 index 00000000..481800de --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distillation/teacher_model_settings.py @@ -0,0 +1,93 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +from typing import Dict, Optional + +from azure.ai.ml._utils._experimental import experimental +from azure.ai.ml.entities._job.distillation.endpoint_request_settings import EndpointRequestSettings + + +@experimental +class TeacherModelSettings: + def __init__( + self, + *, + inference_parameters: Optional[Dict] = None, + endpoint_request_settings: Optional[EndpointRequestSettings] = None, + ): + """Initialize TeacherModelSettings + + :param inference_parameters: The inference parameters inferencing requests will use, defaults to None + :type inference_parameters: typing.Optional[typing.Dict], optional + :param endpoint_request_settings: The settings to use for the endpoint, defaults to None + :type endpoint_request_settings: typing.Optional[EndpointRequestSettings], optional + """ + self._inference_parameters = inference_parameters + self._endpoint_request_settings = endpoint_request_settings + + @property + def inference_parameters(self) -> Optional[Dict]: + """Get the inference parameters. + + :return: The inference parameters. 
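A small sketch of the PromptSettings class defined above configured for a summarization data-generation task; the summary length is an assumption.

```python
from azure.ai.ml.entities._job.distillation.prompt_settings import PromptSettings

summarization_prompts = PromptSettings(
    enable_chain_of_density=True,   # per the docstring, only valid for SUMMARIZATION tasks
    max_len_summary=80,             # assumed maximum summary length in tokens
)
```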
+ :rtype: typing.Optional[typing.Dict] + """ + return self._inference_parameters + + @inference_parameters.setter + def inference_parameters(self, params: Optional[Dict]) -> None: + """Set the inference parameters. + + :param params: Inference parameters. + :type params: typing.Optional[typing.Dict] + """ + self._inference_parameters = params + + @property + def endpoint_request_settings(self) -> Optional[EndpointRequestSettings]: + """Get the endpoint request settings. + + :return: The endpoint request settings. + :rtype: typing.Optional[EndpointRequestSettings] + """ + return self._endpoint_request_settings + + @endpoint_request_settings.setter + def endpoint_request_settings(self, endpoint_settings: Optional[EndpointRequestSettings]) -> None: + """Set the endpoint request settings. + + :param endpoint_settings: Endpoint request settings + :type endpoint_settings: typing.Optional[EndpointRequestSettings] + """ + self._endpoint_request_settings = endpoint_settings + + def items(self): + return self.__dict__.items() + + def __eq__(self, other: object) -> bool: + """Returns True if both instances have the same values. + + This method check instances equality and returns True if both of + the instances have the same attributes with the same values. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + if not isinstance(other, TeacherModelSettings): + return False + return ( + self.inference_parameters == other.inference_parameters + and self.endpoint_request_settings == other.endpoint_request_settings + ) + + def __ne__(self, other: object) -> bool: + """Check inequality between two TeacherModelSettings objects. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distribution.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distribution.py new file mode 100644 index 00000000..ec7277c6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/distribution.py @@ -0,0 +1,229 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from typing import Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + DistributionConfiguration as RestDistributionConfiguration, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import DistributionType as RestDistributionType +from azure.ai.ml._restclient.v2023_04_01_preview.models import Mpi as RestMpi +from azure.ai.ml._restclient.v2023_04_01_preview.models import PyTorch as RestPyTorch +from azure.ai.ml._restclient.v2023_04_01_preview.models import Ray as RestRay +from azure.ai.ml._restclient.v2023_04_01_preview.models import TensorFlow as RestTensorFlow +from azure.ai.ml._utils._experimental import experimental +from azure.ai.ml.constants import DistributionType +from azure.ai.ml.entities._mixins import RestTranslatableMixin + +SDK_TO_REST = { + DistributionType.MPI: RestDistributionType.MPI, + DistributionType.TENSORFLOW: RestDistributionType.TENSOR_FLOW, + DistributionType.PYTORCH: RestDistributionType.PY_TORCH, + DistributionType.RAY: RestDistributionType.RAY, +} + + +class DistributionConfiguration(RestTranslatableMixin): + """Distribution configuration for a component or job. 
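+
+# Illustrative aside, not part of the original file: a sketch of pairing the
+# TeacherModelSettings class above with inference parameters and endpoint request
+# settings. The EndpointRequestSettings keywords are assumed from the attributes its
+# __eq__ compares; all values are placeholders.
+from azure.ai.ml.entities._job.distillation.endpoint_request_settings import EndpointRequestSettings
+from azure.ai.ml.entities._job.distillation.teacher_model_settings import TeacherModelSettings
+
+teacher_settings = TeacherModelSettings(
+    inference_parameters={"temperature": 0.1, "max_tokens": 512},  # forwarded with inference requests
+    endpoint_request_settings=EndpointRequestSettings(
+        request_batch_size=4,            # assumed keyword, mirrors the request_batch_size attribute
+        min_endpoint_success_ratio=0.9,  # assumed keyword, mirrors the min_endpoint_success_ratio attribute
+    ),
+)
+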
+ + This class is not meant to be instantiated directly. Instead, use one of its subclasses. + """ + + def __init__(self, **kwargs: Any) -> None: + self.type: Any = None + + @classmethod + def _from_rest_object( + cls, obj: Optional[Union[RestDistributionConfiguration, Dict]] + ) -> Optional["DistributionConfiguration"]: + """Constructs a DistributionConfiguration object from a REST object + + This function works for distribution property of a Job object and of a Component object() + + Distribution of Job when returned by MFE, is a RestDistributionConfiguration + + Distribution of Component when returned by MFE, is a Dict. + e.g. {'type': 'Mpi', 'process_count_per_instance': '1'} + + So in the job distribution case, we need to call as_dict() first and get type from "distribution_type" property. + In the componenet case, we need to extract type from key "type" + + + :param obj: The object to translate + :type obj: Optional[Union[RestDistributionConfiguration, Dict]] + :return: The distribution configuration + :rtype: DistributionConfiguration + """ + if obj is None: + return None + + if isinstance(obj, dict): + data = obj + else: + data = obj.as_dict() + + type_str = data.pop("distribution_type", None) or data.pop("type", None) + klass = DISTRIBUTION_TYPE_MAP[type_str.lower()] + res: DistributionConfiguration = klass(**data) + return res + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, DistributionConfiguration): + return NotImplemented + res: bool = self._to_rest_object() == other._to_rest_object() + return res + + +class MpiDistribution(DistributionConfiguration): + """MPI distribution configuration. + + :keyword process_count_per_instance: The number of processes per node. + :paramtype process_count_per_instance: Optional[int] + :ivar type: Specifies the type of distribution. Set automatically to "mpi" for this class. + :vartype type: str + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_misc.py + :start-after: [START mpi_distribution_configuration] + :end-before: [END mpi_distribution_configuration] + :language: python + :dedent: 8 + :caption: Configuring a CommandComponent with an MpiDistribution. + """ + + def __init__(self, *, process_count_per_instance: Optional[int] = None, **kwargs: Any) -> None: + super().__init__(**kwargs) + self.type = DistributionType.MPI + self.process_count_per_instance = process_count_per_instance + + def _to_rest_object(self) -> RestMpi: + return RestMpi(process_count_per_instance=self.process_count_per_instance) + + +class PyTorchDistribution(DistributionConfiguration): + """PyTorch distribution configuration. + + :keyword process_count_per_instance: The number of processes per node. + :paramtype process_count_per_instance: Optional[int] + :ivar type: Specifies the type of distribution. Set automatically to "pytorch" for this class. + :vartype type: str + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_misc.py + :start-after: [START pytorch_distribution_configuration] + :end-before: [END pytorch_distribution_configuration] + :language: python + :dedent: 8 + :caption: Configuring a CommandComponent with a PyTorchDistribution. 
+ """ + + def __init__(self, *, process_count_per_instance: Optional[int] = None, **kwargs: Any) -> None: + super().__init__(**kwargs) + self.type = DistributionType.PYTORCH + self.process_count_per_instance = process_count_per_instance + + def _to_rest_object(self) -> RestPyTorch: + return RestPyTorch(process_count_per_instance=self.process_count_per_instance) + + +class TensorFlowDistribution(DistributionConfiguration): + """TensorFlow distribution configuration. + + :vartype distribution_type: str or ~azure.mgmt.machinelearningservices.models.DistributionType + :keyword parameter_server_count: The number of parameter server tasks. Defaults to 0. + :paramtype parameter_server_count: Optional[int] + :keyword worker_count: The number of workers. Defaults to the instance count. + :paramtype worker_count: Optional[int] + :ivar parameter_server_count: Number of parameter server tasks. + :vartype parameter_server_count: int + :ivar worker_count: Number of workers. If not specified, will default to the instance count. + :vartype worker_count: int + :ivar type: Specifies the type of distribution. Set automatically to "tensorflow" for this class. + :vartype type: str + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_misc.py + :start-after: [START tensorflow_distribution_configuration] + :end-before: [END tensorflow_distribution_configuration] + :language: python + :dedent: 8 + :caption: Configuring a CommandComponent with a TensorFlowDistribution. + """ + + def __init__( + self, *, parameter_server_count: Optional[int] = 0, worker_count: Optional[int] = None, **kwargs: Any + ) -> None: + super().__init__(**kwargs) + self.type = DistributionType.TENSORFLOW + self.parameter_server_count = parameter_server_count + self.worker_count = worker_count + + def _to_rest_object(self) -> RestTensorFlow: + return RestTensorFlow(parameter_server_count=self.parameter_server_count, worker_count=self.worker_count) + + +@experimental +class RayDistribution(DistributionConfiguration): + """Ray distribution configuration. + + :vartype distribution_type: str or ~azure.mgmt.machinelearningservices.models.DistributionType + :ivar port: The port of the head ray process. + :vartype port: int + :ivar address: The address of Ray head node. + :vartype address: str + :ivar include_dashboard: Provide this argument to start the Ray dashboard GUI. + :vartype include_dashboard: bool + :ivar dashboard_port: The port to bind the dashboard server to. + :vartype dashboard_port: int + :ivar head_node_additional_args: Additional arguments passed to ray start in head node. + :vartype head_node_additional_args: str + :ivar worker_node_additional_args: Additional arguments passed to ray start in worker node. + :vartype worker_node_additional_args: str + :ivar type: Specifies the type of distribution. Set automatically to "Ray" for this class. 
+ :vartype type: str + """ + + def __init__( + self, + *, + port: Optional[int] = None, + address: Optional[str] = None, + include_dashboard: Optional[bool] = None, + dashboard_port: Optional[int] = None, + head_node_additional_args: Optional[str] = None, + worker_node_additional_args: Optional[str] = None, + **kwargs: Any + ): + super().__init__(**kwargs) + self.type = DistributionType.RAY + + self.port = port + self.address = address + self.include_dashboard = include_dashboard + self.dashboard_port = dashboard_port + self.head_node_additional_args = head_node_additional_args + self.worker_node_additional_args = worker_node_additional_args + + def _to_rest_object(self) -> RestRay: + return RestRay( + port=self.port, + address=self.address, + include_dashboard=self.include_dashboard, + dashboard_port=self.dashboard_port, + head_node_additional_args=self.head_node_additional_args, + worker_node_additional_args=self.worker_node_additional_args, + ) + + +DISTRIBUTION_TYPE_MAP = { + DistributionType.MPI: MpiDistribution, + DistributionType.TENSORFLOW: TensorFlowDistribution, + DistributionType.PYTORCH: PyTorchDistribution, + DistributionType.RAY: RayDistribution, +} diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/__init__.py new file mode 100644 index 00000000..fdf8caba --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/__init__.py @@ -0,0 +1,5 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/azure_openai_finetuning_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/azure_openai_finetuning_job.py new file mode 100644 index 00000000..e659c634 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/azure_openai_finetuning_job.py @@ -0,0 +1,242 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
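+
+# Illustrative aside, not part of the original file: a sketch of the distribution
+# classes defined above. The dict passed to _from_rest_object mirrors the
+# component-shaped payload described in that method's docstring.
+from azure.ai.ml.entities._job.distribution import (
+    DistributionConfiguration,
+    MpiDistribution,
+    PyTorchDistribution,
+)
+
+torch_dist = PyTorchDistribution(process_count_per_instance=4)  # e.g. one process per GPU
+
+restored = DistributionConfiguration._from_rest_object({"type": "Mpi", "process_count_per_instance": "1"})
+assert isinstance(restored, MpiDistribution)  # "type" selects the subclass, remaining keys feed __init__
+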
+# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Any, Dict + +from azure.ai.ml._restclient.v2024_01_01_preview.models import ( + ModelProvider as RestModelProvider, + AzureOpenAiFineTuning as RestAzureOpenAIFineTuning, + FineTuningJob as RestFineTuningJob, + JobBase as RestJobBase, +) +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs + +from azure.ai.ml.entities._job.finetuning.finetuning_vertical import FineTuningVertical +from azure.ai.ml.entities._job.finetuning.azure_openai_hyperparameters import AzureOpenAIHyperparameters +from azure.ai.ml.entities._util import load_from_dict +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException +from azure.ai.ml._utils._experimental import experimental + + +@experimental +class AzureOpenAIFineTuningJob(FineTuningVertical): + def __init__( + self, + **kwargs: Any, + ) -> None: + # Extract any task specific settings + model = kwargs.pop("model", None) + task = kwargs.pop("task", None) + # Convert task to lowercase first letter, this is when we create + # object from the schema, using dict object from the REST api response. + # TextCompletion => textCompletion + if task: + task = task[0].lower() + task[1:] + training_data = kwargs.pop("training_data", None) + validation_data = kwargs.pop("validation_data", None) + hyperparameters = kwargs.pop("hyperparameters", None) + if hyperparameters and not isinstance(hyperparameters, AzureOpenAIHyperparameters): + raise ValidationException( + category=ErrorCategory.USER_ERROR, + target=ErrorTarget.JOB, + message="Hyperparameters if provided should of type AzureOpenAIHyperparameters", + no_personal_data_message="Hyperparameters if provided should of type AzureOpenAIHyperparameters", + ) + + self._hyperparameters = hyperparameters + + super().__init__( + task=task, + model=model, + model_provider=RestModelProvider.AZURE_OPEN_AI, + training_data=training_data, + validation_data=validation_data, + **kwargs, + ) + + @property + def hyperparameters(self) -> AzureOpenAIHyperparameters: + """Get hyperparameters. + + :return: Hyperparameters for finetuning the model. + :rtype: AzureOpenAIHyperparameters + """ + return self._hyperparameters + + @hyperparameters.setter + def hyperparameters(self, hyperparameters: AzureOpenAIHyperparameters) -> None: + """Set hyperparameters. + + :param hyperparameters: Hyperparameters for finetuning the model. + :type hyperparameters: AzureOpenAiHyperParameters + """ + self._hyperparameters = hyperparameters + + def _to_rest_object(self) -> "RestFineTuningJob": + """Convert CustomFineTuningVertical object to a RestFineTuningJob object. + + :return: REST object representation of this object. 
+ :rtype: JobBase + """ + aoai_finetuning_vertical = RestAzureOpenAIFineTuning( + task_type=self._task, + model=self._model, + model_provider=self._model_provider, + training_data=self._training_data, + validation_data=self._validation_data, + hyper_parameters=self.hyperparameters._to_rest_object() if self.hyperparameters else None, + ) + + self._resolve_inputs(aoai_finetuning_vertical) + + finetuning_job = RestFineTuningJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + properties=self.properties, + fine_tuning_details=aoai_finetuning_vertical, + outputs=to_rest_data_outputs(self.outputs), + ) + + result = RestJobBase(properties=finetuning_job) + result.name = self.name + + return result + + def _to_dict(self) -> Dict: + """Convert the object to a dictionary. + + :return: dictionary representation of the object. + :rtype: typing.Dict + """ + from azure.ai.ml._schema._finetuning.azure_openai_finetuning import AzureOpenAIFineTuningSchema + + schema_dict: dict = {} + # TODO: Combeback to this later for FineTuningJob in Pipelines + # if inside_pipeline: + # schema_dict = AutoMLClassificationNodeSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + # else: + schema_dict = AzureOpenAIFineTuningSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + + return schema_dict + + def __eq__(self, other: object) -> bool: + """Returns True if both instances have the same values. + + This method check instances equality and returns True if both of + the instances have the same attributes with the same values. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + if not isinstance(other, AzureOpenAIFineTuningJob): + return NotImplemented + + return super().__eq__(other) and self.hyperparameters == other.hyperparameters + + def __ne__(self, other: object) -> bool: + """Check inequality between two AzureOpenAIFineTuningJob objects. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + return not self.__eq__(other) + + @classmethod + def _from_rest_object(cls, obj: RestJobBase) -> "AzureOpenAIFineTuningJob": + """Convert a REST object to AzureOpenAIFineTuningJob object. + + :param obj: AzureOpenAIFineTuningJob in Rest format. + :type obj: JobBase + :return: AzureOpenAIFineTuningJob objects. + :rtype: AzureOpenAIFineTuningJob + """ + + properties: RestFineTuningJob = obj.properties + finetuning_details: RestAzureOpenAIFineTuning = properties.fine_tuning_details + + job_args_dict = { + "id": obj.id, + "name": obj.name, + "description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "experiment_name": properties.experiment_name, + "status": properties.status, + "creation_context": obj.system_data, + "display_name": properties.display_name, + "outputs": from_rest_data_outputs(properties.outputs), + } + + aoai_finetuning_job = cls( + task=finetuning_details.task_type, + model=finetuning_details.model, + training_data=finetuning_details.training_data, + validation_data=finetuning_details.validation_data, + hyperparameters=AzureOpenAIHyperparameters._from_rest_object(finetuning_details.hyper_parameters), + **job_args_dict, + ) + + aoai_finetuning_job._restore_inputs() + + return aoai_finetuning_job + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "AzureOpenAIFineTuningJob": + """Load from a dictionary. 
+ + :param data: dictionary representation of the object. + :type data: typing.Dict + :param context: dictionary containing the context. + :type context: typing.Dict + :param additional_message: additional message to be added to the error message. + :type additional_message: str + :return: AzureOpenAIFineTuningJob object. + :rtype: AzureOpenAIFineTuningJob + """ + from azure.ai.ml._schema._finetuning.azure_openai_finetuning import AzureOpenAIFineTuningSchema + + # TODO: Combeback to this later - Pipeline part. + # from azure.ai.ml._schema.pipeline.automl_node import AutoMLClassificationNodeSchema + + # if kwargs.pop("inside_pipeline", False): + # loaded_data = load_from_dict( + # AutoMLClassificationNodeSchema, + # data, + # context, + # additional_message, + # **kwargs, + # ) + # else: + loaded_data = load_from_dict(AzureOpenAIFineTuningSchema, data, context, additional_message, **kwargs) + + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "AzureOpenAIFineTuningJob": + """Create an instance from a schema dictionary. + + :param loaded_data: dictionary containing the data. + :type loaded_data: typing.Dict + :return: AzureOpenAIFineTuningJob object. + :rtype: AzureOpenAIFineTuningJob + """ + + job = AzureOpenAIFineTuningJob(**loaded_data) + return job diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/azure_openai_hyperparameters.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/azure_openai_hyperparameters.py new file mode 100644 index 00000000..2b420a46 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/azure_openai_hyperparameters.py @@ -0,0 +1,125 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from typing import Optional +from azure.ai.ml.entities._mixins import RestTranslatableMixin +from azure.ai.ml._restclient.v2024_01_01_preview.models import ( + AzureOpenAiHyperParameters as RestAzureOpenAiHyperParameters, +) +from azure.ai.ml._utils._experimental import experimental + + +@experimental +class AzureOpenAIHyperparameters(RestTranslatableMixin): + """Hyperparameters for Azure OpenAI model finetuning.""" + + def __init__( + self, + *, + batch_size: Optional[int] = None, + learning_rate_multiplier: Optional[float] = None, + n_epochs: Optional[int] = None, + ): + """Initialize AzureOpenAIHyperparameters. + + param batch_size: Number of examples in each batch. + A larger batch size means that model parameters are updated less + frequently, but with lower variance. Defaults to None. + type batch_size: int + param learning_rate_multiplier: Scaling factor for the learning rate. + A smaller learning rate may be useful to avoid overfitting. + type learning_rate_multiplier: float + param n_epochs: The number of epochs to train the model for. + An epoch refers to one full cycle through the training dataset. 
+ type n_epochs: int + """ + self._batch_size = batch_size + self._learning_rate_multiplier = learning_rate_multiplier + self._n_epochs = n_epochs + # Not exposed in the public API, so need to check how to handle this + # self._additional_properties = kwargs + + @property + def batch_size(self) -> Optional[int]: + """Get the batch size for training.""" + return self._batch_size + + @batch_size.setter + def batch_size(self, value: Optional[int]) -> None: + """Set the batch size for training. + :param value: The batch size for training. + :type value: int + """ + self._batch_size = value + + @property + def learning_rate_multiplier(self) -> Optional[float]: + """Get the learning rate multiplier. + :return: The learning rate multiplier. + :rtype: float + """ + return self._learning_rate_multiplier + + @learning_rate_multiplier.setter + def learning_rate_multiplier(self, value: Optional[float]) -> None: + """Set the learning rate multiplier. + :param value: The learning rate multiplier. + :type value: float + """ + self._learning_rate_multiplier = value + + @property + def n_epochs(self) -> Optional[int]: + """Get the number of epochs. + :return: The number of epochs. + :rtype: int + """ + return self._n_epochs + + @n_epochs.setter + def n_epochs(self, value: Optional[int]) -> None: + """Set the number of epochs. + :param value: The number of epochs. + :type value: int + """ + self._n_epochs = value + + # Not exposed in the public API, so need to check how to handle this + # @property + # def additional_properties(self) -> dict: + # """Get additional properties.""" + # return self._additional_properties + + # @additional_properties.setter + # def additional_properties(self, value: dict) -> None: + # """Set additional properties.""" + # self._additional_properties = value + + def _to_rest_object(self) -> RestAzureOpenAiHyperParameters: + return RestAzureOpenAiHyperParameters( + batch_size=self._batch_size, + learning_rate_multiplier=self._learning_rate_multiplier, + n_epochs=self._n_epochs, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, AzureOpenAIHyperparameters): + return NotImplemented + return ( + self._batch_size == other._batch_size + and self._learning_rate_multiplier == other._learning_rate_multiplier + and self._n_epochs == other._n_epochs + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) + + @classmethod + def _from_rest_object(cls, obj: RestAzureOpenAiHyperParameters) -> "AzureOpenAIHyperparameters": + aoai_hyperparameters = cls( + batch_size=obj.batch_size, + learning_rate_multiplier=obj.learning_rate_multiplier, + n_epochs=obj.n_epochs, + ) + return aoai_hyperparameters diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/custom_model_finetuning_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/custom_model_finetuning_job.py new file mode 100644 index 00000000..e6ddd86d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/custom_model_finetuning_job.py @@ -0,0 +1,258 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
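+
+# Illustrative aside, not part of the original file: a sketch of the hyperparameter
+# round-trip used by the Azure OpenAI fine-tuning job above. Values are placeholders;
+# the conversion helpers are the private methods defined in this module.
+from azure.ai.ml.entities._job.finetuning.azure_openai_hyperparameters import AzureOpenAIHyperparameters
+
+hyperparameters = AzureOpenAIHyperparameters(
+    batch_size=8,                  # examples per gradient update
+    learning_rate_multiplier=0.1,  # scales the base learning rate
+    n_epochs=3,                    # full passes over the training data
+)
+rest_obj = hyperparameters._to_rest_object()
+assert AzureOpenAIHyperparameters._from_rest_object(rest_obj) == hyperparameters
+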
+# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Any, Dict + +from azure.ai.ml._restclient.v2024_10_01_preview.models import ( + ModelProvider as RestModelProvider, + CustomModelFineTuning as RestCustomModelFineTuningVertical, + FineTuningJob as RestFineTuningJob, + JobBase as RestJobBase, +) +from azure.ai.ml.entities._job._input_output_helpers import ( + from_rest_data_outputs, + to_rest_data_outputs, +) +from azure.ai.ml.entities._job.job_resources import JobResources +from azure.ai.ml.entities._job.queue_settings import QueueSettings +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.entities._job.finetuning.finetuning_vertical import FineTuningVertical +from azure.ai.ml.entities._util import load_from_dict +from azure.ai.ml._utils._experimental import experimental + + +@experimental +class CustomModelFineTuningJob(FineTuningVertical): + def __init__( + self, + **kwargs: Any, + ) -> None: + # Extract any task specific settings + model = kwargs.pop("model", None) + task = kwargs.pop("task", None) + # Convert task to lowercase first letter, this is when we create + # object from the schema, using dict object from the REST api response. + # TextCompletion => textCompletion + if task: + task = task[0].lower() + task[1:] + training_data = kwargs.pop("training_data", None) + validation_data = kwargs.pop("validation_data", None) + self._hyperparameters = kwargs.pop("hyperparameters", None) + super().__init__( + task=task, + model=model, + model_provider=RestModelProvider.CUSTOM, + training_data=training_data, + validation_data=validation_data, + **kwargs, + ) + + @property + def hyperparameters(self) -> Dict[str, str]: + """Get hyperparameters. + + :return: + :rtype: hyperparameters: Dict[str,str] + """ + return self._hyperparameters + + @hyperparameters.setter + def hyperparameters(self, hyperparameters: Dict[str, str]) -> None: + """Set hyperparameters. + + :param hyperparameters: Hyperparameters for finetuning the model + :type hyperparameters: Dict[str,str] + """ + self._hyperparameters = hyperparameters + + def _to_rest_object(self) -> "RestFineTuningJob": + """Convert CustomFineTuningVertical object to a RestFineTuningJob object. + + :return: REST object representation of this object. + :rtype: JobBase + """ + custom_finetuning_vertical = RestCustomModelFineTuningVertical( + task_type=self._task, + model=self._model, + model_provider=self._model_provider, + training_data=self._training_data, + validation_data=self._validation_data, + hyper_parameters=self._hyperparameters, + ) + self._resolve_inputs(custom_finetuning_vertical) + + finetuning_job = RestFineTuningJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + services=self.services, + tags=self.tags, + properties=self.properties, + compute_id=self.compute, + fine_tuning_details=custom_finetuning_vertical, + outputs=to_rest_data_outputs(self.outputs), + ) + if self.resources: + finetuning_job.resources = self.resources._to_rest_object() + if self.queue_settings: + finetuning_job.queue_settings = self.queue_settings._to_rest_object() + + result = RestJobBase(properties=finetuning_job) + result.name = self.name + + return result + + def _to_dict(self) -> Dict: + """Convert the object to a dictionary. + + :return: dictionary representation of the object. 
+ :rtype: typing.Dict + """ + from azure.ai.ml._schema._finetuning.custom_model_finetuning import ( + CustomModelFineTuningSchema, + ) + + schema_dict: dict = {} + # TODO: Combeback to this later for FineTuningJob in pipeline + # if inside_pipeline: + # schema_dict = AutoMLClassificationNodeSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + # else: + schema_dict = CustomModelFineTuningSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + + return schema_dict + + def __eq__(self, other: object) -> bool: + """Returns True if both instances have the same values. + + This method check instances equality and returns True if both of + the instances have the same attributes with the same values. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + if not isinstance(other, CustomModelFineTuningJob): + return NotImplemented + + return super().__eq__(other) and self.hyperparameters == other.hyperparameters + + def __ne__(self, other: object) -> bool: + """Check inequality between two CustomModelFineTuningJob objects. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + return not self.__eq__(other) + + @classmethod + def _from_rest_object(cls, obj: RestJobBase) -> "CustomModelFineTuningJob": + """Convert a REST object to CustomModelFineTuningJob object. + + :param obj: CustomModelFineTuningJob in Rest format. + :type obj: JobBase + :return: CustomModelFineTuningJob objects. + :rtype: CustomModelFineTuningJob + """ + + properties: RestFineTuningJob = obj.properties + finetuning_details: RestCustomModelFineTuningVertical = properties.fine_tuning_details + + job_args_dict = { + "id": obj.id, + "name": obj.name, + "description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "services": properties.services, + "experiment_name": properties.experiment_name, + "status": properties.status, + "creation_context": obj.system_data, + "display_name": properties.display_name, + "compute": properties.compute_id, + "outputs": from_rest_data_outputs(properties.outputs), + } + + if properties.resources: + job_args_dict["resources"] = JobResources._from_rest_object(properties.resources) + if properties.queue_settings: + job_args_dict["queue_settings"] = QueueSettings._from_rest_object(properties.queue_settings) + + custom_model_finetuning_job = cls( + task=finetuning_details.task_type, + model=finetuning_details.model, + training_data=finetuning_details.training_data, + validation_data=finetuning_details.validation_data, + hyperparameters=finetuning_details.hyper_parameters, + **job_args_dict, + ) + + custom_model_finetuning_job._restore_inputs() + + return custom_model_finetuning_job + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "CustomModelFineTuningJob": + """Load from a dictionary. + + :param data: dictionary representation of the object. + :type data: typing.Dict + :param context: dictionary containing the context. + :type context: typing.Dict + :param additional_message: additional message to be added to the error message. + :type additional_message: str + :return: CustomModelFineTuningJob object. + :rtype: CustomModelFineTuningJob + """ + from azure.ai.ml._schema._finetuning.custom_model_finetuning import ( + CustomModelFineTuningSchema, + ) + + # TODO: Combeback to this later - Pipeline part. 
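+
+# Illustrative aside, not part of the original file: a sketch of constructing the
+# CustomModelFineTuningJob defined above directly in Python. The model asset, data
+# paths and hyperparameter names are placeholders; compute and outputs are omitted.
+from azure.ai.ml import Input
+from azure.ai.ml.entities._job.finetuning.custom_model_finetuning_job import CustomModelFineTuningJob
+
+job = CustomModelFineTuningJob(
+    task="TextCompletion",  # normalized to "textCompletion" by __init__
+    model=Input(type="mlflow_model", path="azureml://registries/example/models/example-model/versions/1"),
+    training_data=Input(type="uri_file", path="./train.jsonl"),
+    validation_data=Input(type="uri_file", path="./validation.jsonl"),
+    hyperparameters={"learning_rate": "0.00002", "num_train_epochs": "1"},  # free-form string dict
+)
+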
+ # from azure.ai.ml._schema.pipeline.automl_node import AutoMLClassificationNodeSchema + + # if kwargs.pop("inside_pipeline", False): + # loaded_data = load_from_dict( + # AutoMLClassificationNodeSchema, + # data, + # context, + # additional_message, + # **kwargs, + # ) + # else: + loaded_data = load_from_dict(CustomModelFineTuningSchema, data, context, additional_message, **kwargs) + + training_data = loaded_data.get("training_data", None) + if isinstance(training_data, str): + loaded_data["training_data"] = Input(type="uri_file", path=training_data) + + validation_data = loaded_data.get("validation_data", None) + if isinstance(validation_data, str): + loaded_data["validation_data"] = Input(type="uri_file", path=validation_data) + + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "CustomModelFineTuningJob": + """Create an instance from a schema dictionary. + + :param loaded_data: dictionary containing the data. + :type loaded_data: typing.Dict + :return: CustomModelFineTuningJob object. + :rtype: CustomModelFineTuningJob + """ + job = CustomModelFineTuningJob(**loaded_data) + return job diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/finetuning_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/finetuning_job.py new file mode 100644 index 00000000..ec8d9d5d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/finetuning_job.py @@ -0,0 +1,224 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Any, Dict, Optional + +from azure.ai.ml.entities._job.job import Job +from azure.ai.ml.entities._job.job_io_mixin import JobIOMixin +from azure.ai.ml._restclient.v2024_10_01_preview.models import ( + ModelProvider as RestModelProvider, + JobBase as RestJobBase, +) +from azure.ai.ml.constants import JobType +from azure.ai.ml.constants._common import TYPE +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.entities._job.job_resources import JobResources +from azure.ai.ml.entities._job.queue_settings import QueueSettings +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException +from azure.ai.ml.constants._job.finetuning import FineTuningConstants +from azure.ai.ml._utils._experimental import experimental + + +@experimental +class FineTuningJob(Job, JobIOMixin): + def __init__( + self, + **kwargs: Any, + ) -> None: + kwargs[TYPE] = JobType.FINE_TUNING + self.resources = kwargs.pop("resources", None) + self.queue_settings = kwargs.pop("queue_settings", None) + self.outputs = kwargs.pop("outputs", None) + super().__init__(**kwargs) + + @property + def resources(self) -> Optional[JobResources]: + """Job resources to use during job execution. + :return: Job Resources object. + :rtype: JobResources + """ + return self._resources if hasattr(self, "_resources") else None + + @resources.setter + def resources(self, value: JobResources) -> None: + """Set JobResources. + + :param value: JobResources object. + :type value: JobResources + :raises ValidationException: Expected a JobResources object. + """ + if isinstance(value, JobResources): + self._resources = value + elif value: + msg = "Expected an instance of JobResources." 
+ raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.FINETUNING, + error_category=ErrorCategory.USER_ERROR, + ) + + @property + def queue_settings(self) -> Optional[QueueSettings]: + """Queue settings for job execution. + :return: QueueSettings object. + :rtype: QueueSettings + """ + return self._queue_settings if hasattr(self, "_queue_settings") else None + + @queue_settings.setter + def queue_settings(self, value: QueueSettings) -> None: + """Set queue settings for job execution. + + :param value: QueueSettings object. + :type value: QueueSettings + :raises ValidationException: Expected a QueueSettings object. + """ + if isinstance(value, QueueSettings): + self._queue_settings = value + elif value: + msg = "Expected an instance of QueueSettings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.FINETUNING, + error_category=ErrorCategory.USER_ERROR, + ) + + def __eq__(self, other: object) -> bool: + """Returns True if both instances have the same values. + + This method check instances equality and returns True if both of + the instances have the same attributes with the same values. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + if not isinstance(other, FineTuningJob): + return NotImplemented + + queue_settings_match = (not self.queue_settings and not other.queue_settings) or ( + self.queue_settings is not None + and other.queue_settings is not None + and self.queue_settings.job_tier is not None + and other.queue_settings.job_tier is not None + and self.queue_settings.job_tier.lower() == other.queue_settings.job_tier.lower() + ) + + outputs_match = not self.outputs and not other.outputs + if self.outputs and other.outputs: + outputs_match = ( + self.outputs["registered_model"].name == other.outputs["registered_model"].name + and self.outputs["registered_model"].type == other.outputs["registered_model"].type + ) + + return ( + outputs_match + and self.resources == other.resources + and queue_settings_match + # add properties from base class + and self.name == other.name + and self.description == other.description + and self.tags == other.tags + and self.properties == other.properties + and self.compute == other.compute + and self.id == other.id + and self.experiment_name == other.experiment_name + and self.status == other.status + ) + + def __ne__(self, other: object) -> bool: + """Check inequality between two FineTuningJob objects. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + return not self.__eq__(other) + + @classmethod + def _get_model_provider_mapping(cls) -> Dict: + """Create a mapping of task type to job class. + + :return: An FineTuningVertical object containing the model provider type to job class mapping. + :rtype: FineTuningJob + """ + from .custom_model_finetuning_job import CustomModelFineTuningJob + from .azure_openai_finetuning_job import AzureOpenAIFineTuningJob + + return { + camel_to_snake(RestModelProvider.CUSTOM): CustomModelFineTuningJob, + camel_to_snake(RestModelProvider.AZURE_OPEN_AI): AzureOpenAIFineTuningJob, + } + + @classmethod + def _load_from_rest(cls, obj: RestJobBase) -> "FineTuningJob": + """Loads the rest object to a dict containing items to init the AutoMLJob objects. + + :param obj: Azure Resource Manager resource envelope. 
+ :type obj: JobBase + :raises ValidationException: task type validation error + :return: A FineTuningJob + :rtype: FineTuningJob + """ + model_provider = ( + camel_to_snake(obj.properties.fine_tuning_details.model_provider) + if obj.properties.fine_tuning_details.model_provider + else None + ) + class_type = cls._get_model_provider_mapping().get(model_provider, None) + if class_type: + res: FineTuningJob = class_type._from_rest_object(obj) + return res + msg = f"Unsupported model provider type: {obj.properties.fine_tuning_details.model_provider}" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.FINETUNING, + error_category=ErrorCategory.SYSTEM_ERROR, + ) + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "FineTuningJob": + """Loads the dictionary objects to an FineTuningJob object. + + :param data: A data dictionary. + :type data: typing.Dict + :param context: A context dictionary. + :type context: typing.Dict + :param additional_message: An additional message to be logged in the ValidationException. + :type additional_message: str + + :raises ValidationException: task type validation error + :return: An FineTuningJob + :rtype: FineTuningJob + """ + model_provider = data.get(FineTuningConstants.ModelProvider) + class_type = cls._get_model_provider_mapping().get(model_provider, None) + if class_type: + res: FineTuningJob = class_type._load_from_dict( + data, + context, + additional_message, + **kwargs, + ) + return res + msg = f"Unsupported model provider type: {model_provider}" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/finetuning_vertical.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/finetuning_vertical.py new file mode 100644 index 00000000..c9a5fe41 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/finetuning/finetuning_vertical.py @@ -0,0 +1,202 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from typing import Any, Optional, cast + +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException +from azure.ai.ml._restclient.v2024_10_01_preview.models import ( + ModelProvider as RestModelProvider, + FineTuningVertical as RestFineTuningVertical, + UriFileJobInput, + MLFlowModelJobInput, +) +from azure.ai.ml.constants._common import AssetTypes +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job.finetuning.finetuning_job import FineTuningJob + +from azure.ai.ml._utils._experimental import experimental + + +@experimental +class FineTuningVertical(FineTuningJob): + def __init__( + self, + *, + task: str, + model: Input, + model_provider: Optional[str], + training_data: Input, + validation_data: Optional[Input] = None, + **kwargs: Any, + ) -> None: + self._task = task + self._model = model + self._model_provider = model_provider + self._training_data = training_data + self._validation_data = validation_data + super().__init__(**kwargs) + + @property + def task(self) -> str: + """Get finetuning task. + + :return: The type of task to run. 
Possible values include: "ChatCompletion" + "TextCompletion", "TextClassification", "QuestionAnswering","TextSummarization", + "TokenClassification", "TextTranslation", "ImageClassification", "ImageInstanceSegmentation", + "ImageObjectDetection","VideoMultiObjectTracking". + + :rtype: str + """ + return self._task + + @task.setter + def task(self, task: str) -> None: + """Set finetuning task. + + :param task: The type of task to run. Possible values include: "ChatCompletion" + "TextCompletion", "TextClassification", "QuestionAnswering","TextSummarization", + "TokenClassification", "TextTranslation", "ImageClassification", "ImageInstanceSegmentation", + "ImageObjectDetection","VideoMultiObjectTracking",. + :type task: str + + :return: None + """ + self._task = task + + @property + def model(self) -> Optional[Input]: + """The model to be fine-tuned. + :return: Input object representing the mlflow model to be fine-tuned. + :rtype: Input + """ + return self._model + + @model.setter + def model(self, value: Input) -> None: + """Set the model to be fine-tuned. + + :param value: Input object representing the mlflow model to be fine-tuned. + :type value: Input + :raises ValidationException: Expected a mlflow model input. + """ + if isinstance(value, Input) and (cast(Input, value).type in ("mlflow_model", "custom_model")): + self._model = value + else: + msg = "Expected a mlflow model input or custom model input." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.FINETUNING, + error_category=ErrorCategory.USER_ERROR, + ) + + @property + def model_provider(self) -> Optional[str]: + """The model provider. + :return: The model provider. + :rtype: str + """ + return self._model_provider + + @model_provider.setter + def model_provider(self, value: str) -> None: + """Set the model provider. + + :param value: The model provider. + :type value: str + """ + self._model_provider = RestModelProvider[camel_to_snake(value).upper()] if value else None + + @property + def training_data(self) -> Input: + """Get training data. + + :return: Training data input + :rtype: Input + """ + return self._training_data + + @training_data.setter + def training_data(self, training_data: Input) -> None: + """Set training data. + + :param training_data: Training data input + :type training_data: Input + """ + self._training_data = training_data + + @property + def validation_data(self) -> Optional[Input]: + """Get validation data. + + :return: Validation data input + :rtype: Input + """ + return self._validation_data + + @validation_data.setter + def validation_data(self, validation_data: Input) -> None: + """Set validation data. + + :param validation_data: Validation data input + :type validation_data: Input + """ + self._validation_data = validation_data + + def _resolve_inputs(self, rest_job: RestFineTuningVertical) -> None: + """Resolve JobInputs to UriFileJobInput within data_settings. + + :param rest_job: The rest job object. 
+ :type rest_job: RestFineTuningVertical + """ + if isinstance(rest_job.training_data, Input): + rest_job.training_data = UriFileJobInput(uri=rest_job.training_data.path) + if isinstance(rest_job.validation_data, Input): + rest_job.validation_data = UriFileJobInput(uri=rest_job.validation_data.path) + if isinstance(rest_job.model, Input): + rest_job.model = MLFlowModelJobInput(uri=rest_job.model.path) + + def _restore_inputs(self) -> None: + """Restore UriFileJobInputs to JobInputs within data_settings.""" + if isinstance(self.training_data, UriFileJobInput): + self.training_data = Input(type=AssetTypes.URI_FILE, path=self.training_data.uri) + if isinstance(self.validation_data, UriFileJobInput): + self.validation_data = Input(type=AssetTypes.URI_FILE, path=self.validation_data.uri) + if isinstance(self.model, MLFlowModelJobInput): + self.model = Input(type=AssetTypes.MLFLOW_MODEL, path=self.model.uri) + + def __eq__(self, other: object) -> bool: + """Returns True if both instances have the same values. + + This method check instances equality and returns True if both of + the instances have the same attributes with the same values. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + if not isinstance(other, FineTuningVertical): + return NotImplemented + + return ( + # TODO: Equality from base class does not work, no current precedence for this + super().__eq__(other) + and self.task == other.task + and self.model == other.model + and self.model_provider == other.model_provider + and self.training_data == other.training_data + and self.validation_data == other.validation_data + ) + + def __ne__(self, other: object) -> bool: + """Check inequality between two FineTuningJob objects. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/import_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/import_job.py new file mode 100644 index 00000000..24d4ec90 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/import_job.py @@ -0,0 +1,285 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
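+
+# Illustrative aside, not part of the original file: a small sketch of the check the
+# FineTuningVertical.model setter above performs; asset paths are placeholders.
+from azure.ai.ml import Input
+
+def _is_accepted_model(model_input: Input) -> bool:
+    # Mirrors the setter: only mlflow_model and custom_model inputs are allowed.
+    return isinstance(model_input, Input) and model_input.type in ("mlflow_model", "custom_model")
+
+assert _is_accepted_model(Input(type="mlflow_model", path="azureml:example-model:1"))
+assert not _is_accepted_model(Input(type="uri_file", path="./weights.bin"))
+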
+# --------------------------------------------------------- + +import logging +from abc import ABC, abstractmethod +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, Optional + +from azure.ai.ml._restclient.v2022_02_01_preview.models import CommandJob as RestCommandJob +from azure.ai.ml._restclient.v2022_02_01_preview.models import JobBaseData +from azure.ai.ml._schema.job.import_job import ImportJobSchema +from azure.ai.ml._utils.utils import is_private_preview_enabled +from azure.ai.ml.constants import JobType +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, TYPE +from azure.ai.ml.entities._inputs_outputs import Output +from azure.ai.ml.entities._job._input_output_helpers import ( + from_rest_data_outputs, + from_rest_inputs_to_dataset_literal, + to_rest_data_outputs, + to_rest_dataset_literal_inputs, +) +from azure.ai.ml.entities._job.job_io_mixin import JobIOMixin +from azure.ai.ml.entities._util import load_from_dict +from azure.ai.ml.exceptions import MlException + +from .job import Job + +# avoid circular import error +if TYPE_CHECKING: + from azure.ai.ml.entities._builders import Import + from azure.ai.ml.entities._component.import_component import ImportComponent + +module_logger = logging.getLogger(__name__) + + +class ImportSource(ABC): + def __init__( + self, + *, + type: Optional[str] = None, # pylint: disable=redefined-builtin + connection: Optional[str] = None, + ): + self.type = type + self.connection = connection + + @abstractmethod + def _to_job_inputs(self) -> Dict[str, Optional[str]]: + pass + + @classmethod + def _from_job_inputs(cls, job_inputs: Dict[str, str]) -> "ImportSource": + """Translate job inputs to import source. + + :param job_inputs: The job inputs + :type job_inputs: Dict[str, str] + :return: The import source + :rtype: ImportSource + """ + type = job_inputs.get("type") # pylint: disable=redefined-builtin + connection = job_inputs.get("connection") + query = job_inputs.get("query") + path = job_inputs.get("path") + + import_source = ( + DatabaseImportSource(type=type, connection=connection, query=query) + if query is not None + else FileImportSource(type=type, connection=connection, path=path) + ) + return import_source + + +class DatabaseImportSource(ImportSource): + def __init__( + self, + *, + type: Optional[str] = None, # pylint: disable=redefined-builtin + connection: Optional[str] = None, + query: Optional[str] = None, + ): + ImportSource.__init__( + self, + type=type, + connection=connection, + ) + self.query = query + + def _to_job_inputs(self) -> Dict[str, Optional[str]]: + """Translate source to command Inputs. + + :return: The job inputs dict + :rtype: Dict[str, str] + """ + inputs = { + "type": self.type, + "connection": self.connection, + "query": self.query, + } + return inputs + + +class FileImportSource(ImportSource): + def __init__( + self, + *, + type: Optional[str] = None, # pylint: disable=redefined-builtin + connection: Optional[str] = None, + path: Optional[str] = None, + ): + ImportSource.__init__( + self, + type=type, + connection=connection, + ) + self.path = path + + def _to_job_inputs(self) -> Dict[str, Optional[str]]: + """Translate source to command Inputs. + + :return: The job inputs dict + :rtype: Dict[str, str] + """ + inputs = { + "type": self.type, + "connection": self.connection, + "path": self.path, + } + return inputs + + +class ImportJob(Job, JobIOMixin): + """Import job. + + :param name: Name of the job. + :type name: str + :param description: Description of the job. 
+ :type description: str + :param display_name: Display name of the job. + :type display_name: str + :param experiment_name: Name of the experiment the job will be created under. + If None is provided, default will be set to current directory name. + :type experiment_name: str + :param source: Input source parameters to the import job. + :type source: azure.ai.ml.entities.DatabaseImportSource or FileImportSource + :param output: output data binding used in the job. + :type output: azure.ai.ml.Output + :param kwargs: A dictionary of additional configuration parameters. + :type kwargs: dict + """ + + def __init__( + self, + *, + name: Optional[str] = None, + description: Optional[str] = None, + display_name: Optional[str] = None, + experiment_name: Optional[str] = None, + source: Optional[ImportSource] = None, + output: Optional[Output] = None, + **kwargs: Any, + ): + kwargs[TYPE] = JobType.IMPORT + + Job.__init__( + self, + name=name, + display_name=display_name, + description=description, + experiment_name=experiment_name, + **kwargs, + ) + + self.source = source + self.output = output + + def _to_dict(self) -> Dict: + res: dict = ImportJobSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + def _to_rest_object(self) -> JobBaseData: + # TODO: Remove in PuP + if not is_private_preview_enabled(): + msg = JobType.IMPORT + " job not supported." + raise MlException(message=msg, no_personal_data_message=msg) + + _inputs = self.source._to_job_inputs() if self.source is not None else None # pylint: disable=protected-access + if self.compute is None: + msg = "compute cannot be None." + raise MlException(message=msg, no_personal_data_message=msg) + + properties = RestCommandJob( + display_name=self.display_name, + description=self.description, + compute_id=self.compute, + experiment_name=self.experiment_name, + inputs=to_rest_dataset_literal_inputs(_inputs, job_type=self.type), + outputs=to_rest_data_outputs({"output": self.output}), + # TODO: Remove in PuP with native import job/component type support in MFE/Designer + # No longer applicable once new import job type is ready on MFE in PuP + # command and environment are required as we use command type for import + # command can be random string and the particular environment name here is defined as default in MFE + # public const string DefaultEnvironmentName = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu"; + # which is considered valid environment in MFE unless MFE changes current default logic + # but chance should be very low in PrP + command="import", + environment_id=self.compute.replace( + "/computes/DataFactory", "/environments/AzureML-sklearn-0.24-ubuntu18.04-py37-cpu" + ), + ) + result = JobBaseData(properties=properties) + result.name = self.name + return result + + @classmethod + def _load_from_dict(cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any) -> "ImportJob": + loaded_data = load_from_dict(ImportJobSchema, data, context, additional_message, **kwargs) + return ImportJob(base_path=context[BASE_PATH_CONTEXT_KEY], **loaded_data) + + @classmethod + def _load_from_rest(cls, obj: JobBaseData) -> "ImportJob": + rest_command_job: RestCommandJob = obj.properties + outputs = from_rest_data_outputs(rest_command_job.outputs) + inputs = from_rest_inputs_to_dataset_literal(rest_command_job.inputs) + + import_job = ImportJob( + name=obj.name, + id=obj.id, + display_name=rest_command_job.display_name, + description=rest_command_job.description, + experiment_name=rest_command_job.experiment_name, + 
status=rest_command_job.status, + creation_context=obj.system_data, + source=ImportSource._from_job_inputs(inputs), # pylint: disable=protected-access + output=outputs["output"] if "output" in outputs else None, + ) + return import_job + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> "ImportComponent": + """Translate a import job to component. + + :param context: Context of import job YAML file. + :type context: dict + :return: Translated import component. + :rtype: ImportComponent + """ + from azure.ai.ml.entities._component.import_component import ImportComponent + + pipeline_job_dict = kwargs.get("pipeline_job_dict", {}) + context = context or {BASE_PATH_CONTEXT_KEY: Path("import/")} + + _inputs = self.source._to_job_inputs() if self.source is not None else None # pylint: disable=protected-access + + # Create anonymous command component with default version as 1 + return ImportComponent( + is_anonymous=True, + base_path=context[BASE_PATH_CONTEXT_KEY], + description=self.description, + source=self._to_inputs( + inputs=_inputs, + pipeline_job_dict=pipeline_job_dict, + ), + output=self._to_outputs(outputs={"output": self.output}, pipeline_job_dict=pipeline_job_dict)["output"], + ) + + def _to_node(self, context: Optional[Dict] = None, **kwargs: Any) -> "Import": + """Translate a import job to a pipeline node. + + :param context: Context of import job YAML file. + :type context: dict + :return: Translated import node. + :rtype: Import + """ + from azure.ai.ml.entities._builders import Import + + component = self._to_component(context, **kwargs) + _inputs = self.source._to_job_inputs() if self.source is not None else None # pylint: disable=protected-access + return Import( + component=component, + compute=self.compute, + inputs=_inputs, + outputs={"output": self.output}, + description=self.description, + display_name=self.display_name, + properties=self.properties, + ) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/input_output_entry.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/input_output_entry.py new file mode 100644 index 00000000..aa0e73b1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/input_output_entry.py @@ -0,0 +1,27 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
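+
+# Illustrative aside, not part of the original file: a sketch of the two ImportSource
+# flavours defined above and the job-input dictionaries they translate to. The source
+# type, connection names, query and path are placeholders.
+from azure.ai.ml.entities._job.import_job import DatabaseImportSource, FileImportSource
+
+db_source = DatabaseImportSource(
+    type="azuresqldb",
+    connection="azureml:my_sql_connection",
+    query="SELECT * FROM dbo.samples",
+)
+assert db_source._to_job_inputs() == {
+    "type": "azuresqldb",
+    "connection": "azureml:my_sql_connection",
+    "query": "SELECT * FROM dbo.samples",
+}
+
+file_source = FileImportSource(type="s3", connection="azureml:my_s3_connection", path="s3://bucket/data/*")
+assert "path" in file_source._to_job_inputs()
+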
+# --------------------------------------------------------- + +import collections.abc +import logging +from typing import Any, Optional, Union + +from azure.ai.ml.constants import InputOutputModes +from azure.ai.ml.entities._assets import Data +from azure.ai.ml.entities._mixins import DictMixin + +module_logger = logging.getLogger(__name__) + + +class InputOutputEntry(DictMixin): + def __init__( + self, # pylint: disable=unused-argument + data: Optional[Union[str, "Data"]] = None, + mode: Optional[str] = InputOutputModes.MOUNT, + **kwargs: Any, + ): + # Data will be either a dataset id, inline dataset definition + self.data = data + self.mode = mode + if isinstance(self.data, collections.abc.Mapping) and not isinstance(self.data, Data): + self.data = Data(**self.data) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/input_port.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/input_port.py new file mode 100644 index 00000000..7953bbde --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/input_port.py @@ -0,0 +1,18 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +import logging +from typing import Optional, Union + +module_logger = logging.getLogger(__name__) + + +class InputPort: + def __init__(self, *, type_string: str, default: Optional[str] = None, optional: Optional[bool] = False): + self.type_string = type_string + self.optional = optional + if self.type_string == "number" and default is not None: + self.default: Union[float, Optional[str]] = float(default) + else: + self.default = default diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job.py new file mode 100644 index 00000000..b181636e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job.py @@ -0,0 +1,363 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
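+
+# Illustrative aside, not part of the original file: a quick sketch of the default
+# coercion done by the InputPort class above: "number" ports convert a string default
+# to float, every other port type keeps the raw value.
+from azure.ai.ml.entities._job.input_port import InputPort
+
+number_port = InputPort(type_string="number", default="0.01")
+assert number_port.default == 0.01
+
+string_port = InputPort(type_string="string", default="0.01")
+assert string_port.default == "0.01"
+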
+# --------------------------------------------------------- + +# pylint: disable=protected-access + +import json +import logging +import traceback +from abc import abstractmethod +from collections import OrderedDict +from os import PathLike +from pathlib import Path +from typing import IO, Any, AnyStr, Dict, List, Optional, Tuple, Type, Union + +from azure.ai.ml._restclient.runhistory.models import Run +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase, JobService +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobType as RestJobType +from azure.ai.ml._restclient.v2024_01_01_preview.models import JobBase as JobBase_2401 +from azure.ai.ml._restclient.v2024_01_01_preview.models import JobType as RestJobType_20240101Preview +from azure.ai.ml._utils._html_utils import make_link, to_html +from azure.ai.ml._utils.utils import dump_yaml_to_file +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, PARAMS_OVERRIDE_KEY, CommonYamlFields +from azure.ai.ml.constants._compute import ComputeType +from azure.ai.ml.constants._job.job import JobServices, JobType +from azure.ai.ml.entities._mixins import TelemetryMixin +from azure.ai.ml.entities._resource import Resource +from azure.ai.ml.entities._util import find_type_in_override +from azure.ai.ml.exceptions import ( + ErrorCategory, + ErrorTarget, + JobException, + JobParsingError, + PipelineChildJobError, + ValidationErrorType, + ValidationException, +) + +from ._studio_url_from_job_id import studio_url_from_job_id +from .pipeline._component_translatable import ComponentTranslatableMixin + +module_logger = logging.getLogger(__name__) + + +def _is_pipeline_child_job(job: JobBase) -> bool: + # pipeline child job has no properties, so we can check through testing job.properties + # if backend has spec changes, this method need to be updated + return job.properties is None + + +class Job(Resource, ComponentTranslatableMixin, TelemetryMixin): + """Base class for jobs. + + This class should not be instantiated directly. Instead, use one of its subclasses. + + :param name: The name of the job. + :type name: Optional[str] + :param display_name: The display name of the job. + :type display_name: Optional[str] + :param description: The description of the job. + :type description: Optional[str] + :param tags: Tag dictionary. Tags can be added, removed, and updated. + :type tags: Optional[dict[str, str]] + :param properties: The job property dictionary. + :type properties: Optional[dict[str, str]] + :param experiment_name: The name of the experiment the job will be created under. Defaults to the name of the + current directory. + :type experiment_name: Optional[str] + :param services: Information on services associated with the job. + :type services: Optional[dict[str, ~azure.ai.ml.entities.JobService]] + :param compute: Information about the compute resources associated with the job. 
+ :type compute: Optional[str] + """ + + def __init__( + self, + name: Optional[str] = None, + display_name: Optional[str] = None, + description: Optional[str] = None, + tags: Optional[Dict] = None, + properties: Optional[Dict] = None, + experiment_name: Optional[str] = None, + compute: Optional[str] = None, + services: Optional[Dict[str, JobService]] = None, + **kwargs: Any, + ) -> None: + self._type: Optional[str] = kwargs.pop("type", JobType.COMMAND) + self._status: Optional[str] = kwargs.pop("status", None) + self._log_files: Optional[Dict] = kwargs.pop("log_files", None) + + super().__init__( + name=name, + description=description, + tags=tags, + properties=properties, + **kwargs, + ) + + self.display_name = display_name + self.experiment_name = experiment_name + self.compute: Any = compute + self.services = services + + @property + def type(self) -> Optional[str]: + """The type of the job. + + :return: The type of the job. + :rtype: Optional[str] + """ + return self._type + + @property + def status(self) -> Optional[str]: + """The status of the job. + + Common values returned include "Running", "Completed", and "Failed". All possible values are: + + * NotStarted - This is a temporary state that client-side Run objects are in before cloud submission. + * Starting - The Run has started being processed in the cloud. The caller has a run ID at this point. + * Provisioning - On-demand compute is being created for a given job submission. + * Preparing - The run environment is being prepared and is in one of two stages: + * Docker image build + * conda environment setup + * Queued - The job is queued on the compute target. For example, in BatchAI, the job is in a queued state + while waiting for all the requested nodes to be ready. + * Running - The job has started to run on the compute target. + * Finalizing - User code execution has completed, and the run is in post-processing stages. + * CancelRequested - Cancellation has been requested for the job. + * Completed - The run has completed successfully. This includes both the user code execution and run + post-processing stages. + * Failed - The run failed. Usually the Error property on a run will provide details as to why. + * Canceled - Follows a cancellation request and indicates that the run is now successfully cancelled. + * NotResponding - For runs that have Heartbeats enabled, no heartbeat has been recently sent. + + :return: Status of the job. + :rtype: Optional[str] + """ + return self._status + + @property + def log_files(self) -> Optional[Dict[str, str]]: + """Job output files. + + :return: The dictionary of log names and URLs. + :rtype: Optional[Dict[str, str]] + """ + return self._log_files + + @property + def studio_url(self) -> Optional[str]: + """Azure ML studio endpoint. + + :return: The URL to the job details page. + :rtype: Optional[str] + """ + if self.services and (JobServices.STUDIO in self.services.keys()): + res: Optional[str] = self.services[JobServices.STUDIO].endpoint + return res + + return studio_url_from_job_id(self.id) if self.id else None + + def dump(self, dest: Union[str, PathLike, IO[AnyStr]], **kwargs: Any) -> None: + """Dumps the job content into a file in YAML format. + + :param dest: The local path or file stream to write the YAML content to. + If dest is a file path, a new file will be created. + If dest is an open file, the file will be written to directly. + :type dest: Union[PathLike, str, IO[AnyStr]] + :raises FileExistsError: Raised if dest is a file path and the file already exists. 
+ :raises IOError: Raised if dest is an open file and the file is not writable. + """ + path = kwargs.pop("path", None) + yaml_serialized = self._to_dict() + dump_yaml_to_file(dest, yaml_serialized, default_flow_style=False, path=path, **kwargs) + + def _get_base_info_dict(self) -> OrderedDict: + return OrderedDict( + [ + ("Experiment", self.experiment_name), + ("Name", self.name), + ("Type", self._type), + ("Status", self._status), + ] + ) + + def _repr_html_(self) -> str: + info = self._get_base_info_dict() + if self.studio_url: + info.update( + [ + ( + "Details Page", + make_link(self.studio_url, "Link to Azure Machine Learning studio"), + ), + ] + ) + res: str = to_html(info) + return res + + @abstractmethod + def _to_dict(self) -> Dict: + pass + + @classmethod + def _resolve_cls_and_type(cls, data: Dict, params_override: Optional[List[Dict]] = None) -> Tuple: + from azure.ai.ml.entities._builders.command import Command + from azure.ai.ml.entities._builders.spark import Spark + from azure.ai.ml.entities._job.automl.automl_job import AutoMLJob + from azure.ai.ml.entities._job.distillation.distillation_job import DistillationJob + from azure.ai.ml.entities._job.finetuning.finetuning_job import FineTuningJob + from azure.ai.ml.entities._job.import_job import ImportJob + from azure.ai.ml.entities._job.pipeline.pipeline_job import PipelineJob + from azure.ai.ml.entities._job.sweep.sweep_job import SweepJob + + job_type: Optional[Type["Job"]] = None + type_in_override = find_type_in_override(params_override) + type_str = type_in_override or data.get(CommonYamlFields.TYPE, JobType.COMMAND) # override takes the priority + if type_str == JobType.COMMAND: + job_type = Command + elif type_str == JobType.SPARK: + job_type = Spark + elif type_str == JobType.IMPORT: + job_type = ImportJob + elif type_str == JobType.SWEEP: + job_type = SweepJob + elif type_str == JobType.AUTOML: + job_type = AutoMLJob + elif type_str == JobType.PIPELINE: + job_type = PipelineJob + elif type_str == JobType.FINE_TUNING: + job_type = FineTuningJob + elif type_str == JobType.DISTILLATION: + job_type = DistillationJob + else: + msg = f"Unsupported job type: {type_str}." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) + return job_type, type_str + + @classmethod + def _load( + cls, + data: Optional[Dict] = None, + yaml_path: Optional[Union[PathLike, str]] = None, + params_override: Optional[list] = None, + **kwargs: Any, + ) -> "Job": + """Load a job object from a yaml file. + + :param cls: Indicates that this is a class method. + :type cls: class + :param data: Data Dictionary, defaults to None + :type data: Dict + :param yaml_path: YAML Path, defaults to None + :type yaml_path: Union[PathLike, str] + :param params_override: Fields to overwrite on top of the yaml file. + Format is [{"field1": "value1"}, {"field2": "value2"}], defaults to None + :type params_override: List[Dict] + :raises Exception: An exception + :return: Loaded job object. 
+ :rtype: Job + """ + data = data or {} + params_override = params_override or [] + context = { + BASE_PATH_CONTEXT_KEY: Path(yaml_path).parent if yaml_path else Path("./"), + PARAMS_OVERRIDE_KEY: params_override, + } + job_type, type_str = cls._resolve_cls_and_type(data, params_override) + job: Job = job_type._load_from_dict( + data=data, + context=context, + additional_message=f"If you are trying to configure a job that is not of type {type_str}, please specify " + f"the correct job type in the 'type' property.", + **kwargs, + ) + if yaml_path: + job._source_path = yaml_path + return job + + @classmethod + def _from_rest_object( # pylint: disable=too-many-return-statements + cls, obj: Union[JobBase, JobBase_2401, Run] + ) -> "Job": + from azure.ai.ml.entities import PipelineJob + from azure.ai.ml.entities._builders.command import Command + from azure.ai.ml.entities._builders.spark import Spark + from azure.ai.ml.entities._job.automl.automl_job import AutoMLJob + from azure.ai.ml.entities._job.base_job import _BaseJob + from azure.ai.ml.entities._job.distillation.distillation_job import DistillationJob + from azure.ai.ml.entities._job.finetuning.finetuning_job import FineTuningJob + from azure.ai.ml.entities._job.import_job import ImportJob + from azure.ai.ml.entities._job.sweep.sweep_job import SweepJob + + try: + if isinstance(obj, Run): + # special handling for child jobs + return _BaseJob._load_from_rest(obj) + if _is_pipeline_child_job(obj): + raise PipelineChildJobError(job_id=obj.id) + if obj.properties.job_type == RestJobType.COMMAND: + # PrP only until new import job type is ready on MFE in PuP + # compute type 'DataFactory' is reserved compute name for 'clusterless' ADF jobs + if obj.properties.compute_id and obj.properties.compute_id.endswith("/" + ComputeType.ADF): + return ImportJob._load_from_rest(obj) + + res_command: Job = Command._load_from_rest_job(obj) + if hasattr(obj, "name"): + res_command._name = obj.name # type: ignore[attr-defined] + return res_command + if obj.properties.job_type == RestJobType.SPARK: + res_spark: Job = Spark._load_from_rest_job(obj) + if hasattr(obj, "name"): + res_spark._name = obj.name # type: ignore[attr-defined] + return res_spark + if obj.properties.job_type == RestJobType.SWEEP: + return SweepJob._load_from_rest(obj) + if obj.properties.job_type == RestJobType.AUTO_ML: + return AutoMLJob._load_from_rest(obj) + if obj.properties.job_type == RestJobType_20240101Preview.FINE_TUNING: + if obj.properties.properties.get("azureml.enable_distillation", False): + return DistillationJob._load_from_rest(obj) + return FineTuningJob._load_from_rest(obj) + if obj.properties.job_type == RestJobType.PIPELINE: + res_pipeline: Job = PipelineJob._load_from_rest(obj) + return res_pipeline + except PipelineChildJobError as ex: + raise ex + except Exception as ex: + error_message = json.dumps(obj.as_dict(), indent=2) if obj else None + module_logger.info( + "Exception: %s.\n%s\nUnable to parse the job resource: %s.\n", + ex, + traceback.format_exc(), + error_message, + ) + raise JobParsingError( + message=str(ex), + no_personal_data_message=f"Unable to parse a job resource of type:{type(obj).__name__}", + error_category=ErrorCategory.SYSTEM_ERROR, + ) from ex + msg = f"Unsupported job type {obj.properties.job_type}" + raise JobException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.JOB, + error_category=ErrorCategory.SYSTEM_ERROR, + ) + + def _get_telemetry_values(self) -> Dict: # pylint: disable=arguments-differ + telemetry_values = 
{"type": self.type} + return telemetry_values + + @classmethod + @abstractmethod + def _load_from_dict(cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any) -> "Job": + pass diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_io_mixin.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_io_mixin.py new file mode 100644 index 00000000..21db73ba --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_io_mixin.py @@ -0,0 +1,37 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + + +from typing import Dict, Union + +from azure.ai.ml.entities._inputs_outputs import Input, Output +from azure.ai.ml.entities._job._input_output_helpers import build_input_output + + +class JobIOMixin: + @property + def inputs(self) -> Dict[str, Union[Input, str, bool, int, float]]: + return self._inputs + + @inputs.setter + def inputs(self, value: Dict[str, Union[Input, str, bool, int, float]]) -> None: + self._inputs: Dict = {} + if not value: + return + + for input_name, input_value in value.items(): + self._inputs[input_name] = build_input_output(input_value) + + @property + def outputs(self) -> Dict[str, Output]: + return self._outputs + + @outputs.setter + def outputs(self, value: Dict[str, Output]) -> None: + self._outputs: Dict = {} + if not value: + return + + for output_name, output_value in value.items(): + self._outputs[output_name] = build_input_output(output_value, inputs=False) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_limits.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_limits.py new file mode 100644 index 00000000..7aae9263 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_limits.py @@ -0,0 +1,201 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +import logging +from abc import ABC +from typing import Any, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import CommandJobLimits as RestCommandJobLimits +from azure.ai.ml._restclient.v2023_08_01_preview.models import SweepJobLimits as RestSweepJobLimits +from azure.ai.ml._utils.utils import from_iso_duration_format, is_data_binding_expression, to_iso_duration_format +from azure.ai.ml.constants import JobType +from azure.ai.ml.entities._mixins import RestTranslatableMixin + +module_logger = logging.getLogger(__name__) + + +class JobLimits(RestTranslatableMixin, ABC): + """Base class for Job limits. + + This class should not be instantiated directly. Instead, one of its child classes should be used. + """ + + def __init__( + self, + ) -> None: + self.type: Any = None + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, JobLimits): + return NotImplemented + res: bool = self._to_rest_object() == other._to_rest_object() + return res + + +class CommandJobLimits(JobLimits): + """Limits for Command Jobs. + + :keyword timeout: The maximum run duration, in seconds, after which the job will be cancelled. + :paramtype timeout: Optional[Union[int, str]] + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_command_configurations.py + :start-after: [START command_job_definition] + :end-before: [END command_job_definition] + :language: python + :dedent: 8 + :caption: Configuring a CommandJob with CommandJobLimits. + """ + + def __init__(self, *, timeout: Optional[Union[int, str]] = None) -> None: + super().__init__() + self.type = JobType.COMMAND + self.timeout = timeout + + def _to_rest_object(self) -> RestCommandJobLimits: + if is_data_binding_expression(self.timeout): + return RestCommandJobLimits(timeout=self.timeout) + return RestCommandJobLimits(timeout=to_iso_duration_format(self.timeout)) + + @classmethod + def _from_rest_object(cls, obj: Union[RestCommandJobLimits, dict]) -> Optional["CommandJobLimits"]: + if not obj: + return None + if isinstance(obj, dict): + timeout_value = obj.get("timeout", None) + # if timeout value is a binding string + if is_data_binding_expression(timeout_value): + return cls(timeout=timeout_value) + # if response timeout is a normal iso date string + obj = RestCommandJobLimits.from_dict(obj) + return cls(timeout=from_iso_duration_format(obj.timeout)) + + +class SweepJobLimits(JobLimits): + """Limits for Sweep Jobs. + + :keyword max_concurrent_trials: The maximum number of concurrent trials for the Sweep Job. + :paramtype max_concurrent_trials: Optional[int] + :keyword max_total_trials: The maximum number of total trials for the Sweep Job. + :paramtype max_total_trials: Optional[int] + :keyword timeout: The maximum run duration, in seconds, after which the job will be cancelled. + :paramtype timeout: Optional[int] + :keyword trial_timeout: The timeout value, in seconds, for each Sweep Job trial. + :paramtype trial_timeout: Optional[int] + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_bayesian_sampling_algorithm] + :end-before: [END configure_sweep_job_bayesian_sampling_algorithm] + :language: python + :dedent: 8 + :caption: Assigning limits to a SweepJob + """ + + def __init__( + self, + *, + max_concurrent_trials: Optional[int] = None, + max_total_trials: Optional[int] = None, + timeout: Optional[int] = None, + trial_timeout: Optional[Union[int, str]] = None, + ) -> None: + super().__init__() + self.type = JobType.SWEEP + self.max_concurrent_trials = max_concurrent_trials + self.max_total_trials = max_total_trials + self._timeout = _get_floored_timeout(timeout) + self._trial_timeout = _get_floored_timeout(trial_timeout) + + @property + def timeout(self) -> Optional[Union[int, str]]: + """The maximum run duration, in seconds, after which the job will be cancelled. + + :return: The maximum run duration, in seconds, after which the job will be cancelled. + :rtype: int + """ + return self._timeout + + @timeout.setter + def timeout(self, value: int) -> None: + """Sets the maximum run duration. + + :param value: The maximum run duration, in seconds, after which the job will be cancelled. + :type value: int + """ + self._timeout = _get_floored_timeout(value) + + @property + def trial_timeout(self) -> Optional[Union[int, str]]: + """The timeout value, in seconds, for each Sweep Job trial. + + :return: The timeout value, in seconds, for each Sweep Job trial. + :rtype: int + """ + return self._trial_timeout + + @trial_timeout.setter + def trial_timeout(self, value: int) -> None: + """Sets the timeout value for each Sweep Job trial. + + :param value: The timeout value, in seconds, for each Sweep Job trial. 
+ :type value: int + """ + self._trial_timeout = _get_floored_timeout(value) + + def _to_rest_object(self) -> RestSweepJobLimits: + return RestSweepJobLimits( + max_concurrent_trials=self.max_concurrent_trials, + max_total_trials=self.max_total_trials, + timeout=to_iso_duration_format(self.timeout), + trial_timeout=to_iso_duration_format(self.trial_timeout), + ) + + @classmethod + def _from_rest_object(cls, obj: RestSweepJobLimits) -> Optional["SweepJobLimits"]: + if not obj: + return None + + return cls( + max_concurrent_trials=obj.max_concurrent_trials, + max_total_trials=obj.max_total_trials, + timeout=from_iso_duration_format(obj.timeout), + trial_timeout=from_iso_duration_format(obj.trial_timeout), + ) + + +def _get_floored_timeout(value: Optional[Union[int, str]]) -> Optional[Union[int, str]]: + # Bug 1335978: Service rounds durations less than 60 seconds to 60 days. + # If duration is non-0 and less than 60, set to 60. + if isinstance(value, int): + return value if not value or value > 60 else 60 + + return None + + +class DoWhileJobLimits(JobLimits): + """DoWhile Job limit class. + + :keyword max_iteration_count: The maximum number of iterations for the DoWhile Job. + :paramtype max_iteration_count: Optional[int] + """ + + def __init__( + self, # pylint: disable=unused-argument + *, + max_iteration_count: Optional[int] = None, + **kwargs: Any, + ) -> None: + super().__init__() + self._max_iteration_count = max_iteration_count + + @property + def max_iteration_count(self) -> Optional[int]: + """The maximum number of iterations for the DoWhile Job. + + :rtype: int + """ + return self._max_iteration_count diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_name_generator.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_name_generator.py new file mode 100644 index 00000000..e4f62d3d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_name_generator.py @@ -0,0 +1,487 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
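To make the limit behaviour above concrete, a small usage sketch (the surrounding job definition is omitted; import paths follow this package layout). `CommandJobLimits` passes data-binding expressions through untouched and otherwise serializes seconds as an ISO-8601 duration, while `SweepJobLimits` floors sub-minute timeouts to 60 seconds via `_get_floored_timeout`.

```python
# Usage sketch, not part of the library source.
from azure.ai.ml.entities._job.job_limits import CommandJobLimits, SweepJobLimits

cmd_limits = CommandJobLimits(timeout=3600)                        # seconds; ISO-8601 duration when serialized
bound_limits = CommandJobLimits(timeout="${{parent.inputs.timeout}}")  # binding string stored as-is

sweep_limits = SweepJobLimits(
    max_total_trials=20,
    max_concurrent_trials=4,
    timeout=30,          # floored to 60 seconds (see _get_floored_timeout above)
    trial_timeout=600,
)
assert sweep_limits.timeout == 60
assert sweep_limits.trial_timeout == 600
```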
+# --------------------------------------------------------- + +import random + +SUFFIX_LENGTH = 10 +ALLOWED_CHARS = "bcdfghjklmnpqrstvwxyz0123456789" + +ALLOWED_ADJECTIVES = [ + "affable", + "amiable", + "amusing", + "ashy", + "blue", + "bold", + "boring", + "brave", + "bright", + "bubbly", + "busy", + "calm", + "careful", + "clever", + "cool", + "coral", + "crimson", + "cyan", + "dreamy", + "dynamic", + "eager", + "elated", + "epic", + "frank", + "frosty", + "funny", + "gentle", + "gifted", + "good", + "goofy", + "gray", + "great", + "green", + "happy", + "helpful", + "heroic", + "honest", + "hungry", + "icy", + "ivory", + "jolly", + "jovial", + "joyful", + "keen", + "khaki", + "kind", + "lemon", + "lime", + "loving", + "loyal", + "lucid", + "magenta", + "mango", + "maroon", + "mighty", + "modest", + "musing", + "neat", + "nice", + "nifty", + "olden", + "olive", + "orange", + "patient", + "placid", + "plucky", + "plum", + "polite", + "purple", + "quiet", + "quirky", + "red", + "sad", + "salmon", + "serene", + "sharp", + "shy", + "silly", + "silver", + "sincere", + "sleepy", + "stoic", + "strong", + "sweet", + "teal", + "tender", + "tidy", + "tough", + "upbeat", + "wheat", + "willing", + "witty", + "yellow", + "zen", +] + +ALLOWED_NOUNS = [ + "actor", + "airport", + "angle", + "animal", + "answer", + "ant", + "apple", + "apricot", + "arch", + "arm", + "atemoya", + "avocado", + "bag", + "ball", + "balloon", + "band", + "basil", + "basin", + "basket", + "battery", + "beach", + "bean", + "bear", + "beard", + "bee", + "beet", + "bell", + "berry", + "bird", + "board", + "boat", + "bone", + "boniato", + "book", + "boot", + "bottle", + "box", + "brain", + "brake", + "branch", + "bread", + "brick", + "bridge", + "brush", + "bucket", + "bulb", + "button", + "cabbage", + "cake", + "calypso", + "camel", + "camera", + "candle", + "car", + "caravan", + "card", + "carnival", + "carpet", + "carrot", + "cart", + "cartoon", + "cassava", + "cat", + "celery", + "chaconia", + "chain", + "chayote", + "cheese", + "cheetah", + "cherry", + "chicken", + "chin", + "circle", + "clock", + "cloud", + "coat", + "coconut", + "collar", + "comb", + "cord", + "corn", + "cow", + "crayon", + "crowd", + "cumin", + "cup", + "curtain", + "cushion", + "date", + "deer", + "diamond", + "dinner", + "dog", + "dolphin", + "door", + "double", + "drain", + "drawer", + "dream", + "dress", + "drop", + "duck", + "eagle", + "ear", + "egg", + "endive", + "energy", + "engine", + "evening", + "eye", + "farm", + "feast", + "feather", + "feijoa", + "fennel", + "fig", + "fish", + "flag", + "floor", + "flower", + "fly", + "foot", + "forest", + "fork", + "fowl", + "fox", + "frame", + "frog", + "garage", + "garden", + "garlic", + "gas", + "ghost", + "giraffe", + "glass", + "glove", + "goat", + "gold", + "grape", + "grass", + "guava", + "guitar", + "gyro", + "hair", + "hamster", + "hand", + "hat", + "head", + "heart", + "helmet", + "holiday", + "hominy", + "honey", + "hook", + "horse", + "house", + "ice", + "insect", + "iron", + "island", + "jackal", + "jelly", + "jewel", + "jicama", + "juice", + "kale", + "kettle", + "key", + "king", + "kitchen", + "kite", + "kitten", + "kiwi", + "knee", + "knot", + "kumquat", + "lamp", + "leaf", + "leather", + "leek", + "leg", + "lemon", + "lettuce", + "library", + "lime", + "line", + "lion", + "lizard", + "lobster", + "lock", + "longan", + "loquat", + "lunch", + "lychee", + "machine", + "malanga", + "mango", + "mangos", + "map", + "market", + "match", + "melon", + "milk", + "monkey", + "moon", + "morning", + 
"muscle", + "music", + "nail", + "napa", + "napkin", + "neck", + "needle", + "nerve", + "nest", + "net", + "night", + "nose", + "nut", + "nutmeg", + "ocean", + "octopus", + "office", + "oil", + "okra", + "onion", + "orange", + "oregano", + "oven", + "owl", + "oxygen", + "oyster", + "panda", + "papaya", + "parang", + "parcel", + "parrot", + "parsnip", + "pasta", + "pea", + "peach", + "pear", + "pen", + "pencil", + "pepper", + "piano", + "picture", + "pig", + "pillow", + "pin", + "pipe", + "pizza", + "plane", + "planet", + "plastic", + "plate", + "plow", + "plum", + "pocket", + "pot", + "potato", + "prune", + "pummelo", + "pump", + "pumpkin", + "puppy", + "queen", + "quill", + "quince", + "rabbit", + "rail", + "rain", + "rainbow", + "raisin", + "rat", + "receipt", + "reggae", + "rhubarb", + "rhythm", + "rice", + "ring", + "river", + "rocket", + "rod", + "roof", + "room", + "root", + "rose", + "roti", + "sail", + "salt", + "sand", + "school", + "scooter", + "screw", + "seal", + "seed", + "shampoo", + "shark", + "sheep", + "shelf", + "ship", + "shirt", + "shoe", + "skin", + "snail", + "snake", + "soca", + "soccer", + "sock", + "soursop", + "spade", + "spider", + "spinach", + "sponge", + "spoon", + "spring", + "sprout", + "square", + "squash", + "stamp", + "star", + "station", + "steelpan", + "stem", + "stick", + "stomach", + "stone", + "store", + "street", + "sugar", + "sun", + "table", + "tail", + "tangelo", + "tent", + "thread", + "ticket", + "tiger", + "toe", + "tomato", + "tongue", + "tooth", + "town", + "train", + "tray", + "tree", + "truck", + "turnip", + "turtle", + "van", + "vase", + "vinegar", + "vulture", + "wall", + "watch", + "whale", + "wheel", + "whistle", + "window", + "wing", + "wire", + "wolf", + "worm", + "yacht", + "yak", + "yam", + "yogurt", + "yuca", + "zebra", + "zoo", +] + + +def generate_job_name() -> str: + adj = random.choice(ALLOWED_ADJECTIVES) + noun = random.choice(ALLOWED_NOUNS) + suffix = "".join(random.choices(ALLOWED_CHARS, k=SUFFIX_LENGTH)) + + return "_".join([adj, noun, suffix]) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_resource_configuration.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_resource_configuration.py new file mode 100644 index 00000000..a27b5ba1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_resource_configuration.py @@ -0,0 +1,239 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +import json +import logging +from typing import Any, Dict, List, Optional, Union, cast + +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobResourceConfiguration as RestJobResourceConfiguration +from azure.ai.ml._restclient.v2025_01_01_preview.models import ( + JobResourceConfiguration as RestJobResourceConfiguration202501, +) +from azure.ai.ml.constants._job.job import JobComputePropertyFields +from azure.ai.ml.entities._mixins import DictMixin, RestTranslatableMixin +from azure.ai.ml.entities._util import convert_ordered_dict_to_dict + +module_logger = logging.getLogger(__name__) + + +class BaseProperty(dict): + """Base class for entity classes to be used as value of JobResourceConfiguration.properties.""" + + def __init__(self, **kwargs: Any) -> None: + super().__init__() + for key, value in kwargs.items(): + setattr(self, key, value) + + def __setattr__(self, key: str, value: Any) -> None: + if key.startswith("_"): + super().__setattr__(key, value) + else: + self[key] = value + + def __getattr__(self, key: str) -> Any: + if key.startswith("_"): + super().__getattribute__(key) + return None + + return self[key] + + def __repr__(self) -> str: + return json.dumps(self.as_dict()) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, dict): + return self.as_dict() == other + if isinstance(other, BaseProperty): + return self.as_dict() == other.as_dict() + return False + + def as_dict(self) -> Dict[str, Any]: + res: dict = self._to_dict(self) + return res + + @classmethod + def _to_dict(cls, obj: Any) -> Any: + if isinstance(obj, dict): + result = {} + for key, value in obj.items(): + if value is None: + continue + if isinstance(value, dict): + result[key] = cls._to_dict(value) + else: + result[key] = value + return result + return obj + + +class Properties(BaseProperty): + # pre-defined properties are case-insensitive + # Map Singularity -> AISupercomputer in SDK until MFE does mapping + _KEY_MAPPING = { + JobComputePropertyFields.AISUPERCOMPUTER.lower(): JobComputePropertyFields.AISUPERCOMPUTER, + JobComputePropertyFields.SINGULARITY.lower(): JobComputePropertyFields.AISUPERCOMPUTER, + JobComputePropertyFields.ITP.lower(): JobComputePropertyFields.ITP, + JobComputePropertyFields.TARGET_SELECTOR.lower(): JobComputePropertyFields.TARGET_SELECTOR, + } + + def as_dict(self) -> Dict[str, Any]: + result = {} + for key, value in super().as_dict().items(): + if key.lower() in self._KEY_MAPPING: + key = self._KEY_MAPPING[key.lower()] + result[key] = value + # recursively convert Ordered Dict to dictionary + return cast(dict, convert_ordered_dict_to_dict(result)) + + +class JobResourceConfiguration(RestTranslatableMixin, DictMixin): + """Job resource configuration class, inherited and extended functionalities from ResourceConfiguration. + + :keyword locations: A list of locations where the job can run. + :paramtype locations: Optional[List[str]] + :keyword instance_count: The number of instances or nodes used by the compute target. + :paramtype instance_count: Optional[int] + :keyword instance_type: The type of VM to be used, as supported by the compute target. + :paramtype instance_type: Optional[str] + :keyword properties: A dictionary of properties for the job. + :paramtype properties: Optional[dict[str, Any]] + :keyword docker_args: Extra arguments to pass to the Docker run command. This would override any + parameters that have already been set by the system, or in this section. 
This parameter is only + supported for Azure ML compute types. + :paramtype docker_args: Optional[Union[str, List[str]]] + :keyword shm_size: The size of the docker container's shared memory block. This should be in the + format of (number)(unit) where the number has to be greater than 0 and the unit can be one of + b(bytes), k(kilobytes), m(megabytes), or g(gigabytes). + :paramtype shm_size: Optional[str] + :keyword max_instance_count: The maximum number of instances or nodes used by the compute target. + :paramtype max_instance_count: Optional[int] + :keyword kwargs: A dictionary of additional configuration parameters. + :paramtype kwargs: dict + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_command_configurations.py + :start-after: [START command_job_resource_configuration] + :end-before: [END command_job_resource_configuration] + :language: python + :dedent: 8 + :caption: Configuring a CommandJob with a JobResourceConfiguration. + """ + + def __init__( + self, # pylint: disable=unused-argument + *, + locations: Optional[List[str]] = None, + instance_count: Optional[int] = None, + instance_type: Optional[Union[str, List]] = None, + properties: Optional[Union[Properties, Dict]] = None, + docker_args: Optional[Union[str, List[str]]] = None, + shm_size: Optional[str] = None, + max_instance_count: Optional[int] = None, + **kwargs: Any + ) -> None: + self.locations = locations + self.instance_count = instance_count + self.instance_type = instance_type + self.shm_size = shm_size + self.max_instance_count = max_instance_count + self.docker_args = docker_args + self._properties = None + self.properties = properties + + @property + def properties(self) -> Optional[Union[Properties, Dict]]: + """The properties of the job. + + :rtype: ~azure.ai.ml.entities._job.job_resource_configuration.Properties + """ + return self._properties + + @properties.setter + def properties(self, properties: Dict[str, Any]) -> None: + """Sets the properties of the job. + + :param properties: A dictionary of properties for the job. + :type properties: Dict[str, Any] + :raises TypeError: Raised if properties is not a dictionary type. 
+ """ + if properties is None: + self._properties = Properties() + elif isinstance(properties, dict): + self._properties = Properties(**properties) + else: + raise TypeError("properties must be a dict.") + + def _to_rest_object(self) -> Union[RestJobResourceConfiguration, RestJobResourceConfiguration202501]: + if self.docker_args and isinstance(self.docker_args, list): + return RestJobResourceConfiguration202501( + instance_count=self.instance_count, + instance_type=self.instance_type, + max_instance_count=self.max_instance_count, + properties=self.properties.as_dict() if isinstance(self.properties, Properties) else None, + docker_args_list=self.docker_args, + shm_size=self.shm_size, + ) + return RestJobResourceConfiguration( + locations=self.locations, + instance_count=self.instance_count, + instance_type=self.instance_type, + max_instance_count=self.max_instance_count, + properties=self.properties.as_dict() if isinstance(self.properties, Properties) else None, + docker_args=self.docker_args, + shm_size=self.shm_size, + ) + + @classmethod + def _from_rest_object( + cls, obj: Optional[Union[RestJobResourceConfiguration, RestJobResourceConfiguration202501]] + ) -> Optional["JobResourceConfiguration"]: + if obj is None: + return None + if isinstance(obj, dict): + return cls(**obj) + return JobResourceConfiguration( + locations=obj.locations if hasattr(obj, "locations") else None, + instance_count=obj.instance_count, + instance_type=obj.instance_type, + max_instance_count=obj.max_instance_count if hasattr(obj, "max_instance_count") else None, + properties=obj.properties, + docker_args=obj.docker_args_list if hasattr(obj, "docker_args_list") else obj.docker_args, + shm_size=obj.shm_size, + deserialize_properties=True, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, JobResourceConfiguration): + return NotImplemented + return ( + self.locations == other.locations + and self.instance_count == other.instance_count + and self.instance_type == other.instance_type + and self.max_instance_count == other.max_instance_count + and self.docker_args == other.docker_args + and self.shm_size == other.shm_size + ) + + def __ne__(self, other: object) -> bool: + if not isinstance(other, JobResourceConfiguration): + return NotImplemented + return not self.__eq__(other) + + def _merge_with(self, other: "JobResourceConfiguration") -> None: + if other: + if other.locations: + self.locations = other.locations + if other.instance_count: + self.instance_count = other.instance_count + if other.instance_type: + self.instance_type = other.instance_type + if other.max_instance_count: + self.max_instance_count = other.max_instance_count + if other.properties: + self.properties = other.properties + if other.docker_args: + self.docker_args = other.docker_args + if other.shm_size: + self.shm_size = other.shm_size diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_resources.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_resources.py new file mode 100644 index 00000000..bd1cdad5 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_resources.py @@ -0,0 +1,33 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +from typing import Any, List +from azure.ai.ml.entities._mixins import RestTranslatableMixin +from azure.ai.ml._restclient.v2024_10_01_preview.models import JobResources as RestJobResources + + +class JobResources(RestTranslatableMixin): + """Resource configuration for a job. + + This class should not be instantiated directly. Instead, use its subclasses. + """ + + def __init__(self, *, instance_types: List[str]) -> None: + self.instance_types = instance_types + + def _to_rest_object(self) -> Any: + return RestJobResources(instance_types=self.instance_types) + + @classmethod + def _from_rest_object(cls, obj: RestJobResources) -> "JobResources": + job_resources = cls(instance_types=obj.instance_types) + return job_resources + + def __eq__(self, other: object) -> bool: + if not isinstance(other, JobResources): + return NotImplemented + return self.instance_types == other.instance_types + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_service.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_service.py new file mode 100644 index 00000000..a97048fc --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/job_service.py @@ -0,0 +1,424 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +import logging +from typing import Any, Dict, Optional, cast + +from typing_extensions import Literal + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AllNodes +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobService as RestJobService +from azure.ai.ml.constants._job.job import JobServiceTypeNames +from azure.ai.ml.entities._mixins import DictMixin, RestTranslatableMixin +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationErrorType, ValidationException + +module_logger = logging.getLogger(__name__) + + +class JobServiceBase(RestTranslatableMixin, DictMixin): + """Base class for job service configuration. + + This class should not be instantiated directly. Instead, use one of its subclasses. + + :keyword endpoint: The endpoint URL. + :paramtype endpoint: Optional[str] + :keyword type: The endpoint type. Accepted values are "jupyter_lab", "ssh", "tensor_board", and "vs_code". + :paramtype type: Optional[Literal["jupyter_lab", "ssh", "tensor_board", "vs_code"]] + :keyword port: The port for the endpoint. + :paramtype port: Optional[int] + :keyword nodes: Indicates whether the service has to run in all nodes. + :paramtype nodes: Optional[Literal["all"]] + :keyword properties: Additional properties to set on the endpoint. + :paramtype properties: Optional[dict[str, str]] + :keyword status: The status of the endpoint. + :paramtype status: Optional[str] + :keyword kwargs: A dictionary of additional configuration parameters. 
+ :paramtype kwargs: dict + """ + + def __init__( # pylint: disable=unused-argument + self, + *, + endpoint: Optional[str] = None, + type: Optional[ # pylint: disable=redefined-builtin + Literal["jupyter_lab", "ssh", "tensor_board", "vs_code"] + ] = None, + nodes: Optional[Literal["all"]] = None, + status: Optional[str] = None, + port: Optional[int] = None, + properties: Optional[Dict[str, str]] = None, + **kwargs: Dict, + ) -> None: + self.endpoint = endpoint + self.type: Any = type + self.nodes = nodes + self.status = status + self.port = port + self.properties = properties + self._validate_nodes() + self._validate_type_name() + + def _validate_nodes(self) -> None: + if not self.nodes in ["all", None]: + msg = f"nodes should be either 'all' or None, but received '{self.nodes}'." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) + + def _validate_type_name(self) -> None: + if self.type and not self.type in JobServiceTypeNames.ENTITY_TO_REST: + msg = ( + f"type should be one of " f"{JobServiceTypeNames.NAMES_ALLOWED_FOR_PUBLIC}, but received '{self.type}'." + ) + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) + + def _to_rest_job_service(self, updated_properties: Optional[Dict[str, str]] = None) -> RestJobService: + return RestJobService( + endpoint=self.endpoint, + job_service_type=JobServiceTypeNames.ENTITY_TO_REST.get(self.type, None) if self.type else None, + nodes=AllNodes() if self.nodes else None, + status=self.status, + port=self.port, + properties=updated_properties if updated_properties else self.properties, + ) + + @classmethod + def _to_rest_job_services( + cls, + services: Optional[Dict], + ) -> Optional[Dict[str, RestJobService]]: + if services is None: + return None + + return {name: service._to_rest_object() for name, service in services.items()} + + @classmethod + def _from_rest_job_service_object(cls, obj: RestJobService) -> "JobServiceBase": + return cls( + endpoint=obj.endpoint, + type=( + JobServiceTypeNames.REST_TO_ENTITY.get(obj.job_service_type, None) # type: ignore[arg-type] + if obj.job_service_type + else None + ), + nodes="all" if obj.nodes else None, + status=obj.status, + port=obj.port, + # ssh_public_keys=_get_property(obj.properties, "sshPublicKeys"), + properties=obj.properties, + ) + + @classmethod + def _from_rest_job_services(cls, services: Dict[str, RestJobService]) -> Dict: + # """Resolve Dict[str, RestJobService] to Dict[str, Specific JobService]""" + if services is None: + return None + + result: dict = {} + for name, service in services.items(): + if service.job_service_type == JobServiceTypeNames.RestNames.JUPYTER_LAB: + result[name] = JupyterLabJobService._from_rest_object(service) + elif service.job_service_type == JobServiceTypeNames.RestNames.SSH: + result[name] = SshJobService._from_rest_object(service) + elif service.job_service_type == JobServiceTypeNames.RestNames.TENSOR_BOARD: + result[name] = TensorBoardJobService._from_rest_object(service) + elif service.job_service_type == JobServiceTypeNames.RestNames.VS_CODE: + result[name] = VsCodeJobService._from_rest_object(service) + else: + result[name] = JobService._from_rest_object(service) + return result + + +class JobService(JobServiceBase): + """Basic job service configuration for backward 
compatibility. + + This class is not intended to be used directly. Instead, use one of its subclasses specific to your job type. + + :keyword endpoint: The endpoint URL. + :paramtype endpoint: Optional[str] + :keyword type: The endpoint type. Accepted values are "jupyter_lab", "ssh", "tensor_board", and "vs_code". + :paramtype type: Optional[Literal["jupyter_lab", "ssh", "tensor_board", "vs_code"]] + :keyword port: The port for the endpoint. + :paramtype port: Optional[int] + :keyword nodes: Indicates whether the service has to run in all nodes. + :paramtype nodes: Optional[Literal["all"]] + :keyword properties: Additional properties to set on the endpoint. + :paramtype properties: Optional[dict[str, str]] + :keyword status: The status of the endpoint. + :paramtype status: Optional[str] + :keyword kwargs: A dictionary of additional configuration parameters. + :paramtype kwargs: dict + """ + + @classmethod + def _from_rest_object(cls, obj: RestJobService) -> "JobService": + return cast(JobService, cls._from_rest_job_service_object(obj)) + + def _to_rest_object(self) -> RestJobService: + return self._to_rest_job_service() + + +class SshJobService(JobServiceBase): + """SSH job service configuration. + + :ivar type: Specifies the type of job service. Set automatically to "ssh" for this class. + :vartype type: str + :keyword endpoint: The endpoint URL. + :paramtype endpoint: Optional[str] + :keyword port: The port for the endpoint. + :paramtype port: Optional[int] + :keyword nodes: Indicates whether the service has to run in all nodes. + :paramtype nodes: Optional[Literal["all"]] + :keyword properties: Additional properties to set on the endpoint. + :paramtype properties: Optional[dict[str, str]] + :keyword status: The status of the endpoint. + :paramtype status: Optional[str] + :keyword ssh_public_keys: The SSH Public Key to access the job container. + :paramtype ssh_public_keys: Optional[str] + :keyword kwargs: A dictionary of additional configuration parameters. + :paramtype kwargs: dict + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_misc.py + :start-after: [START ssh_job_service_configuration] + :end-before: [END ssh_job_service_configuration] + :language: python + :dedent: 8 + :caption: Configuring a SshJobService configuration on a command job. + """ + + def __init__( + self, + *, + endpoint: Optional[str] = None, + nodes: Optional[Literal["all"]] = None, + status: Optional[str] = None, + port: Optional[int] = None, + ssh_public_keys: Optional[str] = None, + properties: Optional[Dict[str, str]] = None, + **kwargs: Any, + ) -> None: + super().__init__( + endpoint=endpoint, + nodes=nodes, + status=status, + port=port, + properties=properties, + **kwargs, + ) + self.type = JobServiceTypeNames.EntityNames.SSH + self.ssh_public_keys = ssh_public_keys + + @classmethod + def _from_rest_object(cls, obj: RestJobService) -> "SshJobService": + ssh_job_service = cast(SshJobService, cls._from_rest_job_service_object(obj)) + ssh_job_service.ssh_public_keys = _get_property(obj.properties, "sshPublicKeys") + return ssh_job_service + + def _to_rest_object(self) -> RestJobService: + updated_properties = _append_or_update_properties(self.properties, "sshPublicKeys", self.ssh_public_keys) + return self._to_rest_job_service(updated_properties) + + +class TensorBoardJobService(JobServiceBase): + """TensorBoard job service configuration. + + :ivar type: Specifies the type of job service. Set automatically to "tensor_board" for this class. 
+ :vartype type: str + :keyword endpoint: The endpoint URL. + :paramtype endpoint: Optional[str] + :keyword port: The port for the endpoint. + :paramtype port: Optional[int] + :keyword nodes: Indicates whether the service has to run in all nodes. + :paramtype nodes: Optional[Literal["all"]] + :keyword properties: Additional properties to set on the endpoint. + :paramtype properties: Optional[dict[str, str]] + :keyword status: The status of the endpoint. + :paramtype status: Optional[str] + :keyword log_dir: The directory path for the log file. + :paramtype log_dir: Optional[str] + :keyword kwargs: A dictionary of additional configuration parameters. + :paramtype kwargs: dict + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_misc.py + :start-after: [START ssh_job_service_configuration] + :end-before: [END ssh_job_service_configuration] + :language: python + :dedent: 8 + :caption: Configuring TensorBoardJobService configuration on a command job. + """ + + def __init__( + self, + *, + endpoint: Optional[str] = None, + nodes: Optional[Literal["all"]] = None, + status: Optional[str] = None, + port: Optional[int] = None, + log_dir: Optional[str] = None, + properties: Optional[Dict[str, str]] = None, + **kwargs: Any, + ) -> None: + super().__init__( + endpoint=endpoint, + nodes=nodes, + status=status, + port=port, + properties=properties, + **kwargs, + ) + self.type = JobServiceTypeNames.EntityNames.TENSOR_BOARD + self.log_dir = log_dir + + @classmethod + def _from_rest_object(cls, obj: RestJobService) -> "TensorBoardJobService": + tensorboard_job_Service = cast(TensorBoardJobService, cls._from_rest_job_service_object(obj)) + tensorboard_job_Service.log_dir = _get_property(obj.properties, "logDir") + return tensorboard_job_Service + + def _to_rest_object(self) -> RestJobService: + updated_properties = _append_or_update_properties(self.properties, "logDir", self.log_dir) + return self._to_rest_job_service(updated_properties) + + +class JupyterLabJobService(JobServiceBase): + """JupyterLab job service configuration. + + :ivar type: Specifies the type of job service. Set automatically to "jupyter_lab" for this class. + :vartype type: str + :keyword endpoint: The endpoint URL. + :paramtype endpoint: Optional[str] + :keyword port: The port for the endpoint. + :paramtype port: Optional[int] + :keyword nodes: Indicates whether the service has to run in all nodes. + :paramtype nodes: Optional[Literal["all"]] + :keyword properties: Additional properties to set on the endpoint. + :paramtype properties: Optional[dict[str, str]] + :keyword status: The status of the endpoint. + :paramtype status: Optional[str] + :keyword kwargs: A dictionary of additional configuration parameters. + :paramtype kwargs: dict + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_misc.py + :start-after: [START ssh_job_service_configuration] + :end-before: [END ssh_job_service_configuration] + :language: python + :dedent: 8 + :caption: Configuring JupyterLabJobService configuration on a command job. 
+ """ + + def __init__( + self, + *, + endpoint: Optional[str] = None, + nodes: Optional[Literal["all"]] = None, + status: Optional[str] = None, + port: Optional[int] = None, + properties: Optional[Dict[str, str]] = None, + **kwargs: Any, + ) -> None: + super().__init__( + endpoint=endpoint, + nodes=nodes, + status=status, + port=port, + properties=properties, + **kwargs, + ) + self.type = JobServiceTypeNames.EntityNames.JUPYTER_LAB + + @classmethod + def _from_rest_object(cls, obj: RestJobService) -> "JupyterLabJobService": + return cast(JupyterLabJobService, cls._from_rest_job_service_object(obj)) + + def _to_rest_object(self) -> RestJobService: + return self._to_rest_job_service() + + +class VsCodeJobService(JobServiceBase): + """VS Code job service configuration. + + :ivar type: Specifies the type of job service. Set automatically to "vs_code" for this class. + :vartype type: str + :keyword endpoint: The endpoint URL. + :paramtype endpoint: Optional[str] + :keyword port: The port for the endpoint. + :paramtype port: Optional[int] + :keyword nodes: Indicates whether the service has to run in all nodes. + :paramtype nodes: Optional[Literal["all"]] + :keyword properties: Additional properties to set on the endpoint. + :paramtype properties: Optional[dict[str, str]] + :keyword status: The status of the endpoint. + :paramtype status: Optional[str] + :keyword kwargs: A dictionary of additional configuration parameters. + :paramtype kwargs: dict + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_misc.py + :start-after: [START ssh_job_service_configuration] + :end-before: [END ssh_job_service_configuration] + :language: python + :dedent: 8 + :caption: Configuring a VsCodeJobService configuration on a command job. + """ + + def __init__( + self, + *, + endpoint: Optional[str] = None, + nodes: Optional[Literal["all"]] = None, + status: Optional[str] = None, + port: Optional[int] = None, + properties: Optional[Dict[str, str]] = None, + **kwargs: Any, + ) -> None: + super().__init__( + endpoint=endpoint, + nodes=nodes, + status=status, + port=port, + properties=properties, + **kwargs, + ) + self.type = JobServiceTypeNames.EntityNames.VS_CODE + + @classmethod + def _from_rest_object(cls, obj: RestJobService) -> "VsCodeJobService": + return cast(VsCodeJobService, cls._from_rest_job_service_object(obj)) + + def _to_rest_object(self) -> RestJobService: + return self._to_rest_job_service() + + +def _append_or_update_properties( + properties: Optional[Dict[str, str]], key: str, value: Optional[str] +) -> Dict[str, str]: + if value and not properties: + properties = {key: value} + + if value and properties: + properties.update({key: value}) + return properties if properties is not None else {} + + +def _get_property(properties: Dict[str, str], key: str) -> Optional[str]: + return properties.get(key, None) if properties else None diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/__init__.py new file mode 100644 index 00000000..fdf8caba --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/__init__.py @@ -0,0 +1,5 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/parallel_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/parallel_job.py new file mode 100644 index 00000000..49b2c992 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/parallel_job.py @@ -0,0 +1,244 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +import logging +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union + +from azure.ai.ml._restclient.v2022_02_01_preview.models import JobBaseData +from azure.ai.ml._schema.job.parallel_job import ParallelJobSchema +from azure.ai.ml._utils.utils import is_data_binding_expression +from azure.ai.ml.constants import JobType +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, TYPE +from azure.ai.ml.entities._credentials import ( + AmlTokenConfiguration, + ManagedIdentityConfiguration, + UserIdentityConfiguration, +) +from azure.ai.ml.entities._inputs_outputs import Input, Output +from azure.ai.ml.entities._util import load_from_dict +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationErrorType, ValidationException + +from ..job import Job +from ..job_io_mixin import JobIOMixin +from .parameterized_parallel import ParameterizedParallel + +# avoid circular import error +if TYPE_CHECKING: + from azure.ai.ml.entities._builders import Parallel + from azure.ai.ml.entities._component.parallel_component import ParallelComponent + +module_logger = logging.getLogger(__name__) + + +class ParallelJob(Job, ParameterizedParallel, JobIOMixin): + """Parallel job. + + :param name: Name of the job. + :type name: str + :param version: Version of the job. + :type version: str + :param id: Global id of the resource, Azure Resource Manager ID. + :type id: str + :param type: Type of the job, supported is 'parallel'. + :type type: str + :param description: Description of the job. + :type description: str + :param tags: Internal use only. + :type tags: dict + :param properties: Internal use only. + :type properties: dict + :param display_name: Display name of the job. + :type display_name: str + :param retry_settings: parallel job run failed retry + :type retry_settings: BatchRetrySettings + :param logging_level: A string of the logging level name + :type logging_level: str + :param max_concurrency_per_instance: The max parallellism that each compute instance has. + :type max_concurrency_per_instance: int + :param error_threshold: The number of item processing failures should be ignored. + :type error_threshold: int + :param mini_batch_error_threshold: The number of mini batch processing failures should be ignored. + :type mini_batch_error_threshold: int + :keyword identity: The identity that the job will use while running on compute. + :paramtype identity: Optional[Union[~azure.ai.ml.ManagedIdentityConfiguration, ~azure.ai.ml.AmlTokenConfiguration, + ~azure.ai.ml.UserIdentityConfiguration]] + :param task: The parallel task. + :type task: ParallelTask + :param mini_batch_size: The mini batch size. + :type mini_batch_size: str + :param partition_keys: The partition keys. + :type partition_keys: list + :param input_data: The input data. 
+ :type input_data: str + :param inputs: Inputs of the job. + :type inputs: dict + :param outputs: Outputs of the job. + :type outputs: dict + """ + + def __init__( + self, + *, + inputs: Optional[Dict[str, Union[Input, str, bool, int, float]]] = None, + outputs: Optional[Dict[str, Output]] = None, + identity: Optional[ + Union[ManagedIdentityConfiguration, AmlTokenConfiguration, UserIdentityConfiguration, Dict] + ] = None, + **kwargs: Any, + ): + kwargs[TYPE] = JobType.PARALLEL + + super().__init__(**kwargs) + + self.inputs = inputs # type: ignore[assignment] + self.outputs = outputs # type: ignore[assignment] + self.identity = identity + + def _to_dict(self) -> Dict: + res: dict = ParallelJobSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + def _to_rest_object(self) -> None: + pass + + @classmethod + def _load_from_dict(cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any) -> "ParallelJob": + loaded_data = load_from_dict(ParallelJobSchema, data, context, additional_message, **kwargs) + return ParallelJob(base_path=context[BASE_PATH_CONTEXT_KEY], **loaded_data) + + @classmethod + def _load_from_rest(cls, obj: JobBaseData) -> None: + pass + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> "ParallelComponent": + """Translate a parallel job to component job. + + :param context: Context of parallel job YAML file. + :type context: dict + :return: Translated parallel component. + :rtype: ParallelComponent + """ + from azure.ai.ml.entities._component.parallel_component import ParallelComponent + + pipeline_job_dict = kwargs.get("pipeline_job_dict", {}) + context = context or {BASE_PATH_CONTEXT_KEY: Path("./")} + + # Create anonymous parallel component with default version as 1 + init_kwargs = {} + for key in [ + "mini_batch_size", + "partition_keys", + "logging_level", + "max_concurrency_per_instance", + "error_threshold", + "mini_batch_error_threshold", + "retry_settings", + "resources", + ]: + value = getattr(self, key) + from azure.ai.ml.entities import BatchRetrySettings, JobResourceConfiguration + + values_to_check: List = [] + if key == "retry_settings" and isinstance(value, BatchRetrySettings): + values_to_check = [value.max_retries, value.timeout] + elif key == "resources" and isinstance(value, JobResourceConfiguration): + values_to_check = [ + value.locations, + value.instance_count, + value.instance_type, + value.shm_size, + value.max_instance_count, + value.docker_args, + ] + else: + values_to_check = [value] + + # note that component level attributes can not be data binding expressions + # so filter out data binding expression properties here; + # they will still take effect at node level according to _to_node + if any( + map( + lambda x: is_data_binding_expression(x, binding_prefix=["parent", "inputs"], is_singular=False) + or is_data_binding_expression(x, binding_prefix=["inputs"], is_singular=False), + values_to_check, + ) + ): + continue + + init_kwargs[key] = getattr(self, key) + + return ParallelComponent( + base_path=context[BASE_PATH_CONTEXT_KEY], + # for parallel_job.task, all attributes for this are string for now so data binding expression is allowed + # in SDK level naturally, but not sure if such component is valid. leave the validation to service side. 
+ task=self.task, + inputs=self._to_inputs(inputs=self.inputs, pipeline_job_dict=pipeline_job_dict), + outputs=self._to_outputs(outputs=self.outputs, pipeline_job_dict=pipeline_job_dict), + input_data=self.input_data, + # keep them if no data binding expression detected to keep the behavior of to_component + **init_kwargs, + ) + + def _to_node(self, context: Optional[Dict] = None, **kwargs: Any) -> "Parallel": + """Translate a parallel job to a pipeline node. + + :param context: Context of parallel job YAML file. + :type context: dict + :return: Translated parallel component. + :rtype: Parallel + """ + from azure.ai.ml.entities._builders import Parallel + + component = self._to_component(context, **kwargs) + + return Parallel( + component=component, + compute=self.compute, + # Need to supply the inputs with double curly. + inputs=self.inputs, # type: ignore[arg-type] + outputs=self.outputs, # type: ignore[arg-type] + mini_batch_size=self.mini_batch_size, + partition_keys=self.partition_keys, + input_data=self.input_data, + # task will be inherited from component & base_path will be set correctly. + retry_settings=self.retry_settings, + logging_level=self.logging_level, + max_concurrency_per_instance=self.max_concurrency_per_instance, + error_threshold=self.error_threshold, + mini_batch_error_threshold=self.mini_batch_error_threshold, + environment_variables=self.environment_variables, + properties=self.properties, + identity=self.identity, + resources=self.resources if self.resources and not isinstance(self.resources, dict) else None, + ) + + def _validate(self) -> None: + if self.name is None: + msg = "Job name is required" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.MISSING_FIELD, + ) + if self.compute is None: + msg = "compute is required" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.MISSING_FIELD, + ) + if self.task is None: + msg = "task is required" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.MISSING_FIELD, + ) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/parallel_task.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/parallel_task.py new file mode 100644 index 00000000..7325aed3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/parallel_task.py @@ -0,0 +1,119 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
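# --- Editorial sketch (not part of the patch) --------------------------------
# ParallelJob._to_component() above filters out attributes whose values are
# data-binding expressions so that they only take effect at node level. A small
# sketch of that check, reusing is_data_binding_expression exactly as the code
# above does; the binding string is a hypothetical pipeline input.
from azure.ai.ml._utils.utils import is_data_binding_expression

for value in (16, "${{parent.inputs.max_concurrency}}"):
    is_binding = is_data_binding_expression(
        value, binding_prefix=["parent", "inputs"], is_singular=False
    )
    print(value, "-> deferred to node" if is_binding else "-> kept on the component")
# ------------------------------------------------------------------------------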
+# ---------------------------------------------------------
+from os import PathLike
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+
+# from azure.ai.ml.entities._deployment.code_configuration import CodeConfiguration
+from azure.ai.ml._schema.component.parallel_task import ComponentParallelTaskSchema
+from azure.ai.ml._utils.utils import load_yaml
+from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, PARAMS_OVERRIDE_KEY
+from azure.ai.ml.entities._assets.environment import Environment
+from azure.ai.ml.entities._mixins import DictMixin, RestTranslatableMixin
+from azure.ai.ml.entities._util import load_from_dict
+from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException
+
+
+class ParallelTask(RestTranslatableMixin, DictMixin):
+    """Parallel task.
+
+    :param type: The type of the parallel task.
+        Possible values are 'run_function' and 'model'.
+    :type type: str
+    :param code: A local or remote path pointing at source code.
+    :type code: str
+    :param entry_script: User script which will be run in parallel on multiple nodes. This is
+        specified as a local file path.
+        The entry_script should contain two functions:
+        ``init()``: this function should be used for any costly or common preparation for subsequent inferences,
+        e.g., deserializing and loading the model into a global object.
+        ``run(mini_batch)``: The method to be parallelized. Each invocation will have one mini-batch.
+        'mini_batch': Batch inference will invoke the run method and pass either a list or a pandas DataFrame as an
+        argument to the method. Each entry in mini_batch will be a file path if the input is a FileDataset,
+        or a pandas DataFrame if the input is a TabularDataset.
+        The run() method should return a pandas DataFrame or an array.
+        For the append_row output_action, these returned elements are appended into the common output file.
+        For summary_only, the contents of the elements are ignored. For all output actions,
+        each returned output element indicates one successful inference of an input element in the input mini-batch.
+        Each parallel worker process will call `init` once and then loop over the `run` function until all
+        mini-batches are processed.
+    :type entry_script: str
+    :param program_arguments: The arguments of the parallel task.
+    :type program_arguments: str
+    :param model: The model of the parallel task.
+    :type model: str
+    :param append_row_to: All values output by run() method invocations will be aggregated into
+        one unique file which is created in the output location.
+        If it is not set, 'summary_only' is used, which means the user script is expected to store the output itself.
+    :type append_row_to: str
+    :param environment: Environment that the training job will run in.
+ :type environment: Union[Environment, str] + """ + + def __init__( + self, # pylint: disable=unused-argument + *, + type: Optional[str] = None, # pylint: disable=redefined-builtin + code: Optional[str] = None, + entry_script: Optional[str] = None, + program_arguments: Optional[str] = None, + model: Optional[str] = None, + append_row_to: Optional[str] = None, + environment: Optional[Union[Environment, str]] = None, + **kwargs: Any, + ): + self.type = type + self.code = code + self.entry_script = entry_script + self.program_arguments = program_arguments + self.model = model + self.append_row_to = append_row_to + self.environment: Any = environment + + def _to_dict(self) -> Dict: + # pylint: disable=no-member + res: dict = ComponentParallelTaskSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + @classmethod + def _load( + cls, # pylint: disable=unused-argument + path: Optional[Union[PathLike, str]] = None, + params_override: Optional[list] = None, + **kwargs: Any, + ) -> "ParallelTask": + params_override = params_override or [] + data = load_yaml(path) + return ParallelTask._load_from_dict(data=data, path=path, params_override=params_override) + + @classmethod + def _load_from_dict( + cls, + data: dict, + path: Optional[Union[PathLike, str]] = None, + params_override: Optional[list] = None, + **kwargs: Any, + ) -> "ParallelTask": + params_override = params_override or [] + context = { + BASE_PATH_CONTEXT_KEY: Path(path).parent if path else Path.cwd(), + PARAMS_OVERRIDE_KEY: params_override, + } + res: ParallelTask = load_from_dict(ComponentParallelTaskSchema, data, context, **kwargs) + return res + + @classmethod + def _from_dict(cls, dct: dict) -> "ParallelTask": + obj = cls(**dict(dct.items())) + return obj + + def _validate(self) -> None: + if self.type is None: + msg = "'type' is required for ParallelTask {}." + raise ValidationException( + message=msg.format(self.type), + target=ErrorTarget.COMPONENT, + no_personal_data_message=msg.format(""), + error_category=ErrorCategory.USER_ERROR, + ) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/parameterized_parallel.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/parameterized_parallel.py new file mode 100644 index 00000000..6b5dbced --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/parameterized_parallel.py @@ -0,0 +1,96 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +import logging +from typing import Any, Dict, List, Optional, Union + +from ..job_resource_configuration import JobResourceConfiguration +from .parallel_task import ParallelTask +from .retry_settings import RetrySettings + +module_logger = logging.getLogger(__name__) + + +class ParameterizedParallel: + """Parallel component that contains the traning parallel and supporting parameters for the parallel. + + :param retry_settings: parallel component run failed retry + :type retry_settings: BatchRetrySettings + :param logging_level: A string of the logging level name + :type logging_level: str + :param max_concurrency_per_instance: The max parallellism that each compute instance has. + :type max_concurrency_per_instance: int + :param error_threshold: The number of item processing failures should be ignored. 
+ :type error_threshold: int + :param mini_batch_error_threshold: The number of mini batch processing failures should be ignored. + :type mini_batch_error_threshold: int + :param task: The parallel task. + :type task: ParallelTask + :param mini_batch_size: The mini batch size. + :type mini_batch_size: str + :param input_data: The input data. + :type input_data: str + :param resources: Compute Resource configuration for the job. + :type resources: Union[Dict, ~azure.ai.ml.entities.JobResourceConfiguration] + """ + + # pylint: disable=too-many-instance-attributes + def __init__( + self, + retry_settings: Optional[RetrySettings] = None, + logging_level: Optional[str] = None, + max_concurrency_per_instance: Optional[int] = None, + error_threshold: Optional[int] = None, + mini_batch_error_threshold: Optional[int] = None, + input_data: Optional[str] = None, + task: Optional[ParallelTask] = None, + mini_batch_size: Optional[int] = None, + partition_keys: Optional[List] = None, + resources: Optional[Union[dict, JobResourceConfiguration]] = None, + environment_variables: Optional[Dict] = None, + ): + self.mini_batch_size = mini_batch_size + self.partition_keys = partition_keys + self.task = task + self.retry_settings = retry_settings + self.input_data = input_data + self.logging_level = logging_level + self.max_concurrency_per_instance = max_concurrency_per_instance + self.error_threshold = error_threshold + self.mini_batch_error_threshold = mini_batch_error_threshold + self.resources = resources + self.environment_variables = dict(environment_variables) if environment_variables else {} + + @property + def task(self) -> Optional[ParallelTask]: + res: Optional[ParallelTask] = self._task + return res + + @task.setter + def task(self, value: Any) -> None: + if isinstance(value, dict): + value = ParallelTask(**value) + self._task = value + + @property + def resources(self) -> Optional[Union[dict, JobResourceConfiguration]]: + res: Optional[Union[dict, JobResourceConfiguration]] = self._resources + return res + + @resources.setter + def resources(self, value: Any) -> None: + if isinstance(value, dict): + value = JobResourceConfiguration(**value) + self._resources = value + + @property + def retry_settings(self) -> Optional[RetrySettings]: + res: Optional[RetrySettings] = self._retry_settings + return res + + @retry_settings.setter + def retry_settings(self, value: Any) -> None: + if isinstance(value, dict): + value = RetrySettings(**value) + self._retry_settings = value diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/retry_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/retry_settings.py new file mode 100644 index 00000000..2fb19ba1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/retry_settings.py @@ -0,0 +1,78 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
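# --- Editorial sketch (not part of the patch) --------------------------------
# The property setters defined above coerce plain dictionaries into typed entity
# objects. A short sketch using only the module added by this patch; the values
# are placeholders.
from azure.ai.ml.entities._job.parallel.parameterized_parallel import ParameterizedParallel

parallel = ParameterizedParallel(
    task={"type": "run_function", "code": "./src", "entry_script": "score.py"},
    resources={"instance_count": 4},
    retry_settings={"max_retries": 3, "timeout": 60},
    max_concurrency_per_instance=2,
)
print(type(parallel.task).__name__)            # ParallelTask
print(type(parallel.resources).__name__)       # JobResourceConfiguration
print(type(parallel.retry_settings).__name__)  # RetrySettings
# ------------------------------------------------------------------------------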
+# --------------------------------------------------------- +from os import PathLike +from pathlib import Path +from typing import Any, Dict, Optional, Union + +from azure.ai.ml._schema.component.retry_settings import RetrySettingsSchema +from azure.ai.ml._utils.utils import load_yaml +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, PARAMS_OVERRIDE_KEY +from azure.ai.ml.entities._mixins import DictMixin, RestTranslatableMixin +from azure.ai.ml.entities._util import load_from_dict + + +class RetrySettings(RestTranslatableMixin, DictMixin): + """Parallel RetrySettings. + + :param timeout: Timeout in seconds for each invocation of the run() method. + (optional) This value could be set through PipelineParameter. + :type timeout: int + :param max_retries: The number of maximum tries for a failed or timeout mini batch. + The range is [1, int.max]. This value could be set through PipelineParameter. + A mini batch with dequeue count greater than this won't be processed again and will be deleted directly. + :type max_retries: int + """ + + def __init__( + self, # pylint: disable=unused-argument + *, + timeout: Optional[Union[int, str]] = None, + max_retries: Optional[Union[int, str]] = None, + **kwargs: Any, + ): + self.timeout = timeout + self.max_retries = max_retries + + def _to_dict(self) -> Dict: + res: dict = RetrySettingsSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) # pylint: disable=no-member + return res + + @classmethod + def _load( + cls, # pylint: disable=unused-argument + path: Optional[Union[PathLike, str]] = None, + params_override: Optional[list] = None, + **kwargs: Any, + ) -> "RetrySettings": + params_override = params_override or [] + data = load_yaml(path) + return RetrySettings._load_from_dict(data=data, path=path, params_override=params_override) + + @classmethod + def _load_from_dict( + cls, + data: dict, + path: Optional[Union[PathLike, str]] = None, + params_override: Optional[list] = None, + **kwargs: Any, + ) -> "RetrySettings": + params_override = params_override or [] + context = { + BASE_PATH_CONTEXT_KEY: Path(path).parent if path else Path.cwd(), + PARAMS_OVERRIDE_KEY: params_override, + } + res: RetrySettings = load_from_dict(RetrySettingsSchema, data, context, **kwargs) + return res + + @classmethod + def _from_dict(cls, dct: dict) -> "RetrySettings": + obj = cls(**dict(dct.items())) + return obj + + def _to_rest_object(self) -> Dict: + return self._to_dict() + + @classmethod + def _from_rest_object(cls, obj: dict) -> "RetrySettings": + return cls._from_dict(obj) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/run_function.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/run_function.py new file mode 100644 index 00000000..180cee76 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parallel/run_function.py @@ -0,0 +1,66 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + + +from typing import Any, Optional, Union + +from azure.ai.ml.constants import ParallelTaskType +from azure.ai.ml.entities._assets.environment import Environment + +from .parallel_task import ParallelTask + + +class RunFunction(ParallelTask): + """Run Function. + + :param code: A local or remote path pointing at source code. + :type code: str + :param entry_script: User script which will be run in parallel on multiple nodes. 
This is
+        specified as a local file path.
+        The entry_script should contain two functions:
+        ``init()``: this function should be used for any costly or common preparation for subsequent inferences,
+        e.g., deserializing and loading the model into a global object.
+        ``run(mini_batch)``: The method to be parallelized. Each invocation will have one mini-batch.
+        'mini_batch': Batch inference will invoke the run method and pass either a list or a pandas DataFrame as an
+        argument to the method. Each entry in mini_batch will be a file path if the input is a FileDataset,
+        or a pandas DataFrame if the input is a TabularDataset.
+        The run() method should return a pandas DataFrame or an array.
+        For the append_row output_action, these returned elements are appended into the common output file.
+        For summary_only, the contents of the elements are ignored. For all output actions,
+        each returned output element indicates one successful inference of an input element in the input mini-batch.
+        Each parallel worker process will call `init` once and then loop over the `run` function until all
+        mini-batches are processed.
+    :type entry_script: str
+    :param program_arguments: The arguments of the parallel task.
+    :type program_arguments: str
+    :param model: The model of the parallel task.
+    :type model: str
+    :param append_row_to: All values output by run() method invocations will be aggregated into
+        one unique file which is created in the output location.
+        If it is not set, 'summary_only' is used, which means the user script is expected to store the output itself.
+    :type append_row_to: str
+    :param environment: Environment that the training job will run in.
+    :type environment: Union[Environment, str]
+    """
+
+    def __init__(
+        self,
+        *,
+        code: Optional[str] = None,
+        entry_script: Optional[str] = None,
+        program_arguments: Optional[str] = None,
+        model: Optional[str] = None,
+        append_row_to: Optional[str] = None,
+        environment: Optional[Union[Environment, str]] = None,
+        **kwargs: Any,
+    ):
+        super().__init__(
+            code=code,
+            entry_script=entry_script,
+            program_arguments=program_arguments,
+            model=model,
+            append_row_to=append_row_to,
+            environment=environment,
+            type=ParallelTaskType.RUN_FUNCTION,
+        )
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parameterized_command.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parameterized_command.py
new file mode 100644
index 00000000..57604b38
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parameterized_command.py
@@ -0,0 +1,170 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
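# --- Editorial sketch (not part of the patch) --------------------------------
# The init()/run() contract described in the RunFunction docstring above, plus a
# matching task definition. File names, arguments, the output binding and the
# environment are hypothetical placeholders; the public import path is typically
# azure.ai.ml.parallel, but the module added by this patch is used here so the
# sketch relies only on code shown above.

# score.py -- entry script executed by every parallel worker process
def init():
    # one-time, possibly costly preparation (e.g. loading a model into a global)
    global factor
    factor = 2  # stand-in for a real model


def run(mini_batch):
    # called once per mini-batch; one returned element per successfully processed item
    return [f"{item}: {factor}" for item in mini_batch]


# task definition referencing the script above
from azure.ai.ml.entities._job.parallel.run_function import RunFunction

task = RunFunction(
    code="./src",
    entry_script="score.py",
    program_arguments="--logging_level INFO",
    append_row_to="${{outputs.job_output_file}}",  # aggregate run() results into one file
    environment="azureml:my-parallel-env:1",  # hypothetical registered environment
)
# ------------------------------------------------------------------------------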
+# --------------------------------------------------------- + +# pylint: disable=protected-access + +import logging +import os +from typing import Dict, Optional, Union + +from marshmallow import INCLUDE + +from azure.ai.ml._restclient.v2023_04_01_preview.models import SweepJob +from azure.ai.ml._schema.core.fields import ExperimentalField +from azure.ai.ml.entities._assets import Environment + +from ..._schema import NestedField, UnionField +from ..._schema.job.distribution import ( + MPIDistributionSchema, + PyTorchDistributionSchema, + RayDistributionSchema, + TensorFlowDistributionSchema, +) +from .distribution import ( + DistributionConfiguration, + MpiDistribution, + PyTorchDistribution, + RayDistribution, + TensorFlowDistribution, +) +from .job_resource_configuration import JobResourceConfiguration +from .queue_settings import QueueSettings + +module_logger = logging.getLogger(__name__) + +# no reference found. leave it for future use. +INPUT_BINDING_PREFIX = "AZURE_ML_INPUT_" +OLD_INPUT_BINDING_PREFIX = "AZURE_ML_INPUT" + + +class ParameterizedCommand: + """Command component version that contains the command and supporting parameters for a Command component + or job. + + This class should not be instantiated directly. Instead, use the child class + ~azure.ai.ml.entities.CommandComponent. + + :param command: The command to be executed. Defaults to "". + :type command: str + :param resources: The compute resource configuration for the command. + :type resources: Optional[Union[dict, ~azure.ai.ml.entities.JobResourceConfiguration]] + :param code: The source code to run the job. Can be a local path or "http:", "https:", or "azureml:" url pointing + to a remote location. + :type code: Optional[str] + :param environment_variables: A dictionary of environment variable names and values. + These environment variables are set on the process where user script is being executed. + :type environment_variables: Optional[dict[str, str]] + :param distribution: The distribution configuration for distributed jobs. + :type distribution: Optional[Union[dict, ~azure.ai.ml.PyTorchDistribution, ~azure.ai.ml.MpiDistribution, + ~azure.ai.ml.TensorFlowDistribution, ~azure.ai.ml.RayDistribution]] + :param environment: The environment that the job will run in. + :type environment: Optional[Union[str, ~azure.ai.ml.entities.Environment]] + :param queue_settings: The queue settings for the job. + :type queue_settings: Optional[~azure.ai.ml.entities.QueueSettings] + :keyword kwargs: A dictionary of additional configuration parameters. 
+ :paramtype kwargs: dict + """ + + def __init__( + self, + command: Optional[str] = "", + resources: Optional[Union[dict, JobResourceConfiguration]] = None, + code: Optional[Union[str, os.PathLike]] = None, + environment_variables: Optional[Dict] = None, + distribution: Optional[ + Union[ + Dict, + MpiDistribution, + TensorFlowDistribution, + PyTorchDistribution, + RayDistribution, + DistributionConfiguration, + ] + ] = None, + environment: Optional[Union[Environment, str]] = None, + queue_settings: Optional[QueueSettings] = None, + **kwargs: Dict, + ) -> None: + super().__init__(**kwargs) + self.command = command + self.code = code + self.environment_variables = dict(environment_variables) if environment_variables else {} + self.environment = environment + self.distribution = distribution + self.resources = resources # type: ignore[assignment] + self.queue_settings = queue_settings + + @property + def distribution( + self, + ) -> Optional[ + Union[ + dict, + MpiDistribution, + TensorFlowDistribution, + PyTorchDistribution, + RayDistribution, + DistributionConfiguration, + ] + ]: + """The configuration for the distributed command component or job. + + :return: The distribution configuration. + :rtype: Union[~azure.ai.ml.PyTorchDistribution, ~azure.ai.ml.MpiDistribution, + ~azure.ai.ml.TensorFlowDistribution, ~azure.ai.ml.RayDistribution] + """ + return self._distribution + + @distribution.setter + def distribution(self, value: Union[dict, PyTorchDistribution, MpiDistribution]) -> None: + """Sets the configuration for the distributed command component or job. + + :param value: The distribution configuration for distributed jobs. + :type value: Union[dict, ~azure.ai.ml.PyTorchDistribution, ~azure.ai.ml.MpiDistribution, + ~azure.ai.ml.TensorFlowDistribution, ~azure.ai.ml.RayDistribution] + """ + if isinstance(value, dict): + dist_schema = UnionField( + [ + NestedField(PyTorchDistributionSchema, unknown=INCLUDE), + NestedField(TensorFlowDistributionSchema, unknown=INCLUDE), + NestedField(MPIDistributionSchema, unknown=INCLUDE), + ExperimentalField(NestedField(RayDistributionSchema, unknown=INCLUDE)), + ] + ) + value = dist_schema._deserialize(value=value, attr=None, data=None) + self._distribution = value + + @property + def resources(self) -> JobResourceConfiguration: + """The compute resource configuration for the command component or job. + + :return: The compute resource configuration for the command component or job. + :rtype: ~azure.ai.ml.entities.JobResourceConfiguration + """ + return self._resources + + @resources.setter + def resources(self, value: Union[dict, JobResourceConfiguration]) -> None: + """Sets the compute resource configuration for the command component or job. + + :param value: The compute resource configuration for the command component or job. 
+ :type value: Union[dict, ~azure.ai.ml.entities.JobResourceConfiguration] + """ + if isinstance(value, dict): + value = JobResourceConfiguration(**value) + self._resources = value + + @classmethod + def _load_from_sweep_job(cls, sweep_job: SweepJob) -> "ParameterizedCommand": + parameterized_command = cls( + command=sweep_job.trial.command, + code=sweep_job.trial.code_id, + environment_variables=sweep_job.trial.environment_variables, + environment=sweep_job.trial.environment_id, + distribution=DistributionConfiguration._from_rest_object(sweep_job.trial.distribution), + resources=JobResourceConfiguration._from_rest_object(sweep_job.trial.resources), + queue_settings=QueueSettings._from_rest_object(sweep_job.queue_settings), + ) + return parameterized_command diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parameterized_spark.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parameterized_spark.py new file mode 100644 index 00000000..c8a9a0c0 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/parameterized_spark.py @@ -0,0 +1,88 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +import os +from typing import Any, Dict, List, Optional, Union + +from azure.ai.ml.entities._assets import Environment +from azure.ai.ml.entities._job.spark_job_entry import SparkJobEntry + +from .._job.spark_job_entry_mixin import SparkJobEntryMixin + +DUMMY_IMAGE = "conda/miniconda3" + + +class ParameterizedSpark(SparkJobEntryMixin): + """ + This class should not be instantiated directly. Instead, use the child class ~azure.ai.ml.entities.SparkComponent. + + Spark component that contains supporting parameters. + + :param code: The source code to run the job. Can be a local path or "http:", "https:", or "azureml:" url pointing + to a remote location. + :type code: Optional[Union[str, os.PathLike]] + :param entry: The file or class entry point. + :type entry: dict[str, str] + :param py_files: The list of .zip, .egg or .py files to place on the PYTHONPATH for Python apps. + :type py_files: Optional[list[str]] + :param jars: The list of .JAR files to include on the driver and executor classpaths. + :type jars: Optional[list[str]] + :param files: The list of files to be placed in the working directory of each executor. + :type files: Optional[list[str]] + :param archives: The list of archives to be extracted into the working directory of each executor. + :type archives: Optional[list[str]] + :param conf: A dictionary with pre-defined Spark configurations key and values. + :type conf: Optional[dict[str, str]] + :param environment: The Azure ML environment to run the job in. + :type environment: Optional[Union[str, ~azure.ai.ml.entities.Environment]] + :param args: The arguments for the job. + :type args: Optional[str] + :keyword kwargs: A dictionary of additional configuration parameters. 
+ :paramtype kwargs: dict + """ + + def __init__( + self, + code: Optional[Union[str, os.PathLike]] = ".", + entry: Optional[Union[Dict[str, str], SparkJobEntry]] = None, + py_files: Optional[List[str]] = None, + jars: Optional[List[str]] = None, + files: Optional[List[str]] = None, + archives: Optional[List[str]] = None, + conf: Optional[Dict[str, str]] = None, + environment: Optional[Union[str, Environment]] = None, + args: Optional[str] = None, + **kwargs: Any, + ) -> None: + self.args = None + + super().__init__(**kwargs) + self.code = code + self.entry = entry + self.py_files = py_files + self.jars = jars + self.files = files + self.archives = archives + self.conf = conf + self.environment = environment + self.args = args + + @property + def environment(self) -> Optional[Union[str, Environment]]: + """The Azure ML environment to run the Spark component or job in. + + :return: The Azure ML environment to run the Spark component or job in. + :rtype: Optional[Union[str, ~azure.ai.ml.entities.Environment]] + """ + if isinstance(self._environment, Environment) and self._environment.image is None: + return Environment(conda_file=self._environment.conda_file, image=DUMMY_IMAGE) + return self._environment + + @environment.setter + def environment(self, value: Optional[Union[str, Environment]]) -> None: + """Sets the Azure ML environment to run the Spark component or job in. + + :param value: The Azure ML environment to run the Spark component or job in. + :type value: Optional[Union[str, ~azure.ai.ml.entities.Environment]] + """ + self._environment = value diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/__init__.py new file mode 100644 index 00000000..fdf8caba --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/__init__.py @@ -0,0 +1,5 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_attr_dict.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_attr_dict.py new file mode 100644 index 00000000..cf8d92be --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_attr_dict.py @@ -0,0 +1,161 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +import logging +from abc import ABC +from typing import Any, Dict, Generic, List, Optional, TypeVar + +K = TypeVar("K") +V = TypeVar("V") + + +class _AttrDict(Generic[K, V], Dict, ABC): + """This class is used for accessing values with instance.some_key. It supports the following scenarios: + + 1. 
Setting an arbitrary attribute, eg: obj.resource_layout.node_count = 2
+        1.1 Setting the same nested field twice will return the same object, eg:
+            obj.resource_layout.node_count = 2
+            obj.resource_layout.process_count_per_node = 2
+            obj.resource_layout will be {"node_count": 2, "process_count_per_node": 2}
+        1.2 Only public attributes are supported, eg: obj._resource_layout._node_count = 2 will raise AttributeError
+        1.3 All set attributes can be recorded, eg:
+            obj.target = "aml"
+            obj.resource_layout.process_count_per_node = 2
+            obj._get_attrs() will return {"target": "aml", "resource_layout": {"process_count_per_node": 2}}
+    2. Getting an arbitrary attribute; getting a non-existent attribute will return an empty dict.
+    3. Calling arbitrary methods is not allowed, eg: obj.resource_layout() should raise AttributeError
+    """
+
+    def __init__(self, allowed_keys: Optional[Dict] = None, **kwargs: Any):
+        """Initialize an attribute dictionary.
+
+        :param allowed_keys: A dictionary of keys that are allowed to be set as arbitrary attributes. None means all
+            keys can be set as arbitrary attributes.
+        :type allowed_keys: dict
+        :param kwargs: A dictionary of additional configuration parameters.
+        :type kwargs: dict
+        """
+        super(_AttrDict, self).__init__(**kwargs)
+        if allowed_keys is None:
+            # A None allowed_keys means there is no restriction on which keys can be set for _AttrDict
+            self._allowed_keys = {}
+            self._key_restriction = False
+        else:
+            # Otherwise use allowed_keys to restrict which keys can be set for _AttrDict
+            self._allowed_keys = dict(allowed_keys)
+            self._key_restriction = True
+        self._logger = logging.getLogger("attr_dict")
+
+    def _initializing(self) -> bool:
+        # Used to indicate an ongoing init process; subclasses need to make sure this returns True during their
+        # own init process.
+        return False
+
+    def _get_attrs(self) -> dict:
+        """Get all arbitrary attributes which have been set; empty values are excluded.
+
+        :return: A dict which contains all arbitrary attributes set by the user.
+        :rtype: dict
+        """
+
+        # TODO: check this
+        def remove_empty_values(data: Dict) -> Dict:
+            if not isinstance(data, dict):
+                return data
+            # skip empty dicts as the default value of _AttrDict is an empty dict
+            return {k: remove_empty_values(v) for k, v in data.items() if v or not isinstance(v, dict)}
+
+        return remove_empty_values(self)
+
+    def _is_arbitrary_attr(self, attr_name: str) -> bool:
+        """Check if a given attribute name should be treated as an arbitrary attribute.
+
+        Attributes inside _AttrDict can be non-arbitrary attributes or arbitrary attributes.
+        Non-arbitrary attributes are normal attributes stored in self.__dict__, as on any other object.
+        Arbitrary attributes are attributes stored in the dictionary itself; what makes them special is that their
+        value can be an instance of _AttrDict.
+        Take `obj = _AttrDict(allowed_keys={"resource_layout": {"node_count": None}})` as an example.
+        `obj.some_key` accesses a non-arbitrary attribute.
+        `obj.resource_layout` accesses an arbitrary attribute; the user can use `obj.resource_layout.node_count = 1`
+        to assign a value to it.
+
+        :param attr_name: Attribute name
+        :type attr_name: str
+        :return: Whether the given attribute name should be treated as an arbitrary attribute.
+        :rtype: bool
+        """
+        # Internal attributes won't be set as arbitrary attributes.
+        if attr_name.startswith("_"):
+            return False
+        # All attributes set in __init__ won't be set as arbitrary attributes
+        if self._initializing():
+            return False
+        # If there's a key restriction, only keys in it can be set as arbitrary attributes.
+ if self._key_restriction and attr_name not in self._allowed_keys: + return False + # Attributes already in attribute dict will not be set as arbitrary attribute. + try: + self.__getattribute__(attr_name) + except AttributeError: + return True + return False + + def __getattr__(self, key: Any) -> Any: + if not self._is_arbitrary_attr(key): + return super().__getattribute__(key) + self._logger.debug("getting %s", key) + try: + return super().__getitem__(key) + except KeyError: + allowed_keys = self._allowed_keys.get(key, None) if self._key_restriction else None + result: Any = _AttrDict(allowed_keys=allowed_keys) + self.__setattr__(key, result) + return result + + def __setattr__(self, key: Any, value: V) -> None: + if not self._is_arbitrary_attr(key): + super().__setattr__(key, value) + else: + self._logger.debug("setting %s to %s", key, value) + super().__setitem__(key, value) + + def __setitem__(self, key: Any, value: V) -> None: + self.__setattr__(key, value) + + def __getitem__(self, item: V) -> Any: + # support attr_dict[item] since dumping it in marshmallow requires this. + return self.__getattr__(item) + + def __dir__(self) -> List: + # For Jupyter Notebook auto-completion + return list(super().__dir__()) + list(self.keys()) + + +def has_attr_safe(obj: Any, attr: Any) -> bool: + if isinstance(obj, _AttrDict): + has_attr = not obj._is_arbitrary_attr(attr) + elif isinstance(obj, dict): + return attr in obj + else: + has_attr = hasattr(obj, attr) + return has_attr + + +def try_get_non_arbitrary_attr(obj: Any, attr: str) -> Optional[Any]: + """Try to get non-arbitrary attribute for potential attribute dict. + + Will not create target attribute if it is an arbitrary attribute in _AttrDict. + + :param obj: The obj + :type obj: Any + :param attr: The attribute name + :type attr: str + :return: obj.attr + :rtype: Any + """ + if has_attr_safe(obj, attr): + return obj[attr] if isinstance(obj, dict) else getattr(obj, attr) + return None diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_component_translatable.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_component_translatable.py new file mode 100644 index 00000000..22be939d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_component_translatable.py @@ -0,0 +1,412 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
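# --- Editorial sketch (not part of the patch) --------------------------------
# A short demonstration of the _AttrDict behaviour documented above, using only
# the module added by this patch.
from azure.ai.ml.entities._job.pipeline._attr_dict import _AttrDict, has_attr_safe

obj = _AttrDict()  # no key restriction
obj.resource_layout.node_count = 2  # nested arbitrary attributes are created on the fly
obj.resource_layout.process_count_per_node = 2
obj.target = "aml"

print(obj._get_attrs())
# {'resource_layout': {'node_count': 2, 'process_count_per_node': 2}, 'target': 'aml'}
print(has_attr_safe(obj, "resource_layout"))  # False: it is an arbitrary attribute
# ------------------------------------------------------------------------------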
+# --------------------------------------------------------- +# pylint: disable=protected-access, redefined-builtin +# disable redefined-builtin to use input as argument name +import re +from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union + +from pydash import get + +from azure.ai.ml._utils.utils import is_data_binding_expression +from azure.ai.ml.constants._common import AssetTypes +from azure.ai.ml.constants._component import ComponentJobConstants +from azure.ai.ml.entities._inputs_outputs import Input, Output +from azure.ai.ml.entities._job.pipeline._io import PipelineInput, PipelineOutput +from azure.ai.ml.entities._job.sweep.search_space import Choice, Randint, SweepDistribution +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, JobException + +# avoid circular import error +if TYPE_CHECKING: + from azure.ai.ml.entities._builders import BaseNode + from azure.ai.ml.entities._component.component import Component + + +class ComponentTranslatableMixin: + _PYTHON_SDK_TYPE_MAPPING = { + float: "number", + int: "integer", + bool: "boolean", + str: "string", + } + + @classmethod + def _find_source_from_parent_inputs(cls, input: str, pipeline_job_inputs: dict) -> Tuple[str, Optional[str]]: + """Find source type and mode of input/output from parent input. + + :param input: The input name + :type input: str + :param pipeline_job_inputs: The pipeline job inputs + :type pipeline_job_inputs: dict + :return: A 2-tuple of the type and the mode + :rtype: Tuple[str, Optional[str]] + """ + _input_name = input.split(".")[2][:-2] + if _input_name not in pipeline_job_inputs.keys(): + msg = "Failed to find top level definition for input binding {}." + raise JobException( + message=msg.format(input), + no_personal_data_message=msg.format("[input]"), + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + input_data = pipeline_job_inputs[_input_name] + input_type = type(input_data) + if input_type in cls._PYTHON_SDK_TYPE_MAPPING: + return cls._PYTHON_SDK_TYPE_MAPPING[input_type], None + return getattr(input_data, "type", AssetTypes.URI_FOLDER), getattr(input_data, "mode", None) + + @classmethod + def _find_source_from_parent_outputs(cls, input: str, pipeline_job_outputs: dict) -> Tuple[str, Optional[str]]: + """Find source type and mode of input/output from parent output. + + :param input: The input name + :type input: str + :param pipeline_job_outputs: The pipeline job outputs + :type pipeline_job_outputs: dict + :return: A 2-tuple of the type and the mode + :rtype: Tuple[str, Optional[str]] + """ + _output_name = input.split(".")[2][:-2] + if _output_name not in pipeline_job_outputs.keys(): + msg = "Failed to find top level definition for output binding {}." 
+ raise JobException( + message=msg.format(input), + no_personal_data_message=msg.format("[input]"), + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + output_data = pipeline_job_outputs[_output_name] + output_type = type(output_data) + if output_type in cls._PYTHON_SDK_TYPE_MAPPING: + return cls._PYTHON_SDK_TYPE_MAPPING[output_type], None + if isinstance(output_data, dict): + if "type" in output_data: + output_data_type = output_data["type"] + else: + output_data_type = AssetTypes.URI_FOLDER + if "mode" in output_data: + output_data_mode = output_data["mode"] + else: + output_data_mode = None + return output_data_type, output_data_mode + return getattr(output_data, "type", AssetTypes.URI_FOLDER), getattr(output_data, "mode", None) + + @classmethod + def _find_source_from_other_jobs( + cls, input: str, jobs_dict: dict, pipeline_job_dict: dict + ) -> Tuple[str, Optional[str]]: + """Find source type and mode of input/output from other job. + + :param input: The input name + :type input: str + :param jobs_dict: The job dict + :type jobs_dict: + :param pipeline_job_dict: The pipeline job dict + :type pipeline_job_dict: dict + :return: A 2-tuple of the type and the mode + :rtype: Tuple[str, Optional[str]] + """ + from azure.ai.ml.entities import CommandJob + from azure.ai.ml.entities._builders import BaseNode + from azure.ai.ml.entities._job.automl.automl_job import AutoMLJob + from azure.ai.ml.parallel import ParallelJob + + _input_regex = r"\${{parent.jobs.([^.]+).([^.]+).([^.]+)}}" + m = re.match(_input_regex, input) + if m is None: + msg = "Failed to find top level definition for job binding {}." + raise JobException( + message=msg.format(input), + no_personal_data_message=msg.format("[input]"), + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + _input_job_name, _io_type, _name = m.groups() + _input_job = jobs_dict[_input_job_name] + + # we only support input of one job is from output of another output, but input mode should be decoupled with + # output mode, so we always return None source_mode + source_mode = None + if isinstance(_input_job, BaseNode): + # If source is base node, get type from io builder + _source = _input_job[_io_type][_name] + try: + source_type = _source.type + # Todo: get component type for registered component, and no need following codes + # source_type is None means _input_job's component is registered component which results in its + # input/output type is None. + if source_type is None: + if _source._data is None: + # return default type if _input_job's output data is None + source_type = AssetTypes.URI_FOLDER + elif isinstance(_source._data, Output): + # if _input_job data is a Output object and we return its type. + source_type = _source._data.type + else: + # otherwise _input_job's input/output is bound to pipeline input/output, we continue + # infer the type according to _source._data. Will return corresponding pipeline + # input/output type because we didn't get the component. + source_type, _ = cls._find_source_input_output_type(_source._data, pipeline_job_dict) + return source_type, source_mode + except AttributeError as e: + msg = "Failed to get referenced component type {}." 
+ raise JobException( + message=msg.format(_input_regex), + no_personal_data_message=msg.format("[_input_regex]"), + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) from e + if isinstance(_input_job, (CommandJob, ParallelJob)): + # If source has not parsed to Command yet, infer type + _source = get(_input_job, f"{_io_type}.{_name}") + if isinstance(_source, str): + source_type, _ = cls._find_source_input_output_type(_source, pipeline_job_dict) + return source_type, source_mode + return getattr(_source, "type", AssetTypes.URI_FOLDER), source_mode + if isinstance(_input_job, AutoMLJob): + # If source is AutoMLJob, only outputs is supported + if _io_type != "outputs": + msg = f"Only binding to AutoMLJob output is supported, currently got {_io_type}" + raise JobException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + # AutoMLJob's output type can only be MLTABLE + return AssetTypes.MLTABLE, source_mode + msg = f"Unknown referenced source job type: {type(_input_job)}." + raise JobException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + + @classmethod + def _find_source_input_output_type(cls, input: str, pipeline_job_dict: dict) -> Tuple[str, Optional[str]]: + """Find source type and mode of input/output. + + :param input: The input binding + :type input: str + :param pipeline_job_dict: The pipeline job dict + :type pipeline_job_dict: dict + :return: A 2-tuple of the type and the mode + :rtype: Tuple[str, Optional[str]] + """ + pipeline_job_inputs = pipeline_job_dict.get("inputs", {}) + pipeline_job_outputs = pipeline_job_dict.get("outputs", {}) + jobs_dict = pipeline_job_dict.get("jobs", {}) + if is_data_binding_expression(input, ["parent", "inputs"]): + return cls._find_source_from_parent_inputs(input, pipeline_job_inputs) + if is_data_binding_expression(input, ["parent", "outputs"]): + return cls._find_source_from_parent_outputs(input, pipeline_job_outputs) + if is_data_binding_expression(input, ["parent", "jobs"]): + try: + return cls._find_source_from_other_jobs(input, jobs_dict, pipeline_job_dict) + except JobException as e: + raise e + except Exception as e: + msg = "Failed to find referenced source for input binding {}" + raise JobException( + message=msg.format(input), + no_personal_data_message=msg.format("[input]"), + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.SYSTEM_ERROR, + ) from e + else: + msg = "Job input in a pipeline can bind only to a job output or a pipeline input" + raise JobException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + + @classmethod + def _to_input( + cls, # pylint: disable=unused-argument + input: Union[Input, str, bool, int, float], + pipeline_job_dict: Optional[dict] = None, + **kwargs: Any, + ) -> Input: + """Convert a single job input value to component input. 
+ + :param input: The input + :type input: Union[Input, str, bool, int, float] + :param pipeline_job_dict: The pipeline job dict + :type pipeline_job_dict: Optional[dict] + :return: The Component Input + :rtype: Input + """ + pipeline_job_dict = pipeline_job_dict or {} + input_variable: Dict = {} + + if isinstance(input, str) and bool(re.search(ComponentJobConstants.INPUT_PATTERN, input)): + # handle input bindings + input_variable["type"], input_variable["mode"] = cls._find_source_input_output_type( + input, pipeline_job_dict + ) + + elif isinstance(input, Input): + input_variable = input._to_dict() + elif isinstance(input, SweepDistribution): + if isinstance(input, Choice): + if input.values is not None: + input_variable["type"] = cls._PYTHON_SDK_TYPE_MAPPING[type(input.values[0])] + elif isinstance(input, Randint): + input_variable["type"] = cls._PYTHON_SDK_TYPE_MAPPING[int] + else: + input_variable["type"] = cls._PYTHON_SDK_TYPE_MAPPING[float] + + input_variable["optional"] = False + elif type(input) in cls._PYTHON_SDK_TYPE_MAPPING: + input_variable["type"] = cls._PYTHON_SDK_TYPE_MAPPING[type(input)] + input_variable["default"] = input + elif isinstance(input, PipelineInput): + # Infer input type from input data + input_variable = input._to_input()._to_dict() + else: + msg = "'{}' is not supported as component input, supported types are '{}'.".format( + type(input), cls._PYTHON_SDK_TYPE_MAPPING.keys() + ) + raise JobException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + return Input(**input_variable) + + @classmethod + def _to_input_builder_function(cls, input: Union[Dict, SweepDistribution, Input, str, bool, int, float]) -> Input: + input_variable = {} + + if isinstance(input, Input): + input_variable = input._to_dict() + elif isinstance(input, SweepDistribution): + if isinstance(input, Choice): + if input.values is not None: + input_variable["type"] = cls._PYTHON_SDK_TYPE_MAPPING[type(input.values[0])] + elif isinstance(input, Randint): + input_variable["type"] = cls._PYTHON_SDK_TYPE_MAPPING[int] + else: + input_variable["type"] = cls._PYTHON_SDK_TYPE_MAPPING[float] + + input_variable["optional"] = False + else: + input_variable["type"] = cls._PYTHON_SDK_TYPE_MAPPING[type(input)] + input_variable["default"] = input + return Input(**input_variable) + + @classmethod + def _to_output( + cls, # pylint: disable=unused-argument + output: Optional[Union[Output, Dict, str, bool, int, float]], + pipeline_job_dict: Optional[dict] = None, + **kwargs: Any, + ) -> Output: + """Translate output value to Output and infer component output type + from linked pipeline output, its original type or default type. 
+ + :param output: The output + :type output: Union[Output, str, bool, int, float] + :param pipeline_job_dict: The pipeline job dict + :type pipeline_job_dict: Optional[dict] + :return: The output object + :rtype: Output + """ + pipeline_job_dict = pipeline_job_dict or {} + output_type = None + if not pipeline_job_dict or output is None: + try: + output_type = output.type # type: ignore + except AttributeError: + # default to url_folder if failed to get type + output_type = AssetTypes.URI_FOLDER + output_variable = {"type": output_type} + return Output(**output_variable) + output_variable = {} + + if isinstance(output, str) and bool(re.search(ComponentJobConstants.OUTPUT_PATTERN, output)): + # handle output bindings + output_variable["type"], output_variable["mode"] = cls._find_source_input_output_type( + output, pipeline_job_dict + ) + + elif isinstance(output, Output): + output_variable = output._to_dict() + + elif isinstance(output, PipelineOutput): + output_variable = output._to_output()._to_dict() + + elif type(output) in cls._PYTHON_SDK_TYPE_MAPPING: + output_variable["type"] = cls._PYTHON_SDK_TYPE_MAPPING[type(output)] + output_variable["default"] = output + else: + msg = "'{}' is not supported as component output, supported types are '{}'.".format( + type(output), cls._PYTHON_SDK_TYPE_MAPPING.keys() + ) + raise JobException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + return Output(**output_variable) + + def _to_inputs(self, inputs: Optional[Dict], **kwargs: Any) -> Dict: + """Translate inputs to Inputs. + + :param inputs: mapping from input name to input object. + :type inputs: Dict[str, Union[Input, str, bool, int, float]] + :return: mapping from input name to translated component input. + :rtype: Dict[str, Input] + """ + pipeline_job_dict = kwargs.get("pipeline_job_dict", {}) + translated_component_inputs = {} + if inputs is not None: + for io_name, io_value in inputs.items(): + translated_component_inputs[io_name] = self._to_input(io_value, pipeline_job_dict) + return translated_component_inputs + + def _to_outputs(self, outputs: Optional[Dict], **kwargs: Any) -> Dict: + """Translate outputs to Outputs. + + :param outputs: mapping from output name to output object. + :type outputs: Dict[str, Output] + :return: mapping from output name to translated component output. + :rtype: Dict[str, Output] + """ + # Translate outputs to Outputs. + pipeline_job_dict = kwargs.get("pipeline_job_dict", {}) + translated_component_outputs = {} + if outputs is not None: + for output_name, output_value in outputs.items(): + translated_component_outputs[output_name] = self._to_output(output_value, pipeline_job_dict) + return translated_component_outputs + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> Union["Component", str]: + """Translate to Component. + + :param context: The context + :type context: Optional[context] + :return: Translated Component. + :rtype: Component + """ + # Note: Source of translated component should be same with Job + # And should be set after called _to_component/_to_node as job has no _source now. + raise NotImplementedError() + + def _to_node(self, context: Optional[Dict] = None, **kwargs: Any) -> "BaseNode": + """Translate to pipeline node. + + :param context: The context + :type context: Optional[context] + :return: Translated node. 
+ :rtype: BaseNode + """ + # Note: Source of translated component should be same with Job + # And should be set after called _to_component/_to_node as job has no _source now. + raise NotImplementedError() diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_io/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_io/__init__.py new file mode 100644 index 00000000..3ccde947 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_io/__init__.py @@ -0,0 +1,21 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +"""Classes in this package converts input & output set by user to pipeline job input & output.""" + +from .attr_dict import OutputsAttrDict, _GroupAttrDict +from .base import InputOutputBase, NodeInput, NodeOutput, PipelineInput, PipelineOutput +from .mixin import AutoMLNodeIOMixin, NodeWithGroupInputMixin, PipelineJobIOMixin + +__all__ = [ + "PipelineOutput", + "PipelineInput", + "NodeOutput", + "NodeInput", + "InputOutputBase", + "OutputsAttrDict", + "_GroupAttrDict", + "NodeWithGroupInputMixin", + "AutoMLNodeIOMixin", + "PipelineJobIOMixin", +] diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_io/attr_dict.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_io/attr_dict.py new file mode 100644 index 00000000..0ae08bcd --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_io/attr_dict.py @@ -0,0 +1,170 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from enum import Enum +from typing import Any, Dict, List, Optional, Union + +from azure.ai.ml.entities._assets import Data +from azure.ai.ml.entities._inputs_outputs import GroupInput, Input, Output +from azure.ai.ml.entities._job.pipeline._attr_dict import K +from azure.ai.ml.entities._job.pipeline._io.base import NodeInput, NodeOutput, PipelineInput +from azure.ai.ml.exceptions import ( + ErrorCategory, + ErrorTarget, + UnexpectedAttributeError, + UnexpectedKeywordError, + ValidationException, +) + + +class InputsAttrDict(dict): + def __init__(self, inputs: dict, **kwargs: Any): + self._validate_inputs(inputs) + super(InputsAttrDict, self).__init__(**inputs, **kwargs) + + @classmethod + def _validate_inputs(cls, inputs: Any) -> None: + msg = "Pipeline/component input should be a \ + azure.ai.ml.entities._job.pipeline._io.NodeInput with owner, got {}." + for val in inputs.values(): + if isinstance(val, NodeInput) and val._owner is not None: # pylint: disable=protected-access + continue + if isinstance(val, _GroupAttrDict): + continue + raise ValidationException( + message=msg.format(val), + no_personal_data_message=msg.format("[val]"), + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + + def __setattr__( + self, + key: str, + value: Union[int, bool, float, str, NodeOutput, PipelineInput, Input], + ) -> None: + # Extract enum value. + value = value.value if isinstance(value, Enum) else value + original_input = self.__getattr__(key) # Note that an exception will be raised if the keyword is invalid. + if isinstance(original_input, _GroupAttrDict) or isinstance(value, _GroupAttrDict): + # Set the value directly if is parameter group. 
+ self._set_group_with_type_check(key, GroupInput.custom_class_value_to_attr_dict(value)) + return + original_input._data = original_input._build_data(value) + + def _set_group_with_type_check(self, key: Any, value: Any) -> None: + msg = "{!r} is expected to be a parameter group, but got {}." + if not isinstance(value, _GroupAttrDict): + raise ValidationException( + message=msg.format(key, type(value)), + no_personal_data_message=msg.format("[key]", "[value_type]"), + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + self.__setitem__(key, GroupInput.custom_class_value_to_attr_dict(value)) + + def __getattr__(self, item: Any) -> NodeInput: + res: NodeInput = self.__getitem__(item) + return res + + +class _GroupAttrDict(InputsAttrDict): + """This class is used for accessing values with instance.some_key.""" + + @classmethod + def _validate_inputs(cls, inputs: Any) -> None: + msg = "Pipeline/component input should be a azure.ai.ml.entities._job.pipeline._io.NodeInput, got {}." + for val in inputs.values(): + if isinstance(val, NodeInput) and val._owner is not None: # pylint: disable=protected-access + continue + if isinstance(val, _GroupAttrDict): + continue + # Allow PipelineInput as Group may appear at top level pipeline input. + if isinstance(val, PipelineInput): + continue + raise ValidationException( + message=msg.format(val), + no_personal_data_message=msg.format("[val]"), + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + + def __getattr__(self, name: K) -> Any: + if name not in self: + raise UnexpectedAttributeError(keyword=name, keywords=list(self)) + return super().__getitem__(name) + + def __getitem__(self, item: K) -> Any: + # We raise this exception instead of KeyError + if item not in self: + raise UnexpectedKeywordError(func_name="ParameterGroup", keyword=item, keywords=list(self)) + return super().__getitem__(item) + + # For Jupyter Notebook auto-completion + def __dir__(self) -> List: + return list(super().__dir__()) + list(self.keys()) + + def flatten(self, group_parameter_name: Optional[str]) -> Dict: + # Return the flattened result of self + + group_parameter_name = group_parameter_name if group_parameter_name else "" + flattened_parameters = {} + msg = "'%s' in parameter group should be a azure.ai.ml.entities._job._io.NodeInput, got '%s'." + for k, v in self.items(): + flattened_name = ".".join([group_parameter_name, k]) + if isinstance(v, _GroupAttrDict): + flattened_parameters.update(v.flatten(flattened_name)) + elif isinstance(v, NodeInput): + flattened_parameters[flattened_name] = v._to_job_input() # pylint: disable=protected-access + else: + raise ValidationException( + message=msg % (flattened_name, type(v)), + no_personal_data_message=msg % ("name", "type"), + target=ErrorTarget.PIPELINE, + ) + return flattened_parameters + + def insert_group_name_for_items(self, group_name: Any) -> None: + # Insert one group name for all items. + for v in self.values(): + if isinstance(v, _GroupAttrDict): + v.insert_group_name_for_items(group_name) + elif isinstance(v, PipelineInput): + # Insert group names for pipeline input + v._group_names = [group_name] + v._group_names # pylint: disable=protected-access + + +class OutputsAttrDict(dict): + def __init__(self, outputs: dict, **kwargs: Any): + for val in outputs.values(): + if not isinstance(val, NodeOutput) or val._owner is None: + msg = "Pipeline/component output should be a azure.ai.ml.dsl.Output with owner, got {}." 
+ raise ValidationException( + message=msg.format(val), + no_personal_data_message=msg.format("[val]"), + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + super(OutputsAttrDict, self).__init__(**outputs, **kwargs) + + def __getattr__(self, item: Any) -> NodeOutput: + return self.__getitem__(item) + + def __getitem__(self, item: Any) -> NodeOutput: + if item not in self: + # We raise this exception instead of KeyError as OutputsAttrDict doesn't support add new item after + # __init__. + raise UnexpectedAttributeError(keyword=item, keywords=list(self)) + res: NodeOutput = super().__getitem__(item) + return res + + def __setattr__(self, key: str, value: Union[Data, Output]) -> None: + if isinstance(value, Output): + mode = value.mode + value = Output(type=value.type, path=value.path, mode=mode, name=value.name, version=value.version) + original_output = self.__getattr__(key) # Note that an exception will be raised if the keyword is invalid. + original_output._data = original_output._build_data(value) + + def __setitem__(self, key: str, value: Output) -> None: + return self.__setattr__(key, value) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_io/base.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_io/base.py new file mode 100644 index 00000000..b17972ae --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_io/base.py @@ -0,0 +1,848 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +import copy +import re +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, TypeVar, Union, cast, overload + +from azure.ai.ml._utils.utils import is_data_binding_expression +from azure.ai.ml.constants import AssetTypes +from azure.ai.ml.constants._component import IOConstants +from azure.ai.ml.entities._assets._artifacts.data import Data +from azure.ai.ml.entities._assets._artifacts.model import Model +from azure.ai.ml.entities._inputs_outputs import Input, Output +from azure.ai.ml.entities._job.pipeline._pipeline_expression import PipelineExpressionMixin +from azure.ai.ml.entities._util import resolve_pipeline_parameter +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, UserErrorException, ValidationException + +# avoid circular import error +if TYPE_CHECKING: + from azure.ai.ml.entities import PipelineJob + from azure.ai.ml.entities._builders import BaseNode + +T = TypeVar("T") + + +def _build_data_binding(data: Union[str, "PipelineInput", "Output"]) -> Union[str, Output]: + """Build input builders to data bindings. + + :param data: The data to build a data binding from + :type data: Union[str, PipelineInput, Output] + :return: A data binding string if data isn't a str, otherwise data + :rtype: str + """ + result: Union[str, Output] = "" + + if isinstance(data, (InputOutputBase)): + # Build data binding when data is PipelineInput, Output + result = data._data_binding() + else: + # Otherwise just return the data + result = data + return result + + +def _resolve_builders_2_data_bindings( + data: Union[list, dict, str, "PipelineInput", "Output"] +) -> Union[dict, list, str, Output]: + """Traverse data and build input builders inside it to data bindings. 
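A minimal sketch of how output assignment behaves, assuming a hypothetical node with a `model_output` port: assigning an `Output` does not replace the builder itself, it only re-binds the builder's data (with type, path, mode, name, and version copied as shown above):

    from azure.ai.ml import Output

    # Re-configure where and how the node's output is written (datastore path is made up).
    node.outputs.model_output = Output(
        type="uri_folder",
        path="azureml://datastores/my_datastore/paths/model",
        mode="rw_mount",
    )

    # Reading the attribute still returns the NodeOutput builder, not the raw Output.
    out_builder = node.outputs.model_output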
+ + :param data: The bindings to resolve + :type data: Union[list, dict, str, "PipelineInput", "Output"] + :return: + * A dict if data was a dict + * A list if data was a list + * A str otherwise + :rtype: Union[list, dict, str] + """ + if isinstance(data, dict): + for key, val in data.items(): + if isinstance(val, (dict, list)): + data[key] = _resolve_builders_2_data_bindings(val) + else: + data[key] = _build_data_binding(val) + return data + if isinstance(data, list): + resolved_data = [] + for val in data: + resolved_data.append(_resolve_builders_2_data_bindings(val)) + return resolved_data + return _build_data_binding(data) + + +def _data_to_input(data: Union[Data, Model]) -> Input: + """Convert a Data object to an Input object. + + :param data: The data to convert + :type data: Data + :return: The Input object + :rtype: Input + """ + if data.id: + return Input(type=data.type, path=data.id) + return Input(type=data.type, path=f"{data.name}:{data.version}") + + +class InputOutputBase(ABC): + # TODO: refine this code, always use _data to store builder level settings and use _meta to store definition + # TODO: when _data missing, return value from _meta + + def __init__( + self, + meta: Optional[Union[Input, Output]], + data: Optional[Union[int, bool, float, str, Input, Output, "PipelineInput"]], + default_data: Optional[Union[int, bool, float, str, Input, Output]] = None, + **kwargs: Any, + ): + """Base class of input & output. + + :param meta: Metadata of this input/output, eg: type, min, max, etc. + :type meta: Union[Input, Output] + :param data: Actual value of input/output, None means un-configured data. + :type data: Union[None, int, bool, float, str, + azure.ai.ml.Input, + azure.ai.ml.Output] + :param default_data: default value of input/output, None means un-configured data. + :type default_data: Union[None, int, bool, float, str, + azure.ai.ml.Input, + azure.ai.ml.Output] + """ + self._meta = meta + self._original_data = data + self._data: Any = self._build_data(data) + self._default_data = default_data + self._type: str = meta.type if meta is not None else kwargs.pop("type", None) + self._mode = self._get_mode(original_data=data, data=self._data, kwargs=kwargs) + self._description = ( + self._data.description + if self._data is not None and hasattr(self._data, "description") and self._data.description + else kwargs.pop("description", None) + ) + # TODO: remove this + self._attribute_map: Dict = {} + self._name: Optional[str] = "" + self._version: Optional[str] = "" + super(InputOutputBase, self).__init__(**kwargs) + + @abstractmethod + def _build_data(self, data: T) -> Union[T, str, Input, "InputOutputBase"]: + """Validate if data matches type and translate it to Input/Output acceptable type. + + :param data: The data + :type data: T + :return: The built data + :rtype: Union[T, str, Input, InputOutputBase] + """ + + @abstractmethod + def _build_default_data(self) -> None: + """Build default data when data not configured.""" + + @property + def type(self) -> str: + """Type of input/output. + + :return: The type + :rtype: str + """ + return self._type + + @type.setter + def type(self, type: Any) -> None: # pylint: disable=redefined-builtin + # For un-configured input/output, we build a default data entry for them. 
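To make `_data_to_input` concrete: a `Data` asset that has not yet been created in a workspace has no `id`, so it resolves to a `name:version` path (a sketch; the asset values are invented):

    from azure.ai.ml import Input
    from azure.ai.ml.entities import Data

    data_asset = Data(name="training_set", version="3", type="uri_folder")
    as_input = _data_to_input(data_asset)
    # -> Input(type="uri_folder", path="training_set:3")
    # A created asset would resolve to Input(type=..., path=<its ARM id>) instead.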
+ self._build_default_data() + self._type = type + if isinstance(self._data, (Input, Output)): + self._data.type = type + elif self._data is not None and not isinstance( + self._data, (int, float, str) + ): # when type of self._data is InputOutputBase or its child class + self._data._type = type + + @property + def mode(self) -> Optional[str]: + return self._mode + + @mode.setter + def mode(self, mode: Optional[str]) -> None: + # For un-configured input/output, we build a default data entry for them. + self._build_default_data() + self._mode = mode + if isinstance(self._data, (Input, Output)): + self._data.mode = mode + elif self._data is not None and not isinstance(self._data, (int, float, str)): + self._data._mode = mode + + @property + def description(self) -> Any: + return self._description + + @description.setter + def description(self, description: str) -> None: + # For un-configured input/output, we build a default data entry for them. + self._build_default_data() + self._description = description + if isinstance(self._data, (Input, Output)): + self._data.description = description + elif self._data is not None and not isinstance(self._data, (int, float, str)): + self._data._description = description + + @property + def path(self) -> Optional[str]: + # This property is introduced for static intellisense. + if hasattr(self._data, "path"): + if self._data is not None and not isinstance(self._data, (int, float, str)): + res: Optional[str] = self._data.path + return res + msg = f"{type(self._data)} does not have path." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + + @path.setter + def path(self, path: str) -> None: + # For un-configured input/output, we build a default data entry for them. + self._build_default_data() + if hasattr(self._data, "path"): + if self._data is not None and not isinstance(self._data, (int, float, str)): + self._data.path = path + else: + msg = f"{type(self._data)} does not support setting path." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + + def _data_binding(self) -> str: + """Return data binding string representation for this input/output. + + :return: The data binding string + :rtype: str + """ + raise NotImplementedError() + + # Why did we have this function? It prevents the DictMixin from being applied. + # Unclear if we explicitly do NOT want the mapping protocol to be applied to this, or it this was just + # confirmation that it didn't at the time. + def keys(self) -> None: + # This property is introduced to raise catchable Exception in marshmallow mapping validation trial. + raise TypeError(f"'{type(self).__name__}' object is not a mapping") + + def __str__(self) -> str: + try: + return self._data_binding() + except AttributeError: + return super(InputOutputBase, self).__str__() + + def __hash__(self) -> int: + return id(self) + + @classmethod + def _get_mode( + cls, + original_data: Optional[Union[int, bool, float, str, Input, Output, "PipelineInput"]], + data: Optional[Union[int, bool, float, str, Input, Output]], + kwargs: dict, + ) -> Optional[str]: + """Get mode of this input/output builder. + + :param original_data: Original value of input/output. 
+ :type original_data: Union[None, int, bool, float, str + azure.ai.ml.Input, + azure.ai.ml.Output, + azure.ai.ml.entities._job.pipeline._io.PipelineInput] + :param data: Built input/output data. + :type data: Union[None, int, bool, float, str + azure.ai.ml.Input, + azure.ai.ml.Output] + :param kwargs: The kwargs + :type kwargs: Dict + :return: The mode + :rtype: Optional[str] + """ + # pipeline level inputs won't pass mode to bound node level inputs + if isinstance(original_data, PipelineInput): + return None + return data.mode if data is not None and hasattr(data, "mode") else kwargs.pop("mode", None) + + @property + def _is_primitive_type(self) -> bool: + return self.type in IOConstants.PRIMITIVE_STR_2_TYPE + + +class NodeInput(InputOutputBase): + """Define one input of a Component.""" + + def __init__( + self, + port_name: str, + meta: Optional[Input], + *, + data: Optional[Union[int, bool, float, str, Output, "PipelineInput", Input]] = None, + # TODO: Bug Item number: 2883405 + owner: Optional[Union["BaseComponent", "PipelineJob"]] = None, # type: ignore + **kwargs: Any, + ): + """Initialize an input of a component. + + :param name: The name of the input. + :type name: str + :param meta: Metadata of this input, eg: type, min, max, etc. + :type meta: Input + :param data: The input data. Valid types include int, bool, float, str, + Output of another component or pipeline input and Input. + Note that the output of another component or pipeline input associated should be reachable in the scope + of current pipeline. Input is introduced to support case like + TODO: new examples + component.inputs.xxx = Input(path="arm_id") + :type data: Union[int, bool, float, str + azure.ai.ml.Output, + azure.ai.ml.Input] + :param owner: The owner component of the input, used to calculate binding. + :type owner: Union[azure.ai.ml.entities.BaseNode, azure.ai.ml.entities.PipelineJob] + :param kwargs: A dictionary of additional configuration parameters. + :type kwargs: dict + """ + # TODO: validate data matches type in meta + # TODO: validate supported data + self._port_name = port_name + self._owner = owner + super().__init__(meta=meta, data=data, **kwargs) + + def _build_default_data(self) -> None: + """Build default data when input not configured.""" + if self._data is None: + self._data = Input() + + def _build_data(self, data: T) -> Union[T, str, Input, InputOutputBase]: + """Build input data according to assigned input + + eg: node.inputs.key = data + + :param data: The data + :type data: T + :return: The built data + :rtype: Union[T, str, Input, "PipelineInput", "NodeOutput"] + """ + _data: Union[T, str, NodeOutput] = resolve_pipeline_parameter(data) + if _data is None: + return _data + # Unidiomatic typecheck: Checks that data is _exactly_ this type, and not potentially a subtype + if type(_data) is NodeInput: # pylint: disable=unidiomatic-typecheck + msg = "Can not bind input to another component's input." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + if isinstance(_data, (PipelineInput, NodeOutput)): + # If value is input or output, it's a data binding, we require it have a owner so we can convert it to + # a data binding, eg: ${{inputs.xxx}} + if isinstance(_data, NodeOutput) and _data._owner is None: + msg = "Setting input binding {} to output without owner is not allowed." 
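Taken together, the checks in `_build_data` mean a node input accepts literals, `Input` objects, pipeline parameters, and other nodes' outputs, but never another node's *input*; a sketch with hypothetical components:

    from azure.ai.ml import Input
    from azure.ai.ml.dsl import pipeline

    @pipeline
    def my_pipeline(raw_data: Input):
        step1 = prep_component(data=raw_data)              # PipelineInput -> kept as a binding
        step2 = train_component(
            data=step1.outputs.prepped_data,               # NodeOutput with an owner -> kept as a binding
            max_epochs=20,                                 # literal -> passed through unchanged
        )
        # step2.inputs.data = step1.inputs.data            # would raise: cannot bind to another input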
+ raise ValidationException( + message=msg.format(_data), + no_personal_data_message=msg.format("[_data]"), + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + return _data + # for data binding case, set is_singular=False for case like "${{parent.inputs.job_in_folder}}/sample1.csv" + if isinstance(_data, Input) or is_data_binding_expression(_data, is_singular=False): + return _data + if isinstance(_data, (Data, Model)): + return _data_to_input(_data) + # self._meta.type could be None when sub pipeline has no annotation + if isinstance(self._meta, Input) and self._meta.type and not self._meta._is_primitive_type: + if isinstance(_data, str): + return Input(type=self._meta.type, path=_data) + msg = "only path input is supported now but get {}: {}." + raise UserErrorException( + message=msg.format(type(_data), _data), + no_personal_data_message=msg.format(type(_data), "[_data]"), + ) + return _data + + def _to_job_input(self) -> Optional[Union[Input, str, Output]]: + """convert the input to Input, this logic will change if backend contract changes.""" + result: Optional[Union[Input, str, Output]] = None + + if self._data is None: + # None data means this input is not configured. + result = None + elif isinstance(self._data, (PipelineInput, NodeOutput)): + # Build data binding when data is PipelineInput, Output + result = Input(path=self._data._data_binding(), mode=self.mode) + elif is_data_binding_expression(self._data): + result = Input(path=self._data, mode=self.mode) + else: + data_binding = _build_data_binding(self._data) + if is_data_binding_expression(self._data): + result = Input(path=data_binding, mode=self.mode) + else: + result = data_binding + # TODO: validate is self._data is supported + + return result + + def _data_binding(self) -> str: + msg = "Input binding {} can only come from a pipeline, currently got {}" + # call type(self._owner) to avoid circular import + raise ValidationException( + message=msg.format(self._port_name, type(self._owner)), + target=ErrorTarget.PIPELINE, + no_personal_data_message=msg.format("[port_name]", "[owner]"), + error_category=ErrorCategory.USER_ERROR, + ) + + def _copy(self, owner: Any) -> "NodeInput": + return NodeInput( + port_name=self._port_name, + data=self._data, + owner=owner, + meta=cast(Input, self._meta), + ) + + def _deepcopy(self) -> "NodeInput": + return NodeInput( + port_name=self._port_name, + data=copy.copy(self._data), + owner=self._owner, + meta=cast(Input, self._meta), + ) + + def _get_data_owner(self) -> Optional["BaseNode"]: + """Gets the data owner of the node + + Note: This only works for @pipeline, not for YAML pipeline. + + Note: Inner step will be returned as the owner when node's input is from sub pipeline's output. + @pipeline + def sub_pipeline(): + inner_node = component_func() + return inner_node.outputs + + @pipeline + def root_pipeline(): + pipeline_node = sub_pipeline() + node = copy_files_component_func(input_dir=pipeline_node.outputs.output_dir) + owner = node.inputs.input_dir._get_data_owner() + assert owner == pipeline_node.nodes[0] + + :return: The node if Input is from another node's output. Returns None for literal value. 
+ :rtype: Optional[BaseNode] + """ + from azure.ai.ml.entities import Pipeline + from azure.ai.ml.entities._builders import BaseNode + + def _resolve_data_owner(data: Any) -> Optional["BaseNode"]: + if isinstance(data, BaseNode) and not isinstance(data, Pipeline): + return data + while isinstance(data, PipelineInput): + # for pipeline input, it's original value(can be literal value or another node's output) + # is stored in _original_data + return _resolve_data_owner(data._original_data) + if isinstance(data, NodeOutput): + if isinstance(data._owner, Pipeline): + # for input from subgraph's output, trace back to inner node + return _resolve_data_owner(data._binding_output) + # for input from another node's output, return the node + return _resolve_data_owner(data._owner) + return None + + return _resolve_data_owner(self._data) + + +class NodeOutput(InputOutputBase, PipelineExpressionMixin): + """Define one output of a Component.""" + + def __init__( + self, + port_name: str, + meta: Optional[Union[Input, Output]], + *, + data: Optional[Union[Output, str]] = None, + # TODO: Bug Item number: 2883405 + owner: Optional[Union["BaseComponent", "PipelineJob"]] = None, # type: ignore + binding_output: Optional["NodeOutput"] = None, + **kwargs: Any, + ): + """Initialize an Output of a component. + + :param port_name: The port_name of the output. + :type port_name: str + :param name: The name used to register NodeOutput/PipelineOutput data. + :type name: str + :param version: The version used to register NodeOutput/PipelineOutput data. + :ype version: str + :param data: The output data. Valid types include str, Output + :type data: Union[str + azure.ai.ml.entities.Output] + :param mode: The mode of the output. + :type mode: str + :param owner: The owner component of the output, used to calculate binding. + :type owner: Union[azure.ai.ml.entities.BaseNode, azure.ai.ml.entities.PipelineJob] + :param binding_output: The node output bound to pipeline output, only available for pipeline. + :type binding_output: azure.ai.ml.entities.NodeOutput + :param kwargs: A dictionary of additional configuration parameters. + :type kwargs: dict + :raises ~azure.ai.ml.exceptions.ValidationException: Raised if object cannot be successfully validated. + Details will be provided in the error message. + """ + # Allow inline output binding with string, eg: "component_out_path_1": "${{parents.outputs.job_out_data_1}}" + if data is not None and not isinstance(data, (Output, str)): + msg = "Got unexpected type for output: {}." + raise ValidationException( + message=msg.format(data), + target=ErrorTarget.PIPELINE, + no_personal_data_message=msg.format("[data]"), + ) + super().__init__(meta=meta, data=data, **kwargs) + self._port_name = port_name + self._owner = owner + self._name: Optional[str] = self._data.name if isinstance(self._data, Output) else None + self._version: Optional[str] = self._data.version if isinstance(self._data, Output) else None + + self._assert_name_and_version() + + # store original node output to be able to trace back to inner node from a pipeline output builder. + self._binding_output = binding_output + + @property + def port_name(self) -> str: + """The output port name, eg: node.outputs.port_name. + + :return: The port name + :rtype: str + """ + return self._port_name + + @property + def name(self) -> Optional[str]: + """Used in registering output data. 
+ + :return: The output name + :rtype: str + """ + return self._name + + @name.setter + def name(self, name: str) -> None: + """Assigns the name to NodeOutput/PipelineOutput and builds data according to the name. + + :param name: The new name + :type name: str + """ + self._build_default_data() + self._name = name + if isinstance(self._data, Output): + self._data.name = name + elif isinstance(self._data, InputOutputBase): + self._data._name = name + else: + raise UserErrorException( + f"We support self._data of Input, Output, InputOutputBase, NodeOutput and NodeInput," + f"but got type: {type(self._data)}." + ) + + @property + def version(self) -> Optional[str]: + """Used in registering output data. + + :return: The output data + :rtype: str + """ + return self._version + + @version.setter + def version(self, version: str) -> None: + """Assigns the version to NodeOutput/PipelineOutput and builds data according to the version. + + :param version: The new version + :type version: str + """ + self._build_default_data() + self._version = version + if isinstance(self._data, Output): + self._data.version = version + elif isinstance(self._data, InputOutputBase): + self._data._version = version + else: + raise UserErrorException( + f"We support self._data of Input, Output, InputOutputBase, NodeOutput and NodeInput," + f"but got type: {type(self._data)}." + ) + + @property + def path(self) -> Any: + # For node output path, + if self._data is not None and hasattr(self._data, "path"): + return self._data.path + return None + + @path.setter + def path(self, path: Optional[str]) -> None: + # For un-configured output, we build a default data entry for them. + self._build_default_data() + if self._data is not None and hasattr(self._data, "path"): + self._data.path = path + else: + # YAML job will have string output binding and do not support setting path for it. + msg = f"{type(self._data)} does not support setting path." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + + def _assert_name_and_version(self) -> None: + if self.name and not (re.match("^[A-Za-z0-9_-]*$", self.name) and len(self.name) <= 255): + raise UserErrorException( + f"The output name {self.name} can only contain alphanumeric characters, dashes and underscores, " + f"with a limit of 255 characters." + ) + if self.version and not self.name: + raise UserErrorException("Output name is required when output version is specified.") + + def _build_default_data(self) -> None: + """Build default data when output not configured.""" + if self._data is None: + # _meta will be None when node._component is not a Component object + # so we just leave the type inference work to backend + self._data = Output(type=None) # type: ignore[call-overload] + + def _build_data(self, data: T) -> Any: + """Build output data according to assigned input, eg: node.outputs.key = data + + :param data: The data + :type data: T + :return: `data` + :rtype: T + """ + if data is None: + return data + if not isinstance(data, (Output, str)): + msg = f"{self.__class__.__name__} only allow set {Output.__name__} object, {type(data)} is not supported." 
+ raise ValidationException( + message=msg, + target=ErrorTarget.PIPELINE, + no_personal_data_message=msg, + error_category=ErrorCategory.USER_ERROR, + ) + res: T = cast(T, data) + return res + + def _to_job_output(self) -> Optional[Output]: + """Convert the output to Output, this logic will change if backend contract changes.""" + if self._data is None: + # None data means this output is not configured. + result = None + elif isinstance(self._data, str): + result = Output( + type=AssetTypes.URI_FOLDER, path=self._data, mode=self.mode, name=self.name, version=self.version + ) + elif isinstance(self._data, Output): + result = self._data + elif isinstance(self._data, PipelineOutput): + result = Output( + type=AssetTypes.URI_FOLDER, + path=self._data._data_binding(), + mode=self.mode, + name=self._data.name, + version=self._data.version, + description=self.description, + ) + else: + msg = "Got unexpected type for output: {}." + raise ValidationException( + message=msg.format(self._data), + target=ErrorTarget.PIPELINE, + no_personal_data_message=msg.format("[data]"), + ) + return result + + def _data_binding(self) -> str: + if self._owner is not None: + return f"${{{{parent.jobs.{self._owner.name}.outputs.{self._port_name}}}}}" + + return "" + + def _copy(self, owner: Any) -> "NodeOutput": + return NodeOutput( + port_name=self._port_name, + data=cast(Output, self._data), + owner=owner, + meta=self._meta, + ) + + def _deepcopy(self) -> "NodeOutput": + return NodeOutput( + port_name=self._port_name, + data=cast(Output, copy.copy(self._data)), + owner=self._owner, + meta=self._meta, + binding_output=self._binding_output, + ) + + +class PipelineInput(NodeInput, PipelineExpressionMixin): + """Define one input of a Pipeline.""" + + def __init__(self, name: str, meta: Optional[Input], group_names: Optional[List[str]] = None, **kwargs: Any): + """Initialize a PipelineInput. + + :param name: The name of the input. + :type name: str + :param meta: Metadata of this input, eg: type, min, max, etc. + :type meta: Input + :param group_names: The input parameter's group names. + :type group_names: List[str] + """ + super(PipelineInput, self).__init__(port_name=name, meta=meta, **kwargs) + self._group_names = group_names if group_names else [] + + def result(self) -> Any: + """Return original value of pipeline input. + + :return: The original value of pipeline input + :rtype: Any + + Example: + + .. code-block:: python + + @pipeline + def pipeline_func(param1): + # node1's param1 will get actual value of param1 instead of a input binding. + node1 = component_func(param1=param1.result()) + """ + + # use this to break self loop + original_data_cache: Set = set() + original_data = self._original_data + while isinstance(original_data, PipelineInput) and original_data not in original_data_cache: + original_data_cache.add(original_data) + original_data = original_data._original_data + return original_data + + def __str__(self) -> str: + return self._data_binding() + + @overload + def _build_data(self, data: Union[Model, Data]) -> Input: ... + + @overload + def _build_data(self, data: T) -> Any: ... + + def _build_data(self, data: Union[Model, Data, T]) -> Any: + """Build data according to input type. 
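Assuming a node assigned to the step name `train_step` with an output port `model_dir`, the binding string and the registration attributes behave roughly like this (a sketch, not output from a real run):

    out = train_step.outputs.model_dir

    # __str__ falls through to _data_binding(), so the output renders as a binding expression:
    str(out)   # "${{parent.jobs.train_step.outputs.model_dir}}"

    # name/version mark the output for registration as a data asset; the name must match
    # ^[A-Za-z0-9_-]*$ and be at most 255 characters, and a version requires a name.
    out.name = "trained_model_dir"
    out.version = "1"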
+ + :param data: The data + :type data: Union[Model, Data, T] + :return: + * Input if data is a Model or Data + * data otherwise + :rtype: Union[Input, T] + """ + if data is None: + return data + # Unidiomatic typecheck: Checks that data is _exactly_ this type, and not potentially a subtype + if type(data) is NodeInput: # pylint: disable=unidiomatic-typecheck + msg = "Can not bind input to another component's input." + raise ValidationException(message=msg, no_personal_data_message=msg, target=ErrorTarget.PIPELINE) + if isinstance(data, (PipelineInput, NodeOutput)): + # If value is input or output, it's a data binding, owner is required to convert it to + # a data binding, eg: ${{parent.inputs.xxx}} + if isinstance(data, NodeOutput) and data._owner is None: + msg = "Setting input binding {} to output without owner is not allowed." + raise ValidationException( + message=msg.format(data), + no_personal_data_message=msg.format("[data]"), + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + return data + if isinstance(data, (Data, Model)): + # If value is Data, we convert it to an corresponding Input + return _data_to_input(data) + return data + + def _data_binding(self) -> str: + full_name = "%s.%s" % (".".join(self._group_names), self._port_name) if self._group_names else self._port_name + return f"${{{{parent.inputs.{full_name}}}}}" + + def _to_input(self) -> Optional[Union[Input, Output]]: + """Convert pipeline input to component input for pipeline component. + + :return: The component input + :rtype: Input + """ + if self._data is None: + # None data means this input is not configured. + return self._meta + data_type = self._data.type if isinstance(self._data, Input) else None + # If type is asset type, return data type without default. + # Else infer type from data and set it as default. + if data_type and data_type.lower() in AssetTypes.__dict__.values(): + if not isinstance(self._data, (int, float, str)): + result = Input(type=data_type, mode=self._data.mode) + elif type(self._data) in IOConstants.PRIMITIVE_TYPE_2_STR: + result = Input( + type=IOConstants.PRIMITIVE_TYPE_2_STR[type(self._data)], + default=self._data, + ) + else: + msg = f"Unsupported Input type {type(self._data)} detected when translate job to component." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + return result # pylint: disable=possibly-used-before-assignment + + +class PipelineOutput(NodeOutput): + """Define one output of a Pipeline.""" + + def _to_job_output(self) -> Optional[Output]: + result: Optional[Output] = None + if isinstance(self._data, Output): + # For pipeline output with type Output, always pass to backend. + result = self._data + elif self._data is None and self._meta and self._meta.type: + # For un-configured pipeline output with meta, we need to return Output with accurate type, + # so it won't default to uri_folder. + result = Output(type=self._meta.type, mode=self._meta.mode, description=self._meta.description) + else: + result = super(PipelineOutput, self)._to_job_output() + # Copy meta type to avoid built output's None type default to uri_folder. 
+ if self.type and result is not None and not result.type: + result.type = self.type + return result + + def _data_binding(self) -> str: + return f"${{{{parent.outputs.{self._port_name}}}}}" + + def _to_output(self) -> Optional[Output]: + """Convert pipeline output to component output for pipeline component.""" + if self._data is None: + # None data means this input is not configured. + return None + if isinstance(self._meta, Output): + return self._meta + # Assign type directly as we didn't have primitive output type for now. + if not isinstance(self._data, (int, float, str)): + return Output(type=self._data.type, mode=self._data.mode) + return Output() diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_io/mixin.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_io/mixin.py new file mode 100644 index 00000000..6c3d9357 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_io/mixin.py @@ -0,0 +1,623 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +import copy +from typing import Any, Dict, List, Optional, Tuple, Type, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobInput as RestJobInput +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobOutput as RestJobOutput +from azure.ai.ml.constants._component import ComponentJobConstants +from azure.ai.ml.entities._inputs_outputs import GroupInput, Input, Output +from azure.ai.ml.entities._util import copy_output_setting +from azure.ai.ml.exceptions import ErrorTarget, ValidationErrorType, ValidationException + +from ..._input_output_helpers import ( + from_rest_data_outputs, + from_rest_inputs_to_dataset_literal, + to_rest_data_outputs, + to_rest_dataset_literal_inputs, +) +from .._pipeline_job_helpers import from_dict_to_rest_io, process_sdk_component_job_io +from .attr_dict import InputsAttrDict, OutputsAttrDict, _GroupAttrDict +from .base import NodeInput, NodeOutput, PipelineInput, PipelineOutput + + +class NodeIOMixin: + """Provides ability to wrap node inputs/outputs and build data bindings + dynamically.""" + + @classmethod + def _get_supported_inputs_types(cls) -> Optional[Any]: + return None + + @classmethod + def _get_supported_outputs_types(cls) -> Optional[Any]: + return None + + @classmethod + def _validate_io(cls, value: Any, allowed_types: Optional[tuple], *, key: Optional[str] = None) -> None: + if allowed_types is None: + return + + if value is None or isinstance(value, allowed_types): + pass + else: + msg = "Expecting {} for input/output {}, got {} instead." 
+ raise ValidationException( + message=msg.format(allowed_types, key, type(value)), + no_personal_data_message=msg.format(allowed_types, "[key]", type(value)), + target=ErrorTarget.PIPELINE, + error_type=ValidationErrorType.INVALID_VALUE, + ) + + def _build_input( + self, + name: str, + meta: Optional[Input], + data: Optional[Union[dict, int, bool, float, str, Output, "PipelineInput", Input]], + ) -> NodeInput: + # output mode of last node should not affect input mode of next node + if isinstance(data, NodeOutput): + # Decoupled input and output + # value = copy.deepcopy(value) + data = data._deepcopy() # pylint: disable=protected-access + data.mode = None + elif isinstance(data, dict): + # Use type comparison instead of is_instance to skip _GroupAttrDict + # when loading from yaml io will be a dict, + # like {'job_data_path': '${{parent.inputs.pipeline_job_data_path}}'} + # parse dict to allowed type + data = Input(**data) + + # parameter group can be of custom type, so we don't check it here + if meta is not None and not isinstance(meta, GroupInput): + self._validate_io(data, self._get_supported_inputs_types(), key=name) + return NodeInput(port_name=name, meta=meta, data=data, owner=self) + + def _build_output(self, name: str, meta: Optional[Output], data: Optional[Union[Output, str]]) -> NodeOutput: + if isinstance(data, dict): + data = Output(**data) + + self._validate_io(data, self._get_supported_outputs_types(), key=name) + # For un-configured outputs, settings it to None, so we won't pass extra fields(eg: default mode) + return NodeOutput(port_name=name, meta=meta, data=data, owner=self) + + # pylint: disable=unused-argument + def _get_default_input_val(self, val: Any): # type: ignore + # use None value as data placeholder for unfilled inputs. + # server side will fill the default value + return None + + def _build_inputs_dict( + self, + inputs: Dict[str, Union[Input, str, bool, int, float]], + *, + input_definition_dict: Optional[dict] = None, + ) -> InputsAttrDict: + """Build an input attribute dict so user can get/set inputs by + accessing attribute, eg: node1.inputs.xxx. + + :param inputs: Provided kwargs when parameterizing component func. + :type inputs: Dict[str, Union[Input, str, bool, int, float]] + :keyword input_definition_dict: Static input definition dict. If not provided, will build inputs without meta. + :paramtype input_definition_dict: dict + :return: Built dynamic input attribute dict. + :rtype: InputsAttrDict + """ + if input_definition_dict is not None: + # TODO: validate inputs.keys() in input_definitions.keys() + input_dict = {} + for key, val in input_definition_dict.items(): + if key in inputs.keys(): + # If input is set through component functions' kwargs, create an input object with real value. + data = inputs[key] + else: + data = self._get_default_input_val(val) # pylint: disable=assignment-from-none + + val = self._build_input(name=key, meta=val, data=data) + input_dict[key] = val + else: + input_dict = {key: self._build_input(name=key, meta=None, data=val) for key, val in inputs.items()} + return InputsAttrDict(input_dict) + + def _build_outputs_dict( + self, outputs: Dict, *, output_definition_dict: Optional[dict] = None, none_data: bool = False + ) -> OutputsAttrDict: + """Build an output attribute dict so user can get/set outputs by + accessing attribute, eg: node1.outputs.xxx. + + :param outputs: Provided kwargs when parameterizing component func. + :type outputs: Dict[str, Output] + :keyword output_definition_dict: Static output definition dict. 
+ :paramtype output_definition_dict: Dict + :keyword none_data: If True, will set output data to None. + :paramtype none_data: bool + :return: Built dynamic output attribute dict. + :rtype: OutputsAttrDict + """ + if output_definition_dict is not None: + # TODO: check if we need another way to mark a un-configured output instead of just set None. + # Create None as data placeholder for all outputs. + output_dict = {} + for key, val in output_definition_dict.items(): + if key in outputs.keys(): + # If output has given value, create an output object with real value. + val = self._build_output(name=key, meta=val, data=outputs[key]) + else: + val = self._build_output(name=key, meta=val, data=None) + output_dict[key] = val + else: + output_dict = {} + for key, val in outputs.items(): + output_val = self._build_output(name=key, meta=None, data=val if not none_data else None) + output_dict[key] = output_val + return OutputsAttrDict(output_dict) + + def _build_inputs(self) -> Dict: + """Build inputs of this component to a dict dict which maps output to + actual value. + + The built input dict will have same input format as other jobs, eg: + { + "input_data": Input(path="path/to/input/data", mode="Mount"), + "input_value": 10, + "learning_rate": "${{jobs.step1.inputs.learning_rate}}" + } + + :return: The input dict + :rtype: Dict[str, Union[Input, str, bool, int, float]] + """ + inputs = {} + # pylint: disable=redefined-builtin + for name, input in self.inputs.items(): # type: ignore + if isinstance(input, _GroupAttrDict): + # Flatten group inputs into inputs dict + inputs.update(input.flatten(group_parameter_name=name)) + continue + inputs[name] = input._to_job_input() # pylint: disable=protected-access + return inputs + + def _build_outputs(self) -> Dict[str, Output]: + """Build outputs of this component to a dict which maps output to + actual value. + + The built output dict will have same output format as other jobs, eg: + { + "eval_output": "${{jobs.eval.outputs.eval_output}}" + } + + :return: The output dict + :rtype: Dict[str, Output] + """ + outputs = {} + for name, output in self.outputs.items(): # type: ignore + if isinstance(output, NodeOutput): + output = output._to_job_output() # pylint: disable=protected-access + outputs[name] = output + # Remove non-configured output + return {k: v for k, v in outputs.items() if v is not None} + + def _to_rest_inputs(self) -> Dict[str, Dict]: + """Translate input builders to rest input dicts. + + The built dictionary's format aligns with component job's input yaml, eg: + { + "input_data": {"data": {"path": "path/to/input/data"}, "mode"="Mount"}, + "input_value": 10, + "learning_rate": "${{jobs.step1.inputs.learning_rate}}" + } + + :return: The REST inputs + :rtype: Dict[str, Dict] + """ + built_inputs = self._build_inputs() + return self._input_entity_to_rest_inputs(input_entity=built_inputs) + + @classmethod + def _input_entity_to_rest_inputs(cls, input_entity: Dict[str, Input]) -> Dict[str, Dict]: + # Convert io entity to rest io objects + input_bindings, dataset_literal_inputs = process_sdk_component_job_io( + input_entity, [ComponentJobConstants.INPUT_PATTERN] + ) + + # parse input_bindings to InputLiteral(value=str(binding)) + rest_inputs = {**input_bindings, **dataset_literal_inputs} + # Note: The function will only be called from BaseNode, + # and job_type is used to enable dot in pipeline job input keys, + # so pass job_type as None directly here. 
+ rest_inputs = to_rest_dataset_literal_inputs(rest_inputs, job_type=None) + + # convert rest io to dict + rest_dataset_literal_inputs = {} + for name, val in rest_inputs.items(): + rest_dataset_literal_inputs[name] = val.as_dict() + if hasattr(val, "mode") and val.mode: + rest_dataset_literal_inputs[name].update({"mode": val.mode.value}) + return rest_dataset_literal_inputs + + def _to_rest_outputs(self) -> Dict[str, Dict]: + """Translate output builders to rest output dicts. + + The built dictionary's format aligns with component job's output yaml, eg: + {"eval_output": "${{jobs.eval.outputs.eval_output}}"} + + :return: The REST outputs + :rtype: Dict[str, Dict] + """ + built_outputs = self._build_outputs() + + # Convert io entity to rest io objects + output_bindings, data_outputs = process_sdk_component_job_io( + built_outputs, [ComponentJobConstants.OUTPUT_PATTERN] + ) + rest_data_outputs = to_rest_data_outputs(data_outputs) + + # convert rest io to dict + # parse output_bindings to {"value": binding, "type": "literal"} since there's no mode for it + rest_output_bindings = {} + for key, binding in output_bindings.items(): + rest_output_bindings[key] = {"value": binding["value"], "type": "literal"} + if "mode" in binding: + rest_output_bindings[key].update({"mode": binding["mode"].value}) + if "name" in binding: + rest_output_bindings[key].update({"name": binding["name"]}) + if "version" in binding: + rest_output_bindings[key].update({"version": binding["version"]}) + + def _rename_name_and_version(output_dict: Dict) -> Dict: + # NodeOutput can only be registered with name and version, therefore we rename here + if "asset_name" in output_dict.keys(): + output_dict["name"] = output_dict.pop("asset_name") + if "asset_version" in output_dict.keys(): + output_dict["version"] = output_dict.pop("asset_version") + return output_dict + + rest_data_outputs = {name: _rename_name_and_version(val.as_dict()) for name, val in rest_data_outputs.items()} + self._update_output_types(rest_data_outputs) + rest_data_outputs.update(rest_output_bindings) + return rest_data_outputs + + @classmethod + def _from_rest_inputs(cls, inputs: Dict) -> Dict[str, Union[Input, str, bool, int, float]]: + """Load inputs from rest inputs. + + :param inputs: The REST inputs + :type inputs: Dict[str, Union[str, dict]] + :return: Input dict + :rtype: Dict[str, Union[Input, str, bool, int, float]] + """ + + # JObject -> RestJobInput/RestJobOutput + input_bindings, rest_inputs = from_dict_to_rest_io(inputs, RestJobInput, [ComponentJobConstants.INPUT_PATTERN]) + + # RestJobInput/RestJobOutput -> Input/Output + dataset_literal_inputs = from_rest_inputs_to_dataset_literal(rest_inputs) + + return {**dataset_literal_inputs, **input_bindings} + + @classmethod + def _from_rest_outputs(cls, outputs: Dict[str, Union[str, dict]]) -> Dict: + """Load outputs from rest outputs. + + :param outputs: The REST outputs + :type outputs: Dict[str, Union[str, dict]] + :return: Output dict + :rtype: Dict[str, Output] + """ + + # JObject -> RestJobInput/RestJobOutput + output_bindings, rest_outputs = from_dict_to_rest_io( + outputs, RestJobOutput, [ComponentJobConstants.OUTPUT_PATTERN] + ) + + # RestJobInput/RestJobOutput -> Input/Output + data_outputs = from_rest_data_outputs(rest_outputs) + + return {**data_outputs, **output_bindings} + + def _update_output_types(self, rest_data_outputs: dict) -> None: + """Update output types in rest_data_outputs according to meta level output. 
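For illustration, the renaming step in `_to_rest_outputs` turns the REST wire form of a registered output into the SDK-facing keys (the values below are invented):

    rest_output = {"job_output_type": "uri_folder", "asset_name": "trained_model", "asset_version": "2"}
    # After _rename_name_and_version the same dict reads:
    # {"job_output_type": "uri_folder", "name": "trained_model", "version": "2"}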
+ + :param rest_data_outputs: The REST data outputs + :type rest_data_outputs: Dict + """ + + for name, rest_output in rest_data_outputs.items(): + original_output = self.outputs[name] # type: ignore + # for configured output with meta, "correct" the output type to file to avoid the uri_folder default value + if original_output and original_output.type: + if original_output.type in ["AnyFile", "uri_file"]: + rest_output["job_output_type"] = "uri_file" + + +def flatten_dict( + dct: Optional[Dict], + _type: Union[Type["_GroupAttrDict"], Type[GroupInput]], + *, + allow_dict_fields: Optional[List[str]] = None, +) -> Dict: + """Flatten inputs/input_definitions dict for inputs dict build. + + :param dct: The dictionary to flatten + :type dct: Dict + :param _type: Either _GroupAttrDict or GroupInput (both have the method `flatten`) + :type _type: Union[Type["_GroupAttrDict"], Type[GroupInput]] + :keyword allow_dict_fields: A list of keys for dictionary values that will be included in flattened output + :paramtype allow_dict_fields: Optional[List[str]] + :return: The flattened dict + :rtype: Dict + """ + _result = {} + if dct is not None: + for key, val in dct.items(): + # to support passing dict value as parameter group + if allow_dict_fields and key in allow_dict_fields and isinstance(val, dict): + # for child dict, all values are allowed to be dict + for flattened_key, flattened_val in flatten_dict( + val, _type, allow_dict_fields=list(val.keys()) + ).items(): + _result[key + "." + flattened_key] = flattened_val + continue + val = GroupInput.custom_class_value_to_attr_dict(val) + if isinstance(val, _type): + _result.update(val.flatten(group_parameter_name=key)) + continue + _result[key] = val + return _result + + +class NodeWithGroupInputMixin(NodeIOMixin): + """This class provide build_inputs_dict for a node to use ParameterGroup as an input.""" + + @classmethod + def _validate_group_input_type( + cls, + input_definition_dict: dict, + inputs: Dict[str, Union[Input, str, bool, int, float]], + ) -> None: + """Raise error when group input receive a value not group type. + + :param input_definition_dict: The input definition dict + :type input_definition_dict: dict + :param inputs: The inputs + :type inputs: Dict[str, Union[Input, str, bool, int, float]] + """ + # Note: We put and extra validation here instead of doing it in pipeline._validate() + # due to group input will be discarded silently if assign it to a non-group parameter. + group_msg = "'%s' is defined as a parameter group but got input '%s' with type '%s'." + non_group_msg = "'%s' is defined as a parameter but got a parameter group as input." + for key, val in inputs.items(): + definition = input_definition_dict.get(key) + val = GroupInput.custom_class_value_to_attr_dict(val) + if val is None: + continue + # 1. inputs.group = 'a string' + if isinstance(definition, GroupInput) and not isinstance(val, (_GroupAttrDict, dict)): + raise ValidationException( + message=group_msg % (key, val, type(val)), + no_personal_data_message=group_msg % ("[key]", "[val]", "[type(val)]"), + target=ErrorTarget.PIPELINE, + type=ValidationErrorType.INVALID_VALUE, + ) + # 2. 
inputs.str_param = group + if not isinstance(definition, GroupInput) and isinstance(val, _GroupAttrDict): + raise ValidationException( + message=non_group_msg % key, + no_personal_data_message=non_group_msg % "[key]", + target=ErrorTarget.PIPELINE, + type=ValidationErrorType.INVALID_VALUE, + ) + + @classmethod + def _flatten_inputs_and_definition( + cls, + inputs: Dict[str, Union[Input, str, bool, int, float]], + input_definition_dict: dict, + ) -> Tuple[Dict, Dict]: + """ + Flatten all GroupInput(definition) and GroupAttrDict recursively and build input dict. + For example: + input_definition_dict = { + "group1": GroupInput( + values={ + "param1": GroupInput( + values={ + "param1_1": Input(type="str"), + } + ), + "param2": Input(type="int"), + } + ), + "group2": GroupInput( + values={ + "param3": Input(type="str"), + } + ), + } => { + "group1.param1.param1_1": Input(type="str"), + "group1.param2": Input(type="int"), + "group2.param3": Input(type="str"), + } + inputs = { + "group1": { + "param1": { + "param1_1": "value1", + }, + "param2": 2, + }, + "group2": { + "param3": "value3", + }, + } => { + "group1.param1.param1_1": "value1", + "group1.param2": 2, + "group2.param3": "value3", + } + :param inputs: The inputs + :type inputs: Dict[str, Union[Input, str, bool, int, float]] + :param input_definition_dict: The input definition dict + :type input_definition_dict: dict + :return: The flattened inputs and definition + :rtype: Tuple[Dict, Dict] + """ + group_input_names = [key for key, val in input_definition_dict.items() if isinstance(val, GroupInput)] + flattened_inputs = flatten_dict(inputs, _GroupAttrDict, allow_dict_fields=group_input_names) + flattened_definition_dict = flatten_dict(input_definition_dict, GroupInput) + return flattened_inputs, flattened_definition_dict + + def _build_inputs_dict( + self, + inputs: Dict[str, Union[Input, str, bool, int, float]], + *, + input_definition_dict: Optional[dict] = None, + ) -> InputsAttrDict: + """Build an input attribute dict so user can get/set inputs by + accessing attribute, eg: node1.inputs.xxx. + + :param inputs: Provided kwargs when parameterizing component func. + :type inputs: Dict[str, Union[Input, str, bool, int, float]] + :keyword input_definition_dict: Input definition dict from component entity. + :paramtype input_definition_dict: dict + :return: Built input attribute dict. + :rtype: InputsAttrDict + """ + + # TODO: should we support group input when there is no local input definition? 
+ if input_definition_dict is not None: + # Validate group mismatch + self._validate_group_input_type(input_definition_dict, inputs) + + # Flatten inputs and definition + flattened_inputs, flattened_definition_dict = self._flatten_inputs_and_definition( + inputs, input_definition_dict + ) + # Build: zip all flattened parameter with definition + inputs = super()._build_inputs_dict(flattened_inputs, input_definition_dict=flattened_definition_dict) + return InputsAttrDict(GroupInput.restore_flattened_inputs(inputs)) + return super()._build_inputs_dict(inputs) + + +class PipelineJobIOMixin(NodeWithGroupInputMixin): + """Provides ability to wrap pipeline job inputs/outputs and build data bindings + dynamically.""" + + def _build_input(self, name: str, meta: Optional[Input], data: Any) -> "PipelineInput": + return PipelineInput(name=name, meta=meta, data=data, owner=self) + + def _build_output( + self, name: str, meta: Optional[Union[Input, Output]], data: Optional[Union[Output, str]] + ) -> "PipelineOutput": + # TODO: settings data to None for un-configured outputs so we won't passing extra fields(eg: default mode) + result = PipelineOutput(port_name=name, meta=meta, data=data, owner=self) + return result + + def _build_inputs_dict( + self, + inputs: Dict[str, Union[Input, str, bool, int, float]], + *, + input_definition_dict: Optional[dict] = None, + ) -> InputsAttrDict: + """Build an input attribute dict so user can get/set inputs by + accessing attribute, eg: node1.inputs.xxx. + + :param inputs: Provided kwargs when parameterizing component func. + :type inputs: Dict[str, Union[Input, str, bool, int, float]] + :keyword input_definition_dict: Input definition dict from component entity. + :return: Built input attribute dict. + :rtype: InputsAttrDict + """ + input_dict = super()._build_inputs_dict(inputs, input_definition_dict=input_definition_dict) + # TODO: should we do this when input_definition_dict is not None? + # TODO: should we put this in super()._build_inputs_dict? + if input_definition_dict is None: + return InputsAttrDict(GroupInput.restore_flattened_inputs(input_dict)) + return input_dict + + def _build_output_for_pipeline(self, name: str, data: Optional[Union[Output, NodeOutput]]) -> "PipelineOutput": + """Build an output object for pipeline and copy settings from source output. + + :param name: Output name. + :type name: str + :param data: Output data. + :type data: Optional[Union[Output, NodeOutput]] + :return: Built output object. + :rtype: PipelineOutput + """ + # pylint: disable=protected-access + if data is None: + # For None output, build an empty output builder + output_val = self._build_output(name=name, meta=None, data=None) + elif isinstance(data, Output): + # For output entity, build an output builder with data points to it + output_val = self._build_output(name=name, meta=data, data=data) + elif isinstance(data, NodeOutput): + # For output builder, build a new output builder and copy settings from it + output_val = self._build_output(name=name, meta=data._meta, data=None) + copy_output_setting(source=data, target=output_val) + else: + message = "Unsupported output type: {} for pipeline output: {}: {}" + raise ValidationException( + message=message.format(type(data), name, data), + no_personal_data_message=message, + target=ErrorTarget.PIPELINE, + ) + return output_val + + def _build_pipeline_outputs_dict(self, outputs: Dict) -> OutputsAttrDict: + """Build an output attribute dict without output definition metadata. 
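A sketch of what this enables at the DSL level (component and names are hypothetical): outputs returned from a pipeline function become `PipelineOutput` builders whose settings are copied from the node-level outputs they bind to, and they can still be overridden on the pipeline job:

    from azure.ai.ml.dsl import pipeline

    @pipeline
    def my_pipeline():
        step = train_component()
        return {"model_dir": step.outputs.model_dir}

    job = my_pipeline()
    # The pipeline-level builder binds back to step.outputs.model_dir; node-level settings
    # were copied when it was built and can be overridden here:
    job.outputs.model_dir.mode = "upload"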
+        For pipeline outputs, their settings should be copied from the node-level outputs.
+
+        :param outputs: Node output dict or pipeline component's outputs.
+        :type outputs: Dict[str, Union[Output, NodeOutput]]
+        :return: Built dynamic output attribute dict.
+        :rtype: OutputsAttrDict
+        """
+        output_dict = {}
+        for key, val in outputs.items():
+            output_dict[key] = self._build_output_for_pipeline(name=key, data=val)
+        return OutputsAttrDict(output_dict)
+
+    def _build_outputs(self) -> Dict[str, Output]:
+        """Build outputs of this pipeline to a dict which maps output names to actual
+        values.
+
+        The built dictionary's format aligns with component job's output yaml,
+        un-configured outputs will be None, eg:
+        {"eval_output": "${{jobs.eval.outputs.eval_output}}", "un_configured": None}
+
+        :return: The output dict
+        :rtype: Dict[str, Output]
+        """
+        outputs = {}
+        for name, output in self.outputs.items():  # type: ignore
+            if isinstance(output, NodeOutput):
+                output = output._to_job_output()  # pylint: disable=protected-access
+            outputs[name] = output
+        return outputs
+
+    def _get_default_input_val(self, val: Any):  # type: ignore
+        # use the default value as a data placeholder for unfilled inputs.
+        # the client side needs to fill in the default value for dsl.pipeline
+        if isinstance(val, GroupInput):
+            # Copy the default value dict for a group
+            return copy.deepcopy(val.default)
+        return val.default
+
+    def _update_output_types(self, rest_data_outputs: Dict) -> None:
+        """Do not clear the output type for pipeline-level outputs since it is required in the REST object.
+
+        :param rest_data_outputs: The REST data outputs
+        :type rest_data_outputs: Dict
+        """
+
+
+class AutoMLNodeIOMixin(NodeIOMixin):
+    """Wrap outputs of an AutoML node and build data bindings dynamically."""
+
+    def __init__(self, **kwargs):  # type: ignore
+        # add an inputs field to align with other nodes
+        self.inputs = {}
+        super(AutoMLNodeIOMixin, self).__init__(**kwargs)
+        if getattr(self, "outputs", None):
+            self._outputs = self._build_outputs_dict(self.outputs or {})
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_load_component.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_load_component.py
new file mode 100644
index 00000000..60c4cbe7
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_load_component.py
@@ -0,0 +1,313 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# --------------------------------------------------------- + +# pylint: disable=protected-access +from typing import Any, Callable, Dict, List, Mapping, Optional, Union, cast + +from marshmallow import INCLUDE + +from azure.ai.ml import Output +from azure.ai.ml._schema import NestedField +from azure.ai.ml._schema.pipeline.component_job import SweepSchema +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, SOURCE_PATH_CONTEXT_KEY, CommonYamlFields +from azure.ai.ml.constants._component import ControlFlowType, DataTransferTaskType, NodeType +from azure.ai.ml.constants._compute import ComputeType +from azure.ai.ml.dsl._component_func import to_component_func +from azure.ai.ml.dsl._overrides_definition import OverrideDefinition +from azure.ai.ml.entities._builders import ( + BaseNode, + Command, + DataTransferCopy, + DataTransferExport, + DataTransferImport, + Import, + Parallel, + Spark, + Sweep, +) +from azure.ai.ml.entities._builders.condition_node import ConditionNode +from azure.ai.ml.entities._builders.control_flow_node import ControlFlowNode +from azure.ai.ml.entities._builders.do_while import DoWhile +from azure.ai.ml.entities._builders.parallel_for import ParallelFor +from azure.ai.ml.entities._builders.pipeline import Pipeline +from azure.ai.ml.entities._component.component import Component +from azure.ai.ml.entities._job.automl.automl_job import AutoMLJob +from azure.ai.ml.entities._util import get_type_from_spec +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +class _PipelineNodeFactory: + """A class to create pipeline node instances from yaml dict or rest objects without hard-coded type check.""" + + def __init__(self) -> None: + self._create_instance_funcs: dict = {} + self._load_from_rest_object_funcs: dict = {} + + self.register_type( + _type=NodeType.COMMAND, + create_instance_func=lambda: Command.__new__(Command), + load_from_rest_object_func=Command._from_rest_object, + nested_schema=None, + ) + self.register_type( + _type=NodeType.IMPORT, + create_instance_func=lambda: Import.__new__(Import), + load_from_rest_object_func=Import._from_rest_object, + nested_schema=None, + ) + self.register_type( + _type=NodeType.PARALLEL, + create_instance_func=lambda: Parallel.__new__(Parallel), + load_from_rest_object_func=Parallel._from_rest_object, + nested_schema=None, + ) + self.register_type( + _type=NodeType.PIPELINE, + create_instance_func=lambda: Pipeline.__new__(Pipeline), + load_from_rest_object_func=Pipeline._from_rest_object, + nested_schema=None, + ) + self.register_type( + _type=NodeType.SWEEP, + create_instance_func=lambda: Sweep.__new__(Sweep), + load_from_rest_object_func=Sweep._from_rest_object, + nested_schema=NestedField(SweepSchema, unknown=INCLUDE), + ) + self.register_type( + _type=NodeType.AUTOML, + create_instance_func=None, + load_from_rest_object_func=self._automl_from_rest_object, + nested_schema=None, + ) + self.register_type( + _type=NodeType.SPARK, + create_instance_func=lambda: Spark.__new__(Spark), + load_from_rest_object_func=Spark._from_rest_object, + nested_schema=None, + ) + self.register_type( + _type=ControlFlowType.DO_WHILE, + create_instance_func=None, + load_from_rest_object_func=DoWhile._from_rest_object, + nested_schema=None, + ) + self.register_type( + _type=ControlFlowType.IF_ELSE, + create_instance_func=None, + load_from_rest_object_func=ConditionNode._from_rest_object, + nested_schema=None, + ) + self.register_type( + _type=ControlFlowType.PARALLEL_FOR, + create_instance_func=None, + 
load_from_rest_object_func=ParallelFor._from_rest_object, + nested_schema=None, + ) + self.register_type( + _type="_".join([NodeType.DATA_TRANSFER, DataTransferTaskType.COPY_DATA]), + create_instance_func=lambda: DataTransferCopy.__new__(DataTransferCopy), + load_from_rest_object_func=DataTransferCopy._from_rest_object, + nested_schema=None, + ) + self.register_type( + _type="_".join([NodeType.DATA_TRANSFER, DataTransferTaskType.IMPORT_DATA]), + create_instance_func=lambda: DataTransferImport.__new__(DataTransferImport), + load_from_rest_object_func=DataTransferImport._from_rest_object, + nested_schema=None, + ) + self.register_type( + _type="_".join([NodeType.DATA_TRANSFER, DataTransferTaskType.EXPORT_DATA]), + create_instance_func=lambda: DataTransferExport.__new__(DataTransferExport), + load_from_rest_object_func=DataTransferExport._from_rest_object, + nested_schema=None, + ) + self.register_type( + _type=NodeType.FLOW_PARALLEL, + create_instance_func=lambda: Parallel.__new__(Parallel), + load_from_rest_object_func=None, + nested_schema=None, + ) + + @classmethod + def _get_func(cls, _type: str, funcs: Dict[str, Callable]) -> Callable: + if _type == NodeType._CONTAINER: + msg = ( + "Component returned by 'list' is abbreviated and can not be used directly, " + "please use result from 'get'." + ) + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.COMPONENT, + error_category=ErrorCategory.USER_ERROR, + ) + _type = get_type_from_spec({CommonYamlFields.TYPE: _type}, valid_keys=funcs) + return funcs[_type] + + def get_create_instance_func(self, _type: str) -> Callable[..., BaseNode]: + """Get the function to create a new instance of the node. + + :param _type: The type of the node. + :type _type: str + :return: The create instance function + :rtype: Callable[..., BaseNode] + """ + return self._get_func(_type, self._create_instance_funcs) + + def get_load_from_rest_object_func(self, _type: str) -> Callable: + """Get the function to load a node from a rest object. + + :param _type: The type of the node. + :type _type: str + :return: The `_load_from_rest_object` function + :rtype: Callable[[Any], Union[BaseNode, AutoMLJob, ControlFlowNode]] + """ + return self._get_func(_type, self._load_from_rest_object_funcs) + + def register_type( + self, + _type: str, + *, + create_instance_func: Optional[Callable[..., Union[BaseNode, AutoMLJob]]] = None, + load_from_rest_object_func: Optional[Callable] = None, + nested_schema: Optional[Union[NestedField, List[NestedField]]] = None, + ) -> None: + """Register a type of node. + + :param _type: The type of the node. 
+ :type _type: str + :keyword create_instance_func: A function to create a new instance of the node + :paramtype create_instance_func: typing.Optional[typing.Callable[..., typing.Union[BaseNode, AutoMLJob]]] + :keyword load_from_rest_object_func: A function to load a node from a rest object + :paramtype load_from_rest_object_func: typing.Optional[typing.Callable[[Any], typing.Union[BaseNode, AutoMLJob\ + , ControlFlowNode]]] + :keyword nested_schema: schema/schemas of corresponding nested field, will be used in \ + PipelineJobSchema.jobs.value + :paramtype nested_schema: typing.Optional[typing.Union[NestedField, List[NestedField]]] + """ + if create_instance_func is not None: + self._create_instance_funcs[_type] = create_instance_func + if load_from_rest_object_func is not None: + self._load_from_rest_object_funcs[_type] = load_from_rest_object_func + if nested_schema is not None: + from azure.ai.ml._schema.core.fields import TypeSensitiveUnionField + from azure.ai.ml._schema.pipeline.pipeline_component import PipelineComponentSchema + from azure.ai.ml._schema.pipeline.pipeline_job import PipelineJobSchema + + for declared_fields in [ + PipelineJobSchema._declared_fields, + PipelineComponentSchema._declared_fields, + ]: + jobs_value_field: TypeSensitiveUnionField = declared_fields["jobs"].value_field + if not isinstance(nested_schema, list): + nested_schema = [nested_schema] + for nested_field in nested_schema: + jobs_value_field.insert_type_sensitive_field(type_name=_type, field=nested_field) + + def load_from_dict(self, *, data: dict, _type: Optional[str] = None) -> Union[BaseNode, AutoMLJob]: + """Load a node from a dict. + + :keyword data: A dict containing the node's data. + :paramtype data: dict + :keyword _type: The type of the node. If not specified, it will be inferred from the data. + :paramtype _type: str + :return: The node + :rtype: Union[BaseNode, AutoMLJob] + """ + if _type is None: + _type = data[CommonYamlFields.TYPE] if CommonYamlFields.TYPE in data else NodeType.COMMAND + # todo: refine Hard code for now to support different task type for DataTransfer node + if _type == NodeType.DATA_TRANSFER: + _type = "_".join([NodeType.DATA_TRANSFER, data.get("task", " ")]) + else: + data[CommonYamlFields.TYPE] = _type + + new_instance: Union[BaseNode, AutoMLJob] = self.get_create_instance_func(_type)() + + if isinstance(new_instance, BaseNode): + # parse component + component_key = new_instance._get_component_attr_name() + if component_key in data and isinstance(data[component_key], dict): + data[component_key] = Component._load( + data=data[component_key], + yaml_path=data[component_key].pop(SOURCE_PATH_CONTEXT_KEY, None), + ) + # TODO: Bug Item number: 2883415 + new_instance.__init__(**data) # type: ignore + return new_instance + + def load_from_rest_object( + self, *, obj: dict, _type: Optional[str] = None, **kwargs: Any + ) -> Union[BaseNode, AutoMLJob, ControlFlowNode]: + """Load a node from a rest object. + + :keyword obj: A rest object containing the node's data. + :paramtype obj: dict + :keyword _type: The type of the node. If not specified, it will be inferred from the data. 
+ :paramtype _type: str + :return: The node + :rtype: Union[BaseNode, AutoMLJob, ControlFlowNode] + """ + + # TODO: Remove in PuP with native import job/component type support in MFE/Designer + if "computeId" in obj and obj["computeId"] and obj["computeId"].endswith("/" + ComputeType.ADF): + _type = NodeType.IMPORT + + if _type is None: + _type = obj[CommonYamlFields.TYPE] if CommonYamlFields.TYPE in obj else NodeType.COMMAND + # todo: refine Hard code for now to support different task type for DataTransfer node + if _type == NodeType.DATA_TRANSFER: + _type = "_".join([NodeType.DATA_TRANSFER, obj.get("task", " ")]) + else: + obj[CommonYamlFields.TYPE] = _type + + res: Union[BaseNode, AutoMLJob, ControlFlowNode] = self.get_load_from_rest_object_func(_type)(obj, **kwargs) + return res + + @classmethod + def _automl_from_rest_object(cls, node: Dict) -> AutoMLJob: + _outputs = cast(Dict[str, Union[str, dict]], node.get("outputs")) + # rest dict outputs -> Output objects + outputs = AutoMLJob._from_rest_outputs(_outputs) + # Output objects -> yaml dict outputs + parsed_outputs = {} + for key, val in outputs.items(): + if isinstance(val, Output): + val = val._to_dict() + parsed_outputs[key] = val + node["outputs"] = parsed_outputs + return AutoMLJob._load_from_dict( + node, + context={BASE_PATH_CONTEXT_KEY: "./"}, + additional_message="Failed to load automl task from backend.", + inside_pipeline=True, + ) + + +def _generate_component_function( + component_entity: Component, + override_definitions: Optional[Mapping[str, OverrideDefinition]] = None, # pylint: disable=unused-argument +) -> Callable[..., Union[Command, Parallel]]: + # Generate a function which returns a component node. + def create_component_func(**kwargs: Any) -> Union[BaseNode, AutoMLJob]: + # todo: refine Hard code for now to support different task type for DataTransfer node + _type = component_entity.type + if _type == NodeType.DATA_TRANSFER: + # TODO: Bug Item number: 2883431 + _type = "_".join([NodeType.DATA_TRANSFER, component_entity.task]) # type: ignore + if component_entity.task == DataTransferTaskType.IMPORT_DATA: # type: ignore + return pipeline_node_factory.load_from_dict( + data={"component": component_entity, "_from_component_func": True, **kwargs}, + _type=_type, + ) + return pipeline_node_factory.load_from_dict( + data={"component": component_entity, "inputs": kwargs, "_from_component_func": True}, + _type=_type, + ) + + res: Callable = to_component_func(component_entity, create_component_func) + return res + + +pipeline_node_factory = _PipelineNodeFactory() diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_pipeline_expression.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_pipeline_expression.py new file mode 100644 index 00000000..49bb8a61 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_pipeline_expression.py @@ -0,0 +1,662 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +# pylint: disable=protected-access + +import re +import tempfile +from collections import namedtuple +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast + +from azure.ai.ml._utils.utils import dump_yaml_to_file, get_all_data_binding_expressions, load_yaml +from azure.ai.ml.constants._common import AZUREML_PRIVATE_FEATURES_ENV_VAR, DefaultOpenEncoding +from azure.ai.ml.constants._component import ComponentParameterTypes, IOConstants +from azure.ai.ml.exceptions import UserErrorException + +if TYPE_CHECKING: + from azure.ai.ml.entities._builders import BaseNode + +ExpressionInput = namedtuple("ExpressionInput", ["name", "type", "value"]) +NONE_PARAMETER_TYPE = "None" + + +class PipelineExpressionOperator: + """Support operator in native Python experience.""" + + ADD = "+" + SUB = "-" + MUL = "*" + DIV = "/" + MOD = "%" + POW = "**" + FLOORDIV = "//" + LT = "<" + GT = ">" + LTE = "<=" + GTE = ">=" + EQ = "==" + NE = "!=" + AND = "&" + OR = "|" + XOR = "^" + + +_SUPPORTED_OPERATORS = { + getattr(PipelineExpressionOperator, attr) + for attr in PipelineExpressionOperator.__dict__ + if not attr.startswith("__") +} + + +def _enumerate_operation_combination() -> Dict[str, Union[str, Exception]]: + """Enumerate the result type of binary operations on types + + Leverages `eval` to validate operation and get its result type. + + :return: A dictionary that maps an operation to either: + * A result type + * An Exception + :rtype: Dict[str, Union[str, Exception]] + """ + res: Dict = {} + primitive_types_values = { + NONE_PARAMETER_TYPE: repr(None), + ComponentParameterTypes.BOOLEAN: repr(True), + ComponentParameterTypes.INTEGER: repr(1), + ComponentParameterTypes.NUMBER: repr(1.0), + ComponentParameterTypes.STRING: repr("1"), + } + for type1, operand1 in primitive_types_values.items(): + for type2, operand2 in primitive_types_values.items(): + for operator in _SUPPORTED_OPERATORS: + k = f"{type1} {operator} {type2}" + try: + eval_result = eval(f"{operand1} {operator} {operand2}") # pylint: disable=eval-used # nosec + res[k] = IOConstants.PRIMITIVE_TYPE_2_STR[type(eval_result)] + except TypeError: + error_message = ( + f"Operator '{operator}' is not supported between instances of '{type1}' and '{type2}'." + ) + res[k] = UserErrorException(message=error_message, no_personal_data_message=error_message) + return res + + +# enumerate and store as a lookup table: +# key format is "<operand1_type> <operator> <operand2_type>" +# value can be either result type as str and UserErrorException for invalid operation +_OPERATION_RESULT_TYPE_LOOKUP = _enumerate_operation_combination() + + +class PipelineExpressionMixin: + _SUPPORTED_PRIMITIVE_TYPES = (bool, int, float, str) + _SUPPORTED_PIPELINE_INPUT_TYPES = ( + ComponentParameterTypes.BOOLEAN, + ComponentParameterTypes.INTEGER, + ComponentParameterTypes.NUMBER, + ComponentParameterTypes.STRING, + ) + + def _validate_binary_operation(self, other: Any, operator: str) -> None: + from azure.ai.ml.entities._job.pipeline._io import NodeOutput, PipelineInput + + if ( + other is not None + and not isinstance(other, self._SUPPORTED_PRIMITIVE_TYPES) + and not isinstance(other, (PipelineInput, NodeOutput, PipelineExpression)) + ): + error_message = ( + f"Operator '{operator}' is not supported with {type(other)}; " + "currently only support primitive types (None, bool, int, float and str), " + "pipeline input, component output and expression." 
+ ) + raise UserErrorException(message=error_message, no_personal_data_message=error_message) + + def __add__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.ADD) + return PipelineExpression._from_operation(self, other, PipelineExpressionOperator.ADD) + + def __radd__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.ADD) + return PipelineExpression._from_operation(other, self, PipelineExpressionOperator.ADD) + + def __sub__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.SUB) + return PipelineExpression._from_operation(self, other, PipelineExpressionOperator.SUB) + + def __rsub__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.SUB) + return PipelineExpression._from_operation(other, self, PipelineExpressionOperator.SUB) + + def __mul__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.MUL) + return PipelineExpression._from_operation(self, other, PipelineExpressionOperator.MUL) + + def __rmul__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.MUL) + return PipelineExpression._from_operation(other, self, PipelineExpressionOperator.MUL) + + def __truediv__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.DIV) + return PipelineExpression._from_operation(self, other, PipelineExpressionOperator.DIV) + + def __rtruediv__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.DIV) + return PipelineExpression._from_operation(other, self, PipelineExpressionOperator.DIV) + + def __mod__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.MOD) + return PipelineExpression._from_operation(self, other, PipelineExpressionOperator.MOD) + + def __rmod__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.MOD) + return PipelineExpression._from_operation(other, self, PipelineExpressionOperator.MOD) + + def __pow__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.POW) + return PipelineExpression._from_operation(self, other, PipelineExpressionOperator.POW) + + def __rpow__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.POW) + return PipelineExpression._from_operation(other, self, PipelineExpressionOperator.POW) + + def __floordiv__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.FLOORDIV) + return PipelineExpression._from_operation(self, other, PipelineExpressionOperator.FLOORDIV) + + def __rfloordiv__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.FLOORDIV) + return PipelineExpression._from_operation(other, self, PipelineExpressionOperator.FLOORDIV) + + def __lt__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.LT) + return PipelineExpression._from_operation(self, other, PipelineExpressionOperator.LT) + + def __gt__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, 
PipelineExpressionOperator.GT) + return PipelineExpression._from_operation(self, other, PipelineExpressionOperator.GT) + + def __le__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.LTE) + return PipelineExpression._from_operation(self, other, PipelineExpressionOperator.LTE) + + def __ge__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.GTE) + return PipelineExpression._from_operation(self, other, PipelineExpressionOperator.GTE) + + # TODO: Bug Item number: 2883354 + def __eq__(self, other: Any) -> "PipelineExpression": # type: ignore + self._validate_binary_operation(other, PipelineExpressionOperator.EQ) + return PipelineExpression._from_operation(self, other, PipelineExpressionOperator.EQ) + + # TODO: Bug Item number: 2883354 + def __ne__(self, other: Any) -> "PipelineExpression": # type: ignore + self._validate_binary_operation(other, PipelineExpressionOperator.NE) + return PipelineExpression._from_operation(self, other, PipelineExpressionOperator.NE) + + def __and__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.AND) + return PipelineExpression._from_operation(self, other, PipelineExpressionOperator.AND) + + def __or__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.OR) + return PipelineExpression._from_operation(self, other, PipelineExpressionOperator.OR) + + def __xor__(self, other: Any) -> "PipelineExpression": + self._validate_binary_operation(other, PipelineExpressionOperator.XOR) + return PipelineExpression._from_operation(self, None, PipelineExpressionOperator.XOR) + + def __bool__(self) -> bool: + """Python method that is used to implement truth value testing and the built-in operation bool(). + + This method is not supported as PipelineExpressionMixin is designed to record operation history, + while this method can only return False or True, leading to history breaks here. + As overloadable boolean operators PEP (refer to: https://www.python.org/dev/peps/pep-0335/) + was rejected, logical operations are also not supported. + + :return: True if not inside dsl pipeline func, raises otherwise + :rtype: bool + """ + from azure.ai.ml.dsl._pipeline_component_builder import _is_inside_dsl_pipeline_func + + # note: unexpected bool test always be checking if the object is None; + # so for non-pipeline scenarios, directly return True to avoid unexpected breaking, + # and for pipeline scenarios, will use is not None to replace bool test. + if not _is_inside_dsl_pipeline_func(): + return True + + error_message = f"Type {type(self)} is not supported for operation bool()." + raise UserErrorException(message=error_message, no_personal_data_message=error_message) + + +class PipelineExpression(PipelineExpressionMixin): + """Pipeline expression entity. + + Use PipelineExpression to support simple and trivial parameter transformation tasks with constants + or other parameters. Operations are recorded in this class during executions, and expected result + will be generated for corresponding scenario. 
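+
+    A minimal illustrative sketch (the enclosing ``@dsl.pipeline`` function and the
+    number-typed pipeline input ``threshold`` are hypothetical, not defined in this module):
+
+    .. code-block:: python
+
+        # inside a @dsl.pipeline-decorated function
+        expr = threshold * 2 + 1    # operations are recorded as a PipelineExpression, not evaluated
+        result = expr.resolve()     # string concatenation resolves to a data binding string,
+                                    # other operations resolve to a generated helper component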
+ """ + + _PIPELINE_INPUT_PREFIX = ["parent", "inputs"] + _PIPELINE_INPUT_PATTERN = re.compile(pattern=r"parent.inputs.(?P<pipeline_input_name>[^.]+)") + _PIPELINE_INPUT_NAME_GROUP = "pipeline_input_name" + # AML type to Python type, for generated Python code + _TO_PYTHON_TYPE = { + ComponentParameterTypes.BOOLEAN: bool.__name__, + ComponentParameterTypes.INTEGER: int.__name__, + ComponentParameterTypes.NUMBER: float.__name__, + ComponentParameterTypes.STRING: str.__name__, + } + + _INDENTATION = " " + _IMPORT_MLDESIGNER_LINE = "from mldesigner import command_component, Output" + _DECORATOR_LINE = "@command_component(@@decorator_parameters@@)" + _COMPONENT_FUNC_NAME = "expression_func" + _COMPONENT_FUNC_DECLARATION_LINE = ( + f"def {_COMPONENT_FUNC_NAME}(@@component_parameters@@)" " -> Output(type=@@return_type@@):" + ) + _PYTHON_CACHE_FOLDER_NAME = "__pycache__" + + def __init__(self, postfix: List[str], inputs: Dict[str, ExpressionInput]): + self._postfix = postfix + self._inputs = inputs.copy() # including PiplineInput and Output, extra stored name and type + self._result_type: Optional[str] = None + self._created_component = None + + @property + def expression(self) -> str: + """Infix expression string, wrapped with parentheses. + + :return: The infix expression + :rtype: str + """ + return self._to_infix() + + def __str__(self) -> str: + return self._to_data_binding() + + def _data_binding(self) -> str: + return self._to_data_binding() + + def _to_infix(self) -> str: + stack = [] + for token in self._postfix: + if token not in _SUPPORTED_OPERATORS: + stack.append(token) + continue + operand2, operand1 = stack.pop(), stack.pop() + stack.append(f"({operand1} {token} {operand2})") + return stack.pop() + + # pylint: disable=too-many-statements + @staticmethod + def _handle_operand( + operand: "PipelineExpression", + postfix: List[str], + expression_inputs: Dict[str, ExpressionInput], + pipeline_inputs: dict, + ) -> Tuple[List[str], Dict[str, ExpressionInput]]: + """Handle operand in expression, update postfix expression and expression inputs. + + :param operand: The operand + :type operand: "PipelineExpression" + :param postfix: + :type postfix: List[str] + :param expression_inputs: The expression inputs + :type expression_inputs: Dict[str, ExpressionInput] + :param pipeline_inputs: The pipeline inputs + :type pipeline_inputs: dict + :return: A 2-tuple of the updated postfix expression and expression inputs + :rtype: Tuple[List[str], Dict[str, ExpressionInput]] + """ + from azure.ai.ml.entities._job.pipeline._io import NodeOutput, PipelineInput + + def _update_postfix(_postfix: List[str], _old_name: str, _new_name: str) -> List[str]: + return list(map(lambda _x: _new_name if _x == _old_name else _x, _postfix)) + + def _get_or_create_input_name( + _original_name: str, + _operand: Union[PipelineInput, NodeOutput], + _expression_inputs: Dict[str, ExpressionInput], + ) -> str: + """Get or create expression input name as current operand may have appeared in expression. 
+ + :param _original_name: The original name + :type _original_name: str + :param _operand: The expression operand + :type _operand: Union[PipelineInput, NodeOutput] + :param _expression_inputs: The expression inputs + :type _expression_inputs: Dict[str, ExpressionInput] + :return: The input name + :rtype: str + """ + _existing_id_to_name = {id(_v.value): _k for _k, _v in _expression_inputs.items()} + if id(_operand) in _existing_id_to_name: + return _existing_id_to_name[id(_operand)] + # use a counter to generate a unique name for current operand + _name, _counter = _original_name, 0 + while _name in _expression_inputs: + _name = f"{_original_name}_{_counter}" + _counter += 1 + return _name + + def _handle_pipeline_input( + _pipeline_input: PipelineInput, + _postfix: List[str], + _expression_inputs: Dict[str, ExpressionInput], + ) -> Tuple[List[str], dict]: + _name = _pipeline_input._port_name + # 1. use name with counter for pipeline input; 2. add component's name to component output + if _name in _expression_inputs: + _seen_input = _expression_inputs[_name] + if isinstance(_seen_input.value, PipelineInput): + _name = _get_or_create_input_name(_name, _pipeline_input, _expression_inputs) + else: + _expression_inputs.pop(_name) + _new_name = f"{_seen_input.value._owner.component.name}__{_seen_input.value._port_name}" + _postfix = _update_postfix(_postfix, _name, _new_name) + _expression_inputs[_new_name] = ExpressionInput(_new_name, _seen_input.type, _seen_input) + _postfix.append(_name) + + param_input = pipeline_inputs + for group_name in _pipeline_input._group_names: + param_input = param_input[group_name].values + _expression_inputs[_name] = ExpressionInput( + _name, param_input[_pipeline_input._port_name].type, _pipeline_input + ) + return _postfix, _expression_inputs + + def _handle_component_output( + _component_output: NodeOutput, + _postfix: List[str], + _expression_inputs: Dict[str, ExpressionInput], + ) -> Tuple[List[str], dict]: + if _component_output._meta is not None and not _component_output._meta._is_primitive_type: + error_message = ( + f"Component output {_component_output._port_name} in expression must " + f"be a primitive type with value {True!r}, " + f"got {_component_output._meta._is_primitive_type!r}" + ) + raise UserErrorException(message=error_message, no_personal_data_message=error_message) + _name = _component_output._port_name + _has_prefix = False + # "output" is the default output name for command component, add component's name as prefix + if _name == "output": + if _component_output._owner is not None and not isinstance(_component_output._owner.component, str): + _name = f"{_component_output._owner.component.name}__output" + _has_prefix = True + # following loop is expected to execute at most twice: + # 1. add component's name to output(s) + # 2. 
use name with counter + while _name in _expression_inputs: + _seen_input = _expression_inputs[_name] + if isinstance(_seen_input.value, PipelineInput): + if not _has_prefix: + if _component_output._owner is not None and not isinstance( + _component_output._owner.component, str + ): + _name = f"{_component_output._owner.component.name}__{_component_output._port_name}" + _has_prefix = True + continue + _name = _get_or_create_input_name(_name, _component_output, _expression_inputs) + else: + if not _has_prefix: + _expression_inputs.pop(_name) + _new_name = f"{_seen_input.value._owner.component.name}__{_seen_input.value._port_name}" + _postfix = _update_postfix(_postfix, _name, _new_name) + _expression_inputs[_new_name] = ExpressionInput(_new_name, _seen_input.type, _seen_input) + if _component_output._owner is not None and not isinstance( + _component_output._owner.component, str + ): + _name = f"{_component_output._owner.component.name}__{_component_output._port_name}" + _has_prefix = True + _name = _get_or_create_input_name(_name, _component_output, _expression_inputs) + _postfix.append(_name) + _expression_inputs[_name] = ExpressionInput(_name, _component_output.type, _component_output) + return _postfix, _expression_inputs + + if operand is None or isinstance(operand, PipelineExpression._SUPPORTED_PRIMITIVE_TYPES): + postfix.append(repr(operand)) + elif isinstance(operand, PipelineInput): + postfix, expression_inputs = _handle_pipeline_input(operand, postfix, expression_inputs) + elif isinstance(operand, NodeOutput): + postfix, expression_inputs = _handle_component_output(operand, postfix, expression_inputs) + elif isinstance(operand, PipelineExpression): + postfix.extend(operand._postfix.copy()) + expression_inputs.update(operand._inputs.copy()) + return postfix, expression_inputs + + @staticmethod + def _from_operation(operand1: Any, operand2: Any, operator: str) -> "PipelineExpression": + if operator not in _SUPPORTED_OPERATORS: + error_message = ( + f"Operator '{operator}' is not supported operator, " + f"currently supported operators are {','.join(_SUPPORTED_OPERATORS)}." + ) + raise UserErrorException(message=error_message, no_personal_data_message=error_message) + + # get all pipeline input types from builder stack + # TODO: check if there is pipeline input we cannot know its type (missing in `PipelineComponentBuilder.inputs`)? + from azure.ai.ml.dsl._pipeline_component_builder import _definition_builder_stack + + res = _definition_builder_stack.top() + pipeline_inputs = res.inputs if res is not None else {} + postfix: List[str] = [] + inputs: Dict[str, ExpressionInput] = {} + postfix, inputs = PipelineExpression._handle_operand(operand1, postfix, inputs, pipeline_inputs) + postfix, inputs = PipelineExpression._handle_operand(operand2, postfix, inputs, pipeline_inputs) + postfix.append(operator) + return PipelineExpression(postfix, inputs) + + @property + def _string_concatenation(self) -> bool: + """If all operands are string and operations are addition, it is a string concatenation expression. 
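+
+        Illustrative sketch (``tag`` is a hypothetical string-typed pipeline input and
+        ``count`` a hypothetical integer-typed pipeline input):
+
+        .. code-block:: python
+
+            ("run-" + tag)._string_concatenation    # True  -> resolve() returns a data binding
+            (count + 1)._string_concatenation       # False -> resolve() builds a helper component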
+ + :return: Whether this represents string concatenation + :rtype: bool + """ + for token in self._postfix: + # operator can only be "+" for string concatenation + if token in _SUPPORTED_OPERATORS: + if token != PipelineExpressionOperator.ADD: + return False + continue + # constant and PiplineInput should be type string + if token in self._inputs: + if self._inputs[token].type != ComponentParameterTypes.STRING: + return False + else: + if not isinstance(eval(token), str): # pylint: disable=eval-used # nosec + return False + return True + + def _to_data_binding(self) -> str: + """Convert operands to data binding and concatenate them in the order of postfix expression. + + :return: The data binding + :rtype: str + """ + if not self._string_concatenation: + error_message = ( + "Only string concatenation expression is supported to convert to data binding, " + f"current expression is '{self.expression}'." + ) + raise UserErrorException(message=error_message, no_personal_data_message=error_message) + + stack = [] + for token in self._postfix: + if token != PipelineExpressionOperator.ADD: + if token in self._inputs: + stack.append(self._inputs[token].value._data_binding()) + else: + stack.append(eval(token)) # pylint: disable=eval-used # nosec + continue + operand2, operand1 = stack.pop(), stack.pop() + stack.append(operand1 + operand2) + res: str = stack.pop() + return res + + def resolve(self) -> Union[str, "BaseNode"]: + """Resolve expression to data binding or component, depend on the operations. + + :return: The data binding string or the component + :rtype: Union[str, BaseNode] + """ + if self._string_concatenation: + return self._to_data_binding() + return cast(Union[str, "BaseNode"], self._create_component()) + + @staticmethod + def parse_pipeline_inputs_from_data_binding(data_binding: str) -> List[str]: + """Parse all PipelineInputs name from data binding expression. + + :param data_binding: Data binding expression + :type data_binding: str + :return: List of PipelineInput's name from given data binding expression + :rtype: List[str] + """ + pipeline_input_names = [] + for single_data_binding in get_all_data_binding_expressions( + value=data_binding, + binding_prefix=PipelineExpression._PIPELINE_INPUT_PREFIX, + is_singular=False, + ): + m = PipelineExpression._PIPELINE_INPUT_PATTERN.match(single_data_binding) + # `get_all_data_binding_expressions` should work as pre-filter, so no need to concern `m` is None + if m is not None: + pipeline_input_names.append(m.group(PipelineExpression._PIPELINE_INPUT_NAME_GROUP)) + return pipeline_input_names + + @staticmethod + def _get_operation_result_type(type1: str, operator: str, type2: str) -> str: + def _validate_operand_type(_type: str) -> None: + if _type != NONE_PARAMETER_TYPE and _type not in PipelineExpression._SUPPORTED_PIPELINE_INPUT_TYPES: + error_message = ( + f"Pipeline input type {_type!r} is not supported in expression; " + f"currently only support None, " + + ", ".join(PipelineExpression._SUPPORTED_PIPELINE_INPUT_TYPES) + + "." 
+ ) + raise UserErrorException(message=error_message, no_personal_data_message=error_message) + + _validate_operand_type(type1) + _validate_operand_type(type2) + operation = f"{type1} {operator} {type2}" + lookup_value = _OPERATION_RESULT_TYPE_LOOKUP.get(operation) + if isinstance(lookup_value, str): + return lookup_value # valid operation, return result type + _user_exception: UserErrorException = lookup_value + raise _user_exception # invalid operation, raise UserErrorException + + def _get_operand_type(self, operand: str) -> str: + if operand in self._inputs: + res: str = self._inputs[operand].type + return res + primitive_type = type(eval(operand)) # pylint: disable=eval-used # nosec + res_type: str = IOConstants.PRIMITIVE_TYPE_2_STR.get(primitive_type, NONE_PARAMETER_TYPE) + return res_type + + @property + def _component_code(self) -> str: + def _generate_function_code_lines() -> Tuple[List[str], str]: + """Return lines of code and return type. + + :return: A 2-tuple of (function body, return type name) + :rtype: Tuple[List[str], str] + """ + _inter_id, _code, _stack = 0, [], [] + _line_recorder: Dict = {} + for _token in self._postfix: + if _token not in _SUPPORTED_OPERATORS: + _type = self._get_operand_type(_token) + _stack.append((_token, _type)) + continue + _operand2, _type2 = _stack.pop() + _operand1, _type1 = _stack.pop() + _current_line = f"{_operand1} {_token} {_operand2}" + if _current_line in _line_recorder: + _inter_var, _inter_var_type = _line_recorder[_current_line] + else: + _inter_var = f"inter_var_{_inter_id}" + _inter_id += 1 + _inter_var_type = self._get_operation_result_type(_type1, _token, _type2) + _code.append(f"{self._INDENTATION}{_inter_var} = {_current_line}") + _line_recorder[_current_line] = (_inter_var, _inter_var_type) + _stack.append((_inter_var, _inter_var_type)) + _return_var, _result_type = _stack.pop() + _code.append(f"{self._INDENTATION}return {_return_var}") + return _code, _result_type + + def _generate_function_decorator_and_declaration_lines(_return_type: str) -> List[str]: + # decorator parameters + _display_name = f'{self._INDENTATION}display_name="Expression: {self.expression}",' + _decorator_parameters = "\n" + "\n".join([_display_name]) + "\n" + # component parameters + _component_parameters = [] + for _name in sorted(self._inputs): + _type = self._TO_PYTHON_TYPE[self._inputs[_name].type] + _component_parameters.append(f"{_name}: {_type}") + _component_parameters_str = ( + "\n" + + "\n".join( + [f"{self._INDENTATION}{_component_parameter}," for _component_parameter in _component_parameters] + ) + + "\n" + ) + return [ + self._IMPORT_MLDESIGNER_LINE + "\n\n", + self._DECORATOR_LINE.replace("@@decorator_parameters@@", _decorator_parameters), + self._COMPONENT_FUNC_DECLARATION_LINE.replace( + "@@component_parameters@@", _component_parameters_str + ).replace("@@return_type@@", f'"{_return_type}"'), + ] + + lines, result_type = _generate_function_code_lines() + self._result_type = result_type + code = _generate_function_decorator_and_declaration_lines(result_type) + lines + return "\n".join(code) + "\n" + + def _create_component(self) -> Any: + def _generate_python_file(_folder: Path) -> None: + _folder.mkdir() + with open(_folder / "expression_component.py", "w", encoding=DefaultOpenEncoding.WRITE) as _f: + _f.write(self._component_code) + + def _generate_yaml_file(_path: Path) -> None: + _data_folder = Path(__file__).parent / "data" + # update YAML content from template and dump + with open(_data_folder / "expression_component_template.yml", 
"r", encoding=DefaultOpenEncoding.READ) as _f: + _data = load_yaml(_f) + _data["display_name"] = f"Expression: {self.expression}" + _data["inputs"] = {} + _data["outputs"]["output"]["type"] = self._result_type + _command_inputs_items = [] + for _name in sorted(self._inputs): + _type = self._inputs[_name].type + _data["inputs"][_name] = {"type": _type} + _command_inputs_items.append(_name + '="${{inputs.' + _name + '}}"') + _command_inputs_string = " ".join(_command_inputs_items) + _command_output_string = 'output="${{outputs.output}}"' + _command = ( + "mldesigner execute --source expression_component.py --name expression_func" + " --inputs " + _command_inputs_string + " --outputs " + _command_output_string + ) + _data["command"] = _data["command"].format(command_placeholder=_command) + dump_yaml_to_file(_path, _data) + + if self._created_component is None: + tmp_folder = Path(tempfile.mkdtemp()) + code_folder = tmp_folder / "src" + yaml_path = tmp_folder / "component_spec.yml" + _generate_python_file(code_folder) + _generate_yaml_file(yaml_path) + + from azure.ai.ml import load_component + + component_func = load_component(yaml_path) + component_kwargs = {k: v.value for k, v in self._inputs.items()} + self._created_component = component_func(**component_kwargs) + if self._created_component is not None: + self._created_component.environment_variables = {AZUREML_PRIVATE_FEATURES_ENV_VAR: "true"} + return self._created_component diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_pipeline_job_helpers.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_pipeline_job_helpers.py new file mode 100644 index 00000000..3a7d89e7 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/_pipeline_job_helpers.py @@ -0,0 +1,182 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +import re +from typing import Dict, List, Tuple, Type, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import InputDeliveryMode +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobInput as RestJobInput +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobOutput as RestJobOutput +from azure.ai.ml._restclient.v2023_04_01_preview.models import Mpi, PyTorch, Ray, TensorFlow +from azure.ai.ml.constants._component import ComponentJobConstants +from azure.ai.ml.entities._inputs_outputs import Input, Output +from azure.ai.ml.entities._job._input_output_helpers import ( + INPUT_MOUNT_MAPPING_FROM_REST, + INPUT_MOUNT_MAPPING_TO_REST, + OUTPUT_MOUNT_MAPPING_FROM_REST, + OUTPUT_MOUNT_MAPPING_TO_REST, +) +from azure.ai.ml.entities._util import normalize_job_input_output_type +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +def process_sdk_component_job_io( + io: Dict, + io_binding_regex_list: List[str], +) -> Tuple: + """Separates SDK ComponentJob inputs that are data bindings (i.e. string inputs prefixed with 'inputs.' or + 'outputs.') and dataset and literal inputs/outputs. + + :param io: Input or output dictionary of an SDK ComponentJob + :type io: Dict[str, Union[str, float, bool, Input]] + :param io_binding_regex_list: A list of regexes for io bindings + :type io_binding_regex_list: List[str] + :return: A tuple of dictionaries: + * One mapping inputs to REST formatted ComponentJobInput/ComponentJobOutput for data binding io. 
+ * The other dictionary contains any IO that is not a databinding that is yet to be turned into REST form + :rtype: Tuple[Dict[str, str], Dict[str, Union[str, float, bool, Input]]] + """ + io_bindings: Dict = {} + dataset_literal_io: Dict = {} + legacy_io_binding_regex_list = [ + ComponentJobConstants.LEGACY_INPUT_PATTERN, + ComponentJobConstants.LEGACY_OUTPUT_PATTERN, + ] + for io_name, io_value in io.items(): + if isinstance(io_value, (Input, Output)) and isinstance(io_value.path, str): + mode = io_value.mode + path = io_value.path + name = io_value.name if hasattr(io_value, "name") else None + version = io_value.version if hasattr(io_value, "version") else None + if any(re.match(item, path) for item in io_binding_regex_list): + # Yaml syntax requires using ${{}} to enclose inputs and outputs bindings + # io_bindings[io_name] = io_value + io_bindings.update({io_name: {"value": path}}) + # add mode to literal value for binding input + if mode: + if isinstance(io_value, Input): + io_bindings[io_name].update({"mode": INPUT_MOUNT_MAPPING_TO_REST[mode]}) + else: + io_bindings[io_name].update({"mode": OUTPUT_MOUNT_MAPPING_TO_REST[mode]}) + if name or version: + assert isinstance(io_value, Output) + if name: + io_bindings[io_name].update({"name": name}) + if version: + io_bindings[io_name].update({"version": version}) + if isinstance(io_value, Output) and io_value.name is not None: + # when the output should be registered, + # we add io_value to dataset_literal_io for further to_rest_data_outputs + dataset_literal_io[io_name] = io_value + elif any(re.match(item, path) for item in legacy_io_binding_regex_list): + new_format = path.replace("{{", "{{parent.") + msg = "{} has changed to {}, please change to use new format." + raise ValidationException( + message=msg.format(path, new_format), + no_personal_data_message=msg.format("[io_value]", "[io_value_new_format]"), + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + else: + dataset_literal_io[io_name] = io_value + else: + # Collect non-input data inputs + dataset_literal_io[io_name] = io_value + return io_bindings, dataset_literal_io + + +def from_dict_to_rest_io( + io: Dict[str, Union[str, dict]], + rest_object_class: Union[Type[RestJobInput], Type[RestJobOutput]], + io_binding_regex_list: List[str], +) -> Tuple[Dict[str, str], Dict[str, Union[RestJobInput, RestJobOutput]]]: + """Translate rest JObject dictionary to rest inputs/outputs and bindings. + + :param io: Input or output dictionary. + :type io: Dict[str, Union[str, dict]] + :param rest_object_class: RestJobInput or RestJobOutput + :type rest_object_class: Union[Type[RestJobInput], Type[RestJobOutput]] + :param io_binding_regex_list: A list of regexes for io bindings + :type io_binding_regex_list: List[str] + :return: Map from IO name to IO bindings and Map from IO name to IO objects. 
+ :rtype: Tuple[Dict[str, str], Dict[str, Union[RestJobInput, RestJobOutput]]] + """ + io_bindings: dict = {} + rest_io_objects = {} + DIRTY_MODE_MAPPING = { + "Mount": InputDeliveryMode.READ_ONLY_MOUNT, + "RoMount": InputDeliveryMode.READ_ONLY_MOUNT, + "RwMount": InputDeliveryMode.READ_WRITE_MOUNT, + } + for key, val in io.items(): + if isinstance(val, dict): + # convert the input of camel to snake to be compatible with the Jun api + # todo: backend help convert node level input/output type + normalize_job_input_output_type(val) + + # Add casting as sometimes we got value like 1(int) + io_value = str(val.get("value", "")) + io_mode = val.get("mode", None) + io_name = val.get("name", None) + io_version = val.get("version", None) + if any(re.match(item, io_value) for item in io_binding_regex_list): + io_bindings.update({key: {"path": io_value}}) + # add mode to literal value for binding input + if io_mode: + # deal with dirty mode data submitted before + if io_mode in DIRTY_MODE_MAPPING: + io_mode = DIRTY_MODE_MAPPING[io_mode] + val["mode"] = io_mode + if io_mode in OUTPUT_MOUNT_MAPPING_FROM_REST: + io_bindings[key].update({"mode": OUTPUT_MOUNT_MAPPING_FROM_REST[io_mode]}) + else: + io_bindings[key].update({"mode": INPUT_MOUNT_MAPPING_FROM_REST[io_mode]}) + # add name and version for binding input + if io_name or io_version: + assert rest_object_class.__name__ == "JobOutput" + # current code only support dump name and version for JobOutput + # this assert can be deleted if we need to dump name/version for JobInput + if io_name: + io_bindings[key].update({"name": io_name}) + if io_version: + io_bindings[key].update({"version": io_version}) + if not io_mode and not io_name and not io_version: + io_bindings[key] = io_value + else: + if rest_object_class.__name__ == "JobOutput": + # current code only support dump name and version for JobOutput + # this condition can be deleted if we need to dump name/version for JobInput + if "name" in val.keys(): + val["asset_name"] = val.pop("name") + if "version" in val.keys(): + val["asset_version"] = val.pop("version") + rest_obj = rest_object_class.from_dict(val) + rest_io_objects[key] = rest_obj + else: + msg = "Got unsupported type of input/output: {}:" + f"{type(val)}" + raise ValidationException( + message=msg.format(val), + no_personal_data_message=msg.format("[val]"), + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) + return io_bindings, rest_io_objects + + +def from_dict_to_rest_distribution(distribution_dict: Dict) -> Union[PyTorch, Mpi, TensorFlow, Ray]: + target_type = distribution_dict["distribution_type"].lower() + if target_type == "pytorch": + return PyTorch(**distribution_dict) + if target_type == "mpi": + return Mpi(**distribution_dict) + if target_type == "tensorflow": + return TensorFlow(**distribution_dict) + if target_type == "ray": + return Ray(**distribution_dict) + msg = "Distribution type must be pytorch, mpi, tensorflow or ray: {}".format(target_type) + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.PIPELINE, + error_category=ErrorCategory.USER_ERROR, + ) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/data/expression_component_template.yml b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/data/expression_component_template.yml new file mode 100644 index 00000000..10d391aa --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/data/expression_component_template.yml 
@@ -0,0 +1,16 @@ +$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json +type: command + +name: expression_component +version: 1 + +outputs: + output: + is_control: true + +code: ./src + +environment: azureml://registries/azureml/environments/mldesigner/labels/latest + +command: >- + {command_placeholder} diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/pipeline_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/pipeline_job.py new file mode 100644 index 00000000..7ddbbc46 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/pipeline_job.py @@ -0,0 +1,711 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access +import itertools +import logging +import typing +from functools import partial +from pathlib import Path +from typing import Any, Dict, Generator, List, Optional, Union, cast + +from typing_extensions import Literal + +from azure.ai.ml._restclient.v2024_01_01_preview.models import JobBase +from azure.ai.ml._restclient.v2024_01_01_preview.models import PipelineJob as RestPipelineJob +from azure.ai.ml._schema import PathAwareSchema +from azure.ai.ml._schema.pipeline.pipeline_job import PipelineJobSchema +from azure.ai.ml._utils._arm_id_utils import get_resource_name_from_arm_id_safe +from azure.ai.ml._utils.utils import ( + camel_to_snake, + is_data_binding_expression, + is_private_preview_enabled, + transform_dict_keys, +) +from azure.ai.ml.constants import JobType +from azure.ai.ml.constants._common import AZUREML_PRIVATE_FEATURES_ENV_VAR, BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._component import ComponentSource +from azure.ai.ml.constants._job.pipeline import ValidationErrorCode +from azure.ai.ml.entities._builders import BaseNode +from azure.ai.ml.entities._builders.condition_node import ConditionNode +from azure.ai.ml.entities._builders.control_flow_node import LoopNode +from azure.ai.ml.entities._builders.import_node import Import +from azure.ai.ml.entities._builders.parallel import Parallel +from azure.ai.ml.entities._builders.pipeline import Pipeline +from azure.ai.ml.entities._component.component import Component +from azure.ai.ml.entities._component.pipeline_component import PipelineComponent + +# from azure.ai.ml.entities._job.identity import AmlToken, Identity, ManagedIdentity, UserIdentity +from azure.ai.ml.entities._credentials import ( + AmlTokenConfiguration, + ManagedIdentityConfiguration, + UserIdentityConfiguration, + _BaseJobIdentityConfiguration, +) +from azure.ai.ml.entities._inputs_outputs import Input, Output +from azure.ai.ml.entities._inputs_outputs.group_input import GroupInput +from azure.ai.ml.entities._job._input_output_helpers import ( + from_rest_data_outputs, + from_rest_inputs_to_dataset_literal, + to_rest_data_outputs, + to_rest_dataset_literal_inputs, +) +from azure.ai.ml.entities._job.import_job import ImportJob +from azure.ai.ml.entities._job.job import Job +from azure.ai.ml.entities._job.job_service import JobServiceBase +from azure.ai.ml.entities._job.pipeline._io import PipelineInput, PipelineJobIOMixin +from azure.ai.ml.entities._job.pipeline.pipeline_job_settings import PipelineJobSettings +from azure.ai.ml.entities._mixins import YamlTranslatableMixin +from azure.ai.ml.entities._system_data import SystemData +from 
azure.ai.ml.entities._validation import MutableValidationResult, PathAwareSchemaValidatableMixin +from azure.ai.ml.exceptions import ErrorTarget, UserErrorException, ValidationException + +module_logger = logging.getLogger(__name__) + + +class PipelineJob(Job, YamlTranslatableMixin, PipelineJobIOMixin, PathAwareSchemaValidatableMixin): + """Pipeline job. + + You should not instantiate this class directly. Instead, you should + use the `@pipeline` decorator to create a `PipelineJob`. + + :param component: Pipeline component version. The field is mutually exclusive with 'jobs'. + :type component: Union[str, ~azure.ai.ml.entities._component.pipeline_component.PipelineComponent] + :param inputs: Inputs to the pipeline job. + :type inputs: dict[str, Union[~azure.ai.ml.entities.Input, str, bool, int, float]] + :param outputs: Outputs of the pipeline job. + :type outputs: dict[str, ~azure.ai.ml.entities.Output] + :param name: Name of the PipelineJob. Defaults to None. + :type name: str + :param description: Description of the pipeline job. Defaults to None + :type description: str + :param display_name: Display name of the pipeline job. Defaults to None + :type display_name: str + :param experiment_name: Name of the experiment the job will be created under. + If None is provided, the experiment will be set to the current directory. Defaults to None + :type experiment_name: str + :param jobs: Pipeline component node name to component object. Defaults to None + :type jobs: dict[str, ~azure.ai.ml.entities._builders.BaseNode] + :param settings: Setting of the pipeline job. Defaults to None + :type settings: ~azure.ai.ml.entities.PipelineJobSettings + :param identity: Identity that the training job will use while running on compute. Defaults to None + :type identity: Union[ + ~azure.ai.ml.entities._credentials.ManagedIdentityConfiguration, + ~azure.ai.ml.entities._credentials.AmlTokenConfiguration, + ~azure.ai.ml.entities._credentials.UserIdentityConfiguration + + ] + :param compute: Compute target name of the built pipeline. Defaults to None + :type compute: str + :param tags: Tag dictionary. Tags can be added, removed, and updated. Defaults to None + :type tags: dict[str, str] + :param kwargs: A dictionary of additional configuration parameters. Defaults to None + :type kwargs: dict + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_pipeline_job_configurations.py + :start-after: [START configure_pipeline_job_and_settings] + :end-before: [END configure_pipeline_job_and_settings] + :language: python + :dedent: 8 + :caption: Shows how to create a pipeline using this class. 
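+
+    A minimal construction sketch (the component YAML path, compute name, and parameter
+    name below are placeholders, not part of this module):
+
+    .. code-block:: python
+
+        from azure.ai.ml import load_component
+        from azure.ai.ml.dsl import pipeline
+
+        train = load_component(source="./train.yml")   # hypothetical component definition
+
+        @pipeline(default_compute="cpu-cluster")        # hypothetical compute target
+        def my_pipeline(learning_rate: float):
+            train(learning_rate=learning_rate)
+
+        job = my_pipeline(learning_rate=0.01)           # returns a PipelineJob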
+ """ + + def __init__( + self, + *, + component: Optional[Union[str, PipelineComponent, Component]] = None, + inputs: Optional[Dict[str, Union[Input, str, bool, int, float]]] = None, + outputs: Optional[Dict[str, Output]] = None, + name: Optional[str] = None, + description: Optional[str] = None, + display_name: Optional[str] = None, + experiment_name: Optional[str] = None, + jobs: Optional[Dict[str, BaseNode]] = None, + settings: Optional[PipelineJobSettings] = None, + identity: Optional[ + Union[ManagedIdentityConfiguration, AmlTokenConfiguration, UserIdentityConfiguration] + ] = None, + compute: Optional[str] = None, + tags: Optional[Dict[str, str]] = None, + **kwargs: Any, + ) -> None: + # initialize io + inputs, outputs = inputs or {}, outputs or {} + if isinstance(component, PipelineComponent) and component._source in [ + ComponentSource.DSL, + ComponentSource.YAML_COMPONENT, + ]: + self._inputs = self._build_inputs_dict(inputs, input_definition_dict=component.inputs) + # for pipeline component created pipeline jobs, + # it's output should have same value with the component outputs, + # then override it with given outputs (filter out None value) + pipeline_outputs = {k: v for k, v in (outputs or {}).items() if v} + self._outputs = self._build_pipeline_outputs_dict({**component.outputs, **pipeline_outputs}) + else: + # Build inputs/outputs dict without meta when definition not available + self._inputs = self._build_inputs_dict(inputs) + # for node created pipeline jobs, + # it's output should have same value with the given outputs + self._outputs = self._build_pipeline_outputs_dict(outputs=outputs) + source = kwargs.pop("_source", ComponentSource.CLASS) + if component is None: + component = PipelineComponent( + jobs=jobs, + description=description, + display_name=display_name, + base_path=kwargs.get(BASE_PATH_CONTEXT_KEY), + _source=source, + ) + + # If component is Pipeline component, jobs will be component.jobs + self._jobs = (jobs or {}) if isinstance(component, str) else {} + + self.component: Union[PipelineComponent, str] = cast(Union[PipelineComponent, str], component) + if "type" not in kwargs: + kwargs["type"] = JobType.PIPELINE + if isinstance(component, PipelineComponent): + description = component.description if description is None else description + display_name = component.display_name if display_name is None else display_name + super(PipelineJob, self).__init__( + name=name, + description=description, + tags=tags, + display_name=display_name, + experiment_name=experiment_name, + compute=compute, + **kwargs, + ) + + self._remove_pipeline_input() + self.compute = compute + self._settings: Any = None + self.settings = settings + self.identity = identity + # TODO: remove default code & environment? + self._default_code = None + self._default_environment = None + + @property + def inputs(self) -> Dict: + """Inputs of the pipeline job. + + :return: Inputs of the pipeline job. + :rtype: dict[str, Union[~azure.ai.ml.entities.Input, str, bool, int, float]] + """ + return self._inputs + + @property + def outputs(self) -> Dict[str, Union[str, Output]]: + """Outputs of the pipeline job. + + :return: Outputs of the pipeline job. + :rtype: dict[str, Union[str, ~azure.ai.ml.entities.Output]] + """ + return self._outputs + + @property + def jobs(self) -> Dict: + """Return jobs of pipeline job. + + :return: Jobs of pipeline job. 
+ :rtype: dict + """ + res: dict = self.component.jobs if isinstance(self.component, PipelineComponent) else self._jobs + return res + + @property + def settings(self) -> Optional[PipelineJobSettings]: + """Settings of the pipeline job. + + :return: Settings of the pipeline job. + :rtype: ~azure.ai.ml.entities.PipelineJobSettings + """ + if self._settings is None: + self._settings = PipelineJobSettings() + res: Optional[PipelineJobSettings] = self._settings + return res + + @settings.setter + def settings(self, value: Union[Dict, PipelineJobSettings]) -> None: + """Set the pipeline job settings. + + :param value: The pipeline job settings. + :type value: Union[dict, ~azure.ai.ml.entities.PipelineJobSettings] + """ + if value is not None: + if isinstance(value, PipelineJobSettings): + # since PipelineJobSettings inherit _AttrDict, we need add this branch to distinguish with dict + pass + elif isinstance(value, dict): + value = PipelineJobSettings(**value) + else: + raise TypeError("settings must be PipelineJobSettings or dict but got {}".format(type(value))) + self._settings = value + + @classmethod + def _create_validation_error(cls, message: str, no_personal_data_message: str) -> ValidationException: + return ValidationException( + message=message, + no_personal_data_message=no_personal_data_message, + target=ErrorTarget.PIPELINE, + ) + + @classmethod + def _create_schema_for_validation(cls, context: Any) -> PathAwareSchema: + # import this to ensure that nodes are registered before schema is created. + + return PipelineJobSchema(context=context) + + @classmethod + def _get_skip_fields_in_schema_validation(cls) -> typing.List[str]: + # jobs validations are done in _customized_validate() + return ["component", "jobs"] + + @property + def _skip_required_compute_missing_validation(self) -> Literal[True]: + return True + + def _validate_compute_is_set(self) -> MutableValidationResult: + validation_result = self._create_empty_validation_result() + if self.compute is not None: + return validation_result + if self.settings is not None and self.settings.default_compute is not None: + return validation_result + + if not isinstance(self.component, str): + validation_result.merge_with(self.component._validate_compute_is_set()) + return validation_result + + def _customized_validate(self) -> MutableValidationResult: + """Validate that all provided inputs and parameters are valid for current pipeline and components in it. + + :return: The validation result + :rtype: MutableValidationResult + """ + validation_result = super(PipelineJob, self)._customized_validate() + + if isinstance(self.component, PipelineComponent): + # Merge with pipeline component validate result for structure validation. 
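# Illustrative usage sketch (not part of the vendored file): the `settings` setter
# above accepts either a PipelineJobSettings instance or a plain dict, which it
# coerces via PipelineJobSettings(**value). Assuming `pipeline_job` is an existing
# PipelineJob:
from azure.ai.ml.entities import PipelineJobSettings

pipeline_job.settings = {"default_compute": "cpu-cluster", "continue_on_step_failure": True}
assert isinstance(pipeline_job.settings, PipelineJobSettings)

# Equivalent explicit form; extra keys are tolerated because PipelineJobSettings
# derives from _AttrDict (see pipeline_job_settings.py further below).
pipeline_job.settings = PipelineJobSettings(default_compute="cpu-cluster", continue_on_step_failure=True)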
+ # Skip top level parameter missing type error + validation_result.merge_with( + self.component._customized_validate(), + condition_skip=lambda x: x.error_code == ValidationErrorCode.PARAMETER_TYPE_UNKNOWN + and x.yaml_path.startswith("inputs"), + ) + # Validate compute + validation_result.merge_with(self._validate_compute_is_set()) + # Validate Input + validation_result.merge_with(self._validate_input()) + # Validate initialization & finalization jobs + validation_result.merge_with(self._validate_init_finalize_job()) + + return validation_result + + def _validate_input(self) -> MutableValidationResult: + validation_result = self._create_empty_validation_result() + if not isinstance(self.component, str): + # TODO(1979547): refine this logic: not all nodes have `_get_input_binding_dict` method + used_pipeline_inputs = set( + itertools.chain( + *[ + self.component._get_input_binding_dict(node if not isinstance(node, LoopNode) else node.body)[0] + for node in self.jobs.values() + if not isinstance(node, ConditionNode) + # condition node has no inputs + ] + ) + ) + # validate inputs + if not isinstance(self.component, Component): + return validation_result + for key, meta in self.component.inputs.items(): + if key not in used_pipeline_inputs: # pylint: disable=possibly-used-before-assignment + # Only validate inputs certainly used. + continue + # raise error when required input with no default value not set + if ( + self.inputs.get(key, None) is None # input not provided + and meta.optional is not True # and it's required + and meta.default is None # and it does not have default + ): + name = self.name or self.display_name + name = f"{name!r} " if name else "" + validation_result.append_error( + yaml_path=f"inputs.{key}", + message=f"Required input {key!r} for pipeline {name}not provided.", + ) + return validation_result + + def _validate_init_finalize_job(self) -> MutableValidationResult: # pylint: disable=too-many-statements + from azure.ai.ml.entities._job.pipeline._io import InputOutputBase, _GroupAttrDict + + validation_result = self._create_empty_validation_result() + # subgraph (PipelineComponent) should not have on_init/on_finalize set + for job_name, job in self.jobs.items(): + if job.type != "pipeline": + continue + if job.settings.on_init: + validation_result.append_error( + yaml_path=f"jobs.{job_name}.settings.on_init", + message="On_init is not supported for pipeline component.", + ) + if job.settings.on_finalize: + validation_result.append_error( + yaml_path=f"jobs.{job_name}.settings.on_finalize", + message="On_finalize is not supported for pipeline component.", + ) + + on_init = None + on_finalize = None + + if self.settings is not None: + # quick return if neither on_init nor on_finalize is set + if self.settings.on_init is None and self.settings.on_finalize is None: + return validation_result + + on_init, on_finalize = self.settings.on_init, self.settings.on_finalize + + append_on_init_error = partial(validation_result.append_error, "settings.on_init") + append_on_finalize_error = partial(validation_result.append_error, "settings.on_finalize") + # on_init and on_finalize cannot be same + if on_init == on_finalize: + append_on_init_error(f"Invalid on_init job {on_init}, it should be different from on_finalize.") + append_on_finalize_error(f"Invalid on_finalize job {on_finalize}, it should be different from on_init.") + # pipeline should have at least one normal node + if len(set(self.jobs.keys()) - {on_init, on_finalize}) == 0: + 
validation_result.append_error(yaml_path="jobs", message="No other job except for on_init/on_finalize job.") + + def _is_control_flow_node(_validate_job_name: str) -> bool: + from azure.ai.ml.entities._builders.control_flow_node import ControlFlowNode + + _validate_job = self.jobs[_validate_job_name] + return issubclass(type(_validate_job), ControlFlowNode) + + def _is_isolated_job(_validate_job_name: str) -> bool: + def _try_get_data_bindings( + _name: str, _input_output_data: Union["_GroupAttrDict", "InputOutputBase"] + ) -> Optional[List]: + """Try to get data bindings from input/output data, return None if not found. + :param _name: The name to use when flattening GroupAttrDict + :type _name: str + :param _input_output_data: The input/output data + :type _input_output_data: Union[_GroupAttrDict, str, InputOutputBase] + :return: A list of data bindings, or None if not found + :rtype: Optional[List[str]] + """ + # handle group input + if GroupInput._is_group_attr_dict(_input_output_data): + _new_input_output_data: _GroupAttrDict = cast(_GroupAttrDict, _input_output_data) + # flatten to avoid nested cases + flattened_values: List[Input] = list(_new_input_output_data.flatten(_name).values()) + # handle invalid empty group + if len(flattened_values) == 0: + return None + return [_value.path for _value in flattened_values] + _input_output_data = _input_output_data._data + if isinstance(_input_output_data, str): + return [_input_output_data] + if not hasattr(_input_output_data, "_data_binding"): + return None + return [_input_output_data._data_binding()] + + _validate_job = self.jobs[_validate_job_name] + # no input to validate job + for _input_name in _validate_job.inputs: + _data_bindings = _try_get_data_bindings(_input_name, _validate_job.inputs[_input_name]) + if _data_bindings is None: + continue + for _data_binding in _data_bindings: + if is_data_binding_expression(_data_binding, ["parent", "jobs"]): + return False + # no output from validate job - iterate other jobs input(s) to validate + for _job_name, _job in self.jobs.items(): + # exclude control flow node as it does not have inputs + if _is_control_flow_node(_job_name): + continue + for _input_name in _job.inputs: + _data_bindings = _try_get_data_bindings(_input_name, _job.inputs[_input_name]) + if _data_bindings is None: + continue + for _data_binding in _data_bindings: + if is_data_binding_expression(_data_binding, ["parent", "jobs", _validate_job_name]): + return False + return True + + # validate on_init + if on_init is not None: + if on_init not in self.jobs: + append_on_init_error(f"On_init job name {on_init} not exists in jobs.") + else: + if _is_control_flow_node(on_init): + append_on_init_error("On_init job should not be a control flow node.") + elif not _is_isolated_job(on_init): + append_on_init_error("On_init job should not have connection to other execution node.") + # validate on_finalize + if on_finalize is not None: + if on_finalize not in self.jobs: + append_on_finalize_error(f"On_finalize job name {on_finalize} not exists in jobs.") + else: + if _is_control_flow_node(on_finalize): + append_on_finalize_error("On_finalize job should not be a control flow node.") + elif not _is_isolated_job(on_finalize): + append_on_finalize_error("On_finalize job should not have connection to other execution node.") + return validation_result + + def _remove_pipeline_input(self) -> None: + """Remove None pipeline input.If not remove, it will pass "None" to backend.""" + redundant_pipeline_inputs = [] + for pipeline_input_name, 
pipeline_input in self._inputs.items():
+            if isinstance(pipeline_input, PipelineInput) and pipeline_input._data is None:
+                redundant_pipeline_inputs.append(pipeline_input_name)
+        for redundant_pipeline_input in redundant_pipeline_inputs:
+            self._inputs.pop(redundant_pipeline_input)
+
+    def _check_private_preview_features(self) -> None:
+        """Checks whether private preview features are included in the pipeline.
+
+        If the private preview environment variable is not set, raise an exception.
+        """
+        if not is_private_preview_enabled():
+            error_msg = (
+                "{} is a private preview feature, "
+                f"please set environment variable {AZUREML_PRIVATE_FEATURES_ENV_VAR} to true to use it."
+            )
+            # check for nodes that are not supported outside of private preview
+            for _, node in self.jobs.items():
+                # TODO: Remove in PuP
+                if isinstance(node, (ImportJob, Import)):
+                    msg = error_msg.format("Import job in pipeline")
+                    raise UserErrorException(message=msg, no_personal_data_message=msg)
+
+    def _to_node(self, context: Optional[Dict] = None, **kwargs: Any) -> "Pipeline":
+        """Translate this pipeline job to a pipeline node when loading the schema.
+
+        (Writing a pipeline job as a node in YAML is not supported at present.)
+
+        :param context: Context of the pipeline job YAML file.
+        :type context: dict
+        :return: Translated pipeline node.
+        :rtype: Pipeline
+        """
+        component = self._to_component(context, **kwargs)
+
+        return Pipeline(
+            component=component,
+            compute=self.compute,
+            # Need to supply the inputs with double curly.
+            inputs=self.inputs,
+            outputs=self.outputs,
+            description=self.description,
+            tags=self.tags,
+            display_name=self.display_name,
+            properties=self.properties,
+        )
+
+    def _to_rest_object(self) -> JobBase:
+        """Build the current parameterized pipeline instance into a pipeline job object before submission.
+
+        :return: Rest pipeline job.
+        :rtype: JobBase
+        """
+        # Check if there are private preview features in it
+        self._check_private_preview_features()
+
+        # Build the inputs to dict. Handle both value & binding assignment.
+        # Example: {
+        #     "input_data": {"data": {"path": "path/to/input/data"}, "mode": "Mount"},
+        #     "input_value": 10,
+        #     "learning_rate": "${{jobs.step1.inputs.learning_rate}}"
+        # }
+        built_inputs = self._build_inputs()
+
+        # Build the outputs to dict
+        # example: {"eval_output": "${{jobs.eval.outputs.eval_output}}"}
+        built_outputs = self._build_outputs()
+
+        if self.settings is not None:
+            settings_dict = self.settings._to_dict()
+
+        if isinstance(self.component, PipelineComponent):
+            source = self.component._source
+            # Build the jobs to dict
+            rest_component_jobs = self.component._build_rest_component_jobs()
+        else:
+            source = ComponentSource.REMOTE_WORKSPACE_JOB
+            rest_component_jobs = {}
+        # add _source on pipeline job.settings
+        if "_source" not in settings_dict:  # pylint: disable=possibly-used-before-assignment
+            settings_dict.update({"_source": source})
+
+        # TODO: Revisit this logic when multiple types of component jobs are supported
+        rest_compute = self.compute
+        # This will be resolved in job_operations _resolve_arm_id_or_upload_dependencies.
+        component_id = self.component if isinstance(self.component, str) else self.component.id
+
+        # TODO: remove it in the future.
+        # MFE does not support passing None or empty input values. Remove the empty inputs from the pipeline job.
+ built_inputs = {k: v for k, v in built_inputs.items() if v is not None and v != ""} + + pipeline_job = RestPipelineJob( + compute_id=rest_compute, + component_id=component_id, + display_name=self.display_name, + tags=self.tags, + description=self.description, + properties=self.properties, + experiment_name=self.experiment_name, + jobs=rest_component_jobs, + inputs=to_rest_dataset_literal_inputs(built_inputs, job_type=self.type), + outputs=to_rest_data_outputs(built_outputs), + settings=settings_dict, + services={k: v._to_rest_object() for k, v in self.services.items()} if self.services else None, + identity=self.identity._to_job_rest_object() if self.identity else None, + ) + + rest_job = JobBase(properties=pipeline_job) + rest_job.name = self.name + return rest_job + + @classmethod + def _load_from_rest(cls, obj: JobBase) -> "PipelineJob": + """Build a pipeline instance from rest pipeline object. + + :param obj: The REST Pipeline Object + :type obj: JobBase + :return: pipeline job. + :rtype: PipelineJob + """ + properties: RestPipelineJob = obj.properties + # Workaround for BatchEndpoint as these fields are not filled in + # Unpack the inputs + from_rest_inputs = from_rest_inputs_to_dataset_literal(properties.inputs) or {} + from_rest_outputs = from_rest_data_outputs(properties.outputs) or {} + # Unpack the component jobs + sub_nodes = PipelineComponent._resolve_sub_nodes(properties.jobs) if properties.jobs else {} + # backend may still store Camel settings, eg: DefaultDatastore, translate them to snake when load back + settings_dict = transform_dict_keys(properties.settings, camel_to_snake) if properties.settings else None + settings_sdk = PipelineJobSettings(**settings_dict) if settings_dict else PipelineJobSettings() + # Create component or use component id + if getattr(properties, "component_id", None): + component = properties.component_id + else: + component = PipelineComponent._load_from_rest_pipeline_job( + { + "inputs": from_rest_inputs, + "outputs": from_rest_outputs, + "display_name": properties.display_name, + "description": properties.description, + "jobs": sub_nodes, + } + ) + + job = PipelineJob( + component=component, + inputs=from_rest_inputs, + outputs=from_rest_outputs, + name=obj.name, + id=obj.id, + jobs=sub_nodes, + display_name=properties.display_name, + tags=properties.tags, + properties=properties.properties, + experiment_name=properties.experiment_name, + status=properties.status, + creation_context=SystemData._from_rest_object(obj.system_data) if obj.system_data else None, + services=JobServiceBase._from_rest_job_services(properties.services) if properties.services else None, + compute=get_resource_name_from_arm_id_safe(properties.compute_id), + settings=settings_sdk, + identity=( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + ) + + return job + + def _to_dict(self) -> Dict: + res: dict = self._dump_for_validation() + return res + + @classmethod + def _component_items_from_path(cls, data: Dict) -> Generator: + if "jobs" in data: + for node_name, job_instance in data["jobs"].items(): + potential_component_path = job_instance["component"] if "component" in job_instance else None + if isinstance(potential_component_path, str) and potential_component_path.startswith("file:"): + yield node_name, potential_component_path + + @classmethod + def _load_from_dict(cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any) -> "PipelineJob": + path_first_occurrence: dict = {} + 
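# Illustrative sketch (not part of the vendored file): _component_items_from_path
# above yields (node_name, component_path) pairs for nodes whose "component" field
# is a local "file:" reference, and _load_from_dict (continuing below) reuses the
# first occurrence of each path so a given YAML file is resolved only once. A toy
# input and the pairs it would yield:
data = {
    "jobs": {
        "train": {"component": "file:./train.yml"},
        "score": {"component": "file:./score.yml"},
        "retrain": {"component": "file:./train.yml"},  # same path as "train"
    }
}
# list(PipelineJob._component_items_from_path(data)) ->
#   [("train", "file:./train.yml"), ("score", "file:./score.yml"), ("retrain", "file:./train.yml")]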
component_first_occurrence = {} + for node_name, component_path in cls._component_items_from_path(data): + if component_path in path_first_occurrence: + component_first_occurrence[node_name] = path_first_occurrence[component_path] + # set components to be replaced here may break the validation logic + else: + path_first_occurrence[component_path] = node_name + + # use this instead of azure.ai.ml.entities._util.load_from_dict to avoid parsing + loaded_schema = cls._create_schema_for_validation(context=context).load(data, **kwargs) + + # replace repeat component with first occurrence to reduce arm id resolution + # current load yaml file logic is in azure.ai.ml._schema.core.schema.YamlFileSchema.load_from_file + # is it possible to load the same yaml file only once in 1 pipeline loading? + for node_name, first_occurrence in component_first_occurrence.items(): + job = loaded_schema["jobs"][node_name] + job._component = loaded_schema["jobs"][first_occurrence].component + # For Parallel job, should also align task attribute which is usually from component.task + if isinstance(job, Parallel): + job.task = job._component.task + # parallel.task.code is based on parallel._component.base_path, so need to update it + job._base_path = job._component.base_path + return PipelineJob( + base_path=context[BASE_PATH_CONTEXT_KEY], + _source=ComponentSource.YAML_JOB, + **loaded_schema, + ) + + def __str__(self) -> str: + try: + res_to_yaml: str = self._to_yaml() + return res_to_yaml + except BaseException: # pylint: disable=W0718 + res: str = super(PipelineJob, self).__str__() + return res + + def _get_telemetry_values(self) -> Dict: + telemetry_values: dict = super()._get_telemetry_values() + if isinstance(self.component, PipelineComponent): + telemetry_values.update(self.component._get_telemetry_values()) + else: + telemetry_values.update({"source": ComponentSource.REMOTE_WORKSPACE_JOB}) + telemetry_values.pop("is_anonymous") + return telemetry_values + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> "PipelineComponent": + """Translate a pipeline job to pipeline component. + + :param context: Context of pipeline job YAML file. + :type context: dict + :return: Translated pipeline component. + :rtype: PipelineComponent + """ + ignored_keys = PipelineComponent._check_ignored_keys(self) + if ignored_keys: + name = self.name or self.display_name + name = f"{name!r} " if name else "" + module_logger.warning("%s ignored when translating PipelineJob %sto PipelineComponent.", ignored_keys, name) + pipeline_job_dict = kwargs.get("pipeline_job_dict", {}) + context = context or {BASE_PATH_CONTEXT_KEY: Path("./")} + + # Create anonymous pipeline component with default version as 1 + return PipelineComponent( + base_path=context[BASE_PATH_CONTEXT_KEY], + display_name=self.display_name, + inputs=self._to_inputs(inputs=self.inputs, pipeline_job_dict=pipeline_job_dict), + outputs=self._to_outputs(outputs=self.outputs, pipeline_job_dict=pipeline_job_dict), + jobs=self.jobs, + ) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/pipeline_job_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/pipeline_job_settings.py new file mode 100644 index 00000000..0fe41e2e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/pipeline/pipeline_job_settings.py @@ -0,0 +1,75 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +from typing import Any, Dict, Generator, Optional + +from azure.ai.ml.entities._job.pipeline._attr_dict import _AttrDict + + +class PipelineJobSettings(_AttrDict): + """Settings of PipelineJob. + + :param default_datastore: The default datastore of the pipeline. + :type default_datastore: str + :param default_compute: The default compute target of the pipeline. + :type default_compute: str + :param continue_on_step_failure: Flag indicating whether to continue pipeline execution if a step fails. + :type continue_on_step_failure: bool + :param force_rerun: Flag indicating whether to force rerun pipeline execution. + :type force_rerun: bool + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_pipeline_job_configurations.py + :start-after: [START configure_pipeline_job_and_settings] + :end-before: [END configure_pipeline_job_and_settings] + :language: python + :dedent: 8 + :caption: Shows how to set pipeline properties using this class. + """ + + def __init__( + self, + default_datastore: Optional[str] = None, + default_compute: Optional[str] = None, + continue_on_step_failure: Optional[bool] = None, + force_rerun: Optional[bool] = None, + **kwargs: Any + ) -> None: + self._init = True + super().__init__() + self.default_compute: Any = default_compute + self.default_datastore: Any = default_datastore + self.continue_on_step_failure = continue_on_step_failure + self.force_rerun = force_rerun + self.on_init = kwargs.get("on_init", None) + self.on_finalize = kwargs.get("on_finalize", None) + for k, v in kwargs.items(): + setattr(self, k, v) + self._init = False + + def _get_valid_keys(self) -> Generator[str, Any, None]: + for k, v in self.__dict__.items(): + if v is None: + continue + # skip private attributes inherited from _AttrDict + if k in ["_logger", "_allowed_keys", "_init", "_key_restriction"]: + continue + yield k + + def _to_dict(self) -> Dict: + result = {} + for k in self._get_valid_keys(): + result[k] = self.__dict__[k] + result.update(self._get_attrs()) + return result + + def _initializing(self) -> bool: + return self._init + + def __bool__(self) -> bool: + for _ in self._get_valid_keys(): + return True + # _attr_dict will return False if no extra attributes are set + return self.__len__() > 0 diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/queue_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/queue_settings.py new file mode 100644 index 00000000..5b51fb6e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/queue_settings.py @@ -0,0 +1,87 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +import logging +from typing import Any, Dict, Optional, Union + +from ..._restclient.v2023_04_01_preview.models import QueueSettings as RestQueueSettings +from ..._utils._experimental import experimental +from ..._utils.utils import is_data_binding_expression +from ...constants._job.job import JobPriorityValues, JobTierNames +from ...entities._mixins import DictMixin, RestTranslatableMixin +from ...exceptions import ErrorCategory, ErrorTarget, ValidationErrorType, ValidationException + +module_logger = logging.getLogger(__name__) + + +@experimental +class QueueSettings(RestTranslatableMixin, DictMixin): + """Queue settings for a pipeline job. + + :ivar job_tier: Enum to determine the job tier. 
Possible values include: "Spot", "Basic", + "Standard", "Premium", "Null". + :vartype job_tier: str or ~azure.mgmt.machinelearningservices.models.JobTier + :ivar priority: Controls the priority of the job on a compute. + :vartype priority: str + :keyword job_tier: The job tier. Accepted values are "Spot", "Basic", "Standard", and "Premium". + :paramtype job_tier: Optional[Literal]] + :keyword priority: The priority of the job on a compute. Accepted values are "low", "medium", and "high". + Defaults to "medium". + :paramtype priority: Optional[Literal] + :keyword kwargs: Additional properties for QueueSettings. + :paramtype kwargs: Optional[dict] + """ + + def __init__( + self, # pylint: disable=unused-argument + *, + job_tier: Optional[str] = None, + priority: Optional[str] = None, + **kwargs: Any, + ) -> None: + self.job_tier = job_tier + self.priority = priority + + def _to_rest_object(self) -> RestQueueSettings: + self._validate() + job_tier = JobTierNames.ENTITY_TO_REST.get(self.job_tier.lower(), None) if self.job_tier else None + priority = JobPriorityValues.ENTITY_TO_REST.get(self.priority.lower(), None) if self.priority else None + return RestQueueSettings(job_tier=job_tier, priority=priority) + + @classmethod + def _from_rest_object(cls, obj: Union[Dict[str, Any], RestQueueSettings, None]) -> Optional["QueueSettings"]: + if obj is None: + return None + if isinstance(obj, dict): + queue_settings = RestQueueSettings.from_dict(obj) + return cls._from_rest_object(queue_settings) + job_tier = JobTierNames.REST_TO_ENTITY.get(obj.job_tier, None) if obj.job_tier else None + priority = JobPriorityValues.REST_TO_ENTITY.get(obj.priority, None) if hasattr(obj, "priority") else None + return cls(job_tier=job_tier, priority=priority) + + def _validate(self) -> None: + for key, enum_class in [("job_tier", JobTierNames), ("priority", JobPriorityValues)]: + value = getattr(self, key) + if is_data_binding_expression(value): + msg = ( + f"do not support data binding expression on {key} as it involves value mapping " + f"when transformed to rest object, but received '{value}'." + ) + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) + valid_keys = list(enum_class.ENTITY_TO_REST.keys()) # type: ignore[attr-defined] + if value and value.lower() not in valid_keys: + msg = f"{key} should be one of {valid_keys}, but received '{value}'." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/resource_configuration.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/resource_configuration.py new file mode 100644 index 00000000..a10d4a66 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/resource_configuration.py @@ -0,0 +1,98 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
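# Illustrative usage sketch (not part of the vendored file): QueueSettings above
# validates job_tier and priority case-insensitively against JobTierNames and
# JobPriorityValues before mapping them to their REST enums. A minimal sketch:
from azure.ai.ml.entities import QueueSettings

qs = QueueSettings(job_tier="Spot", priority="low")
rest_qs = qs._to_rest_object()  # lower-cased values are mapped to REST enum members
# Passing a data-binding expression such as "${{parent.inputs.tier}}" or an
# unknown literal raises the ValidationException shown in _validate() above.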
+# --------------------------------------------------------- + +import json +import logging +from typing import Any, Dict, Optional + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ResourceConfiguration as RestResourceConfiguration +from azure.ai.ml.constants._job.job import JobComputePropertyFields +from azure.ai.ml.entities._mixins import DictMixin, RestTranslatableMixin + +module_logger = logging.getLogger(__name__) + + +class ResourceConfiguration(RestTranslatableMixin, DictMixin): + """Resource configuration for a job. + + This class should not be instantiated directly. Instead, use its subclasses. + + :keyword instance_count: The number of instances to use for the job. + :paramtype instance_count: Optional[int] + :keyword instance_type: The type of instance to use for the job. + :paramtype instance_type: Optional[str] + :keyword properties: The resource's property dictionary. + :paramtype properties: Optional[dict[str, Any]] + """ + + def __init__( + self, # pylint: disable=unused-argument + *, + instance_count: Optional[int] = None, + instance_type: Optional[str] = None, + properties: Optional[Dict[str, Any]] = None, + **kwargs: Any + ) -> None: + self.instance_count = instance_count + self.instance_type = instance_type + self.properties = {} + if properties is not None: + for key, value in properties.items(): + if key == JobComputePropertyFields.AISUPERCOMPUTER: + self.properties[JobComputePropertyFields.SINGULARITY.lower()] = value + else: + self.properties[key] = value + + def _to_rest_object(self) -> RestResourceConfiguration: + serialized_properties = {} + if self.properties: + for key, value in self.properties.items(): + try: + if ( + key.lower() == JobComputePropertyFields.SINGULARITY.lower() + or key.lower() == JobComputePropertyFields.AISUPERCOMPUTER.lower() + ): + # Map Singularity -> AISupercomputer in SDK until MFE does mapping + key = JobComputePropertyFields.AISUPERCOMPUTER + # recursively convert Ordered Dict to dictionary + serialized_properties[key] = json.loads(json.dumps(value)) + except Exception: # pylint: disable=W0718 + pass + return RestResourceConfiguration( + instance_count=self.instance_count, + instance_type=self.instance_type, + properties=serialized_properties, + ) + + @classmethod + def _from_rest_object( # pylint: disable=arguments-renamed + cls, rest_obj: Optional[RestResourceConfiguration] + ) -> Optional["ResourceConfiguration"]: + if rest_obj is None: + return None + return ResourceConfiguration( + instance_count=rest_obj.instance_count, + instance_type=rest_obj.instance_type, + properties=rest_obj.properties, + deserialize_properties=True, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ResourceConfiguration): + return NotImplemented + return self.instance_count == other.instance_count and self.instance_type == other.instance_type + + def __ne__(self, other: object) -> bool: + if not isinstance(other, ResourceConfiguration): + return NotImplemented + return not self.__eq__(other) + + def _merge_with(self, other: "ResourceConfiguration") -> None: + if other: + if other.instance_count: + self.instance_count = other.instance_count + if other.instance_type: + self.instance_type = other.instance_type + if other.properties: + self.properties = other.properties diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/service_instance.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/service_instance.py new file mode 100644 index 00000000..0e5ba6c6 --- /dev/null +++ 
b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/service_instance.py @@ -0,0 +1,59 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +import logging +from typing import Any, Dict, Optional + +from azure.ai.ml._restclient.runhistory.models import ServiceInstanceResult +from azure.ai.ml.entities._mixins import DictMixin, RestTranslatableMixin + +module_logger = logging.getLogger(__name__) + + +class ServiceInstance(RestTranslatableMixin, DictMixin): + """Service Instance Result. + + :keyword type: The type of service. + :paramtype type: Optional[str] + :keyword port: The port used by the service. + :paramtype port: Optional[int] + :keyword status: The status of the service. + :paramtype status: Optional[str] + :keyword error: The error message. + :paramtype error: Optional[str] + :keyword endpoint: The service endpoint. + :paramtype endpoint: Optional[str] + :keyword properties: The service instance's properties. + :paramtype properties: Optional[dict[str, str]] + """ + + def __init__( + self, # pylint: disable=unused-argument + *, + type: Optional[str] = None, # pylint: disable=redefined-builtin + port: Optional[int] = None, + status: Optional[str] = None, + error: Optional[str] = None, + endpoint: Optional[str] = None, + properties: Optional[Dict[str, str]] = None, + **kwargs: Any + ) -> None: + self.type = type + self.port = port + self.status = status + self.error = error + self.endpoint = endpoint + self.properties = properties + + @classmethod + # pylint: disable=arguments-differ + def _from_rest_object(cls, obj: ServiceInstanceResult, node_index: int) -> "ServiceInstance": # type: ignore + return cls( + type=obj.type, + port=obj.port, + status=obj.status, + error=obj.error.error.message if obj.error and obj.error.error else None, + endpoint=obj.endpoint.replace("<nodeIndex>", str(node_index)) if obj.endpoint else obj.endpoint, + properties=obj.properties, + ) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/spark_helpers.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/spark_helpers.py new file mode 100644 index 00000000..d3fdf9dc --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/spark_helpers.py @@ -0,0 +1,210 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +# pylint: disable=protected-access +import re +from typing import Any + +from azure.ai.ml.constants import InputOutputModes +from azure.ai.ml.constants._component import ComponentJobConstants +from azure.ai.ml.entities._inputs_outputs import Input, Output +from azure.ai.ml.entities._job.pipeline._io import NodeInput, NodeOutput +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +def _validate_spark_configurations(obj: Any) -> None: + # skip validation when component of node is from remote + if hasattr(obj, "component") and isinstance(obj.component, str): + return + if obj.dynamic_allocation_enabled in ["True", "true", True]: + if ( + obj.driver_cores is None + or obj.driver_memory is None + or obj.executor_cores is None + or obj.executor_memory is None + ): + msg = ( + "spark.driver.cores, spark.driver.memory, spark.executor.cores and spark.executor.memory are " + "mandatory fields." 
+ ) + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SPARK_JOB, + error_category=ErrorCategory.USER_ERROR, + ) + if obj.dynamic_allocation_min_executors is None or obj.dynamic_allocation_max_executors is None: + msg = ( + "spark.dynamicAllocation.minExecutors and spark.dynamicAllocation.maxExecutors are required " + "when dynamic allocation is enabled." + ) + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SPARK_JOB, + error_category=ErrorCategory.USER_ERROR, + ) + if not ( + obj.dynamic_allocation_min_executors > 0 + and obj.dynamic_allocation_min_executors <= obj.dynamic_allocation_max_executors + ): + msg = ( + "Dynamic min executors should be bigger than 0 and min executors should be equal or less than " + "max executors." + ) + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SPARK_JOB, + error_category=ErrorCategory.USER_ERROR, + ) + if obj.executor_instances and ( + obj.executor_instances > obj.dynamic_allocation_max_executors + or obj.executor_instances < obj.dynamic_allocation_min_executors + ): + msg = ( + "Executor instances must be a valid non-negative integer and must be between " + "spark.dynamicAllocation.minExecutors and spark.dynamicAllocation.maxExecutors" + ) + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SPARK_JOB, + error_category=ErrorCategory.USER_ERROR, + ) + else: + if ( + obj.driver_cores is None + or obj.driver_memory is None + or obj.executor_cores is None + or obj.executor_memory is None + or obj.executor_instances is None + ): + msg = ( + "spark.driver.cores, spark.driver.memory, spark.executor.cores, spark.executor.memory and " + "spark.executor.instances are mandatory fields." + ) + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SPARK_JOB, + error_category=ErrorCategory.USER_ERROR, + ) + if obj.dynamic_allocation_min_executors is not None or obj.dynamic_allocation_max_executors is not None: + msg = "Should not specify min or max executors when dynamic allocation is disabled." 
+ raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SPARK_JOB, + error_category=ErrorCategory.USER_ERROR, + ) + + +def _validate_compute_or_resources(compute: Any, resources: Any) -> None: + # if resources is set, then ensure it is valid before + # checking mutual exclusiveness against compute existence + if compute is None and resources is None: + msg = "One of either compute or resources must be specified for Spark job" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SPARK_JOB, + error_category=ErrorCategory.USER_ERROR, + ) + if compute and resources: + msg = "Only one of either compute or resources may be specified for Spark job" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SPARK_JOB, + error_category=ErrorCategory.USER_ERROR, + ) + + +# Only "direct" mode is supported for spark job inputs and outputs +# pylint: disable=no-else-raise, too-many-boolean-expressions +def _validate_input_output_mode(inputs: Any, outputs: Any) -> None: + for input_name, input_value in inputs.items(): + if isinstance(input_value, Input) and input_value.mode != InputOutputModes.DIRECT: + # For standalone job input + msg = "Input '{}' is using '{}' mode, only '{}' is supported for Spark job" + raise ValidationException( + message=msg.format(input_name, input_value.mode, InputOutputModes.DIRECT), + no_personal_data_message=msg.format("[input_name]", "[input_value.mode]", "direct"), + target=ErrorTarget.SPARK_JOB, + error_category=ErrorCategory.USER_ERROR, + ) + elif ( + isinstance(input_value, NodeInput) + and ( + isinstance(input_value._data, Input) + and not ( + isinstance(input_value._data.path, str) + and bool(re.search(ComponentJobConstants.INPUT_PATTERN, input_value._data.path)) + ) + and input_value._data.mode != InputOutputModes.DIRECT + ) + and (isinstance(input_value._meta, Input) and input_value._meta.mode != InputOutputModes.DIRECT) + ): + # For node input in pipeline job, client side can only validate node input which isn't bound to pipeline + # input or node output. + # 1. If node input is bound to pipeline input, we can't get pipeline level input mode in node level + # validate. Even if we can judge through component input mode (_meta), we should note that pipeline level + # input mode has higher priority than component level. so component input can be set "Mount", but it can + # run successfully when pipeline input is "Direct". + # 2. If node input is bound to last node output, input mode should be decoupled with output mode, so we + # always get None mode in node level. In this case, if we define correct "Direct" mode in component yaml, + # component level mode will take effect and run successfully. Otherwise, it need to set mode in node level + # like input1: path: ${{parent.jobs.sample_word.outputs.output1}} mode: direct. 
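# Illustrative sketch (not part of the vendored file): as the comments above
# explain, only "direct" mode is accepted for Spark job inputs and outputs. A
# minimal set of bindings that passes this validation, with placeholder
# datastore paths:
from azure.ai.ml import Input, Output
from azure.ai.ml.constants import InputOutputModes

spark_inputs = {
    "raw_data": Input(
        type="uri_file",
        path="azureml://datastores/workspaceblobstore/paths/data/iris.csv",  # assumed path
        mode=InputOutputModes.DIRECT,
    )
}
spark_outputs = {
    "processed": Output(
        type="uri_folder",
        path="azureml://datastores/workspaceblobstore/paths/output/",  # assumed path
        mode=InputOutputModes.DIRECT,
    )
}
# Any other mode (for example "ro_mount") triggers the ValidationException raised below.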
+ msg = "Input '{}' is using '{}' mode, only '{}' is supported for Spark job" + raise ValidationException( + message=msg.format( + input_name, input_value._data.mode or input_value._meta.mode, InputOutputModes.DIRECT + ), + no_personal_data_message=msg.format("[input_name]", "[input_value.mode]", "direct"), + target=ErrorTarget.SPARK_JOB, + error_category=ErrorCategory.USER_ERROR, + ) + + for output_name, output_value in outputs.items(): + if ( + isinstance(output_value, Output) + and output_name != "default" + and output_value.mode != InputOutputModes.DIRECT + ): + # For standalone job output + msg = "Output '{}' is using '{}' mode, only '{}' is supported for Spark job" + raise ValidationException( + message=msg.format(output_name, output_value.mode, InputOutputModes.DIRECT), + no_personal_data_message=msg.format("[output_name]", "[output_value.mode]", "direct"), + target=ErrorTarget.SPARK_JOB, + error_category=ErrorCategory.USER_ERROR, + ) + elif ( + isinstance(output_value, NodeOutput) + and output_name != "default" + and ( + isinstance(output_value._data, Output) + and not ( + isinstance(output_value._data.path, str) + and bool(re.search(ComponentJobConstants.OUTPUT_PATTERN, output_value._data.path)) + ) + and output_value._data.mode != InputOutputModes.DIRECT + ) + and (isinstance(output_value._meta, Output) and output_value._meta.mode != InputOutputModes.DIRECT) + ): + # For node output in pipeline job, client side can only validate node output which isn't bound to pipeline + # output. + # 1. If node output is bound to pipeline output, we can't get pipeline level output mode in node level + # validate. Even if we can judge through component output mode (_meta), we should note that pipeline level + # output mode has higher priority than component level. so component output can be set "upload", but it + # can run successfully when pipeline output is "Direct". + msg = "Output '{}' is using '{}' mode, only '{}' is supported for Spark job" + raise ValidationException( + message=msg.format( + output_name, output_value._data.mode or output_value._meta.mode, InputOutputModes.DIRECT + ), + no_personal_data_message=msg.format("[output_name]", "[output_value.mode]", "direct"), + target=ErrorTarget.SPARK_JOB, + error_category=ErrorCategory.USER_ERROR, + ) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/spark_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/spark_job.py new file mode 100644 index 00000000..10930fb4 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/spark_job.py @@ -0,0 +1,393 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- +# pylint: disable=protected-access, too-many-instance-attributes + +import copy +import logging +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, Optional, Union + +from marshmallow import INCLUDE + +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase +from azure.ai.ml._restclient.v2023_04_01_preview.models import SparkJob as RestSparkJob +from azure.ai.ml._schema.job.identity import AMLTokenIdentitySchema, ManagedIdentitySchema, UserIdentitySchema +from azure.ai.ml._schema.job.parameterized_spark import CONF_KEY_MAP +from azure.ai.ml._schema.job.spark_job import SparkJobSchema +from azure.ai.ml.constants import JobType +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, TYPE +from azure.ai.ml.constants._job.job import SparkConfKey +from azure.ai.ml.entities._credentials import ( + AmlTokenConfiguration, + ManagedIdentityConfiguration, + UserIdentityConfiguration, + _BaseJobIdentityConfiguration, +) +from azure.ai.ml.entities._inputs_outputs import Input, Output +from azure.ai.ml.entities._job._input_output_helpers import ( + from_rest_data_outputs, + from_rest_inputs_to_dataset_literal, + to_rest_data_outputs, + to_rest_dataset_literal_inputs, + validate_inputs_for_args, +) +from azure.ai.ml.entities._job.parameterized_spark import ParameterizedSpark +from azure.ai.ml.entities._util import load_from_dict + +from ..._schema import NestedField, UnionField +from .job import Job +from .job_io_mixin import JobIOMixin +from .spark_helpers import _validate_compute_or_resources, _validate_input_output_mode, _validate_spark_configurations +from .spark_job_entry import SparkJobEntry +from .spark_job_entry_mixin import SparkJobEntryMixin +from .spark_resource_configuration import SparkResourceConfiguration + +# avoid circular import error +if TYPE_CHECKING: + from azure.ai.ml.entities import SparkComponent + from azure.ai.ml.entities._builders import Spark + +module_logger = logging.getLogger(__name__) + + +class SparkJob(Job, ParameterizedSpark, JobIOMixin, SparkJobEntryMixin): + """A standalone Spark job. + + :keyword driver_cores: The number of cores to use for the driver process, only in cluster mode. + :paramtype driver_cores: Optional[int] + :keyword driver_memory: The amount of memory to use for the driver process, formatted as strings with a size unit + suffix ("k", "m", "g" or "t") (e.g. "512m", "2g"). + :paramtype driver_memory: Optional[str] + :keyword executor_cores: The number of cores to use on each executor. + :paramtype executor_cores: Optional[int] + :keyword executor_memory: The amount of memory to use per executor process, formatted as strings with a size unit + suffix ("k", "m", "g" or "t") (e.g. "512m", "2g"). + :paramtype executor_memory: Optional[str] + :keyword executor_instances: The initial number of executors. + :paramtype executor_instances: Optional[int] + :keyword dynamic_allocation_enabled: Whether to use dynamic resource allocation, which scales the number of + executors registered with this application up and down based on the workload. + :paramtype dynamic_allocation_enabled: Optional[bool] + :keyword dynamic_allocation_min_executors: The lower bound for the number of executors if dynamic allocation is + enabled. + :paramtype dynamic_allocation_min_executors: Optional[int] + :keyword dynamic_allocation_max_executors: The upper bound for the number of executors if dynamic allocation is + enabled. 
+ :paramtype dynamic_allocation_max_executors: Optional[int] + :keyword inputs: The mapping of input data bindings used in the job. + :paramtype inputs: Optional[dict[str, ~azure.ai.ml.Input]] + :keyword outputs: The mapping of output data bindings used in the job. + :paramtype outputs: Optional[dict[str, ~azure.ai.ml.Output]] + :keyword compute: The compute resource the job runs on. + :paramtype compute: Optional[str] + :keyword identity: The identity that the Spark job will use while running on compute. + :paramtype identity: Optional[Union[dict[str, str], ~azure.ai.ml.ManagedIdentityConfiguration, + ~azure.ai.ml.AmlTokenConfiguration, ~azure.ai.ml.UserIdentityConfiguration]] + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_spark_configurations.py + :start-after: [START spark_job_configuration] + :end-before: [END spark_job_configuration] + :language: python + :dedent: 8 + :caption: Configuring a SparkJob. + """ + + def __init__( + self, + *, + driver_cores: Optional[Union[int, str]] = None, + driver_memory: Optional[str] = None, + executor_cores: Optional[Union[int, str]] = None, + executor_memory: Optional[str] = None, + executor_instances: Optional[Union[int, str]] = None, + dynamic_allocation_enabled: Optional[Union[bool, str]] = None, + dynamic_allocation_min_executors: Optional[Union[int, str]] = None, + dynamic_allocation_max_executors: Optional[Union[int, str]] = None, + inputs: Optional[Dict[str, Union[Input, str, bool, int, float]]] = None, + outputs: Optional[Dict[str, Output]] = None, + compute: Optional[str] = None, + identity: Optional[ + Union[Dict[str, str], ManagedIdentityConfiguration, AmlTokenConfiguration, UserIdentityConfiguration] + ] = None, + resources: Optional[Union[Dict, SparkResourceConfiguration]] = None, + **kwargs: Any, + ) -> None: + kwargs[TYPE] = JobType.SPARK + + super().__init__(**kwargs) + self.conf: Dict = self.conf or {} + self.properties_sparkJob = self.properties or {} + self.driver_cores = driver_cores + self.driver_memory = driver_memory + self.executor_cores = executor_cores + self.executor_memory = executor_memory + self.executor_instances = executor_instances + self.dynamic_allocation_enabled = dynamic_allocation_enabled + self.dynamic_allocation_min_executors = dynamic_allocation_min_executors + self.dynamic_allocation_max_executors = dynamic_allocation_max_executors + self.inputs = inputs # type: ignore[assignment] + self.outputs = outputs # type: ignore[assignment] + self.compute = compute + self.resources = resources + self.identity = identity + if self.executor_instances is None and str(self.dynamic_allocation_enabled).lower() == "true": + self.executor_instances = self.dynamic_allocation_min_executors + + @property + def resources(self) -> Optional[Union[Dict, SparkResourceConfiguration]]: + """The compute resource configuration for the job. + + :return: The compute resource configuration for the job. + :rtype: Optional[~azure.ai.ml.entities.SparkResourceConfiguration] + """ + return self._resources + + @resources.setter + def resources(self, value: Optional[Union[Dict[str, str], SparkResourceConfiguration]]) -> None: + """Sets the compute resource configuration for the job. + + :param value: The compute resource configuration for the job. 
+ :type value: Optional[Union[dict[str, str], ~azure.ai.ml.entities.SparkResourceConfiguration]] + """ + if isinstance(value, dict): + value = SparkResourceConfiguration(**value) + self._resources = value + + @property + def identity( + self, + ) -> Optional[Union[Dict, ManagedIdentityConfiguration, AmlTokenConfiguration, UserIdentityConfiguration]]: + """The identity that the Spark job will use while running on compute. + + :return: The identity that the Spark job will use while running on compute. + :rtype: Optional[Union[~azure.ai.ml.ManagedIdentityConfiguration, ~azure.ai.ml.AmlTokenConfiguration, + ~azure.ai.ml.UserIdentityConfiguration]] + """ + return self._identity + + @identity.setter + def identity( + self, + value: Optional[ + Union[Dict[str, str], ManagedIdentityConfiguration, AmlTokenConfiguration, UserIdentityConfiguration] + ], + ) -> None: + """Sets the identity that the Spark job will use while running on compute. + + :param value: The identity that the Spark job will use while running on compute. + :type value: Optional[Union[dict[str, str], ~azure.ai.ml.ManagedIdentityConfiguration, + ~azure.ai.ml.AmlTokenConfiguration, ~azure.ai.ml.UserIdentityConfiguration]] + """ + if isinstance(value, dict): + identify_schema = UnionField( + [ + NestedField(ManagedIdentitySchema, unknown=INCLUDE), + NestedField(AMLTokenIdentitySchema, unknown=INCLUDE), + NestedField(UserIdentitySchema, unknown=INCLUDE), + ] + ) + value = identify_schema._deserialize(value=value, attr=None, data=None) + self._identity = value + + def _to_dict(self) -> Dict: + res: dict = SparkJobSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + def filter_conf_fields(self) -> Dict[str, str]: + """Filters out the fields of the conf attribute that are not among the Spark configuration fields + listed in ~azure.ai.ml._schema.job.parameterized_spark.CONF_KEY_MAP and returns them in their own dictionary. + + :return: A dictionary of the conf fields that are not Spark configuration fields. 
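# Illustrative sketch (not part of the vendored file): filter_conf_fields (whose
# body follows) keeps only the conf entries that are not promoted Spark settings
# listed in CONF_KEY_MAP (e.g. "spark.driver.cores"). Assuming `job` is a SparkJob:
job.conf = {
    "spark.driver.cores": 1,           # promoted field, filtered out
    "spark.yarn.maxAppAttempts": 1,    # custom entry, kept
}
assert job.filter_conf_fields() == {"spark.yarn.maxAppAttempts": 1}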
+ :rtype: dict[str, str] + """ + if self.conf is None: + return {} + data_conf = {} + for conf_key, conf_val in self.conf.items(): + if not conf_key in CONF_KEY_MAP: + data_conf[conf_key] = conf_val + return data_conf + + def _to_rest_object(self) -> JobBase: + self._validate() + conf = { + **(self.filter_conf_fields()), + "spark.driver.cores": self.driver_cores, + "spark.driver.memory": self.driver_memory, + "spark.executor.cores": self.executor_cores, + "spark.executor.memory": self.executor_memory, + } + if self.dynamic_allocation_enabled in ["True", "true", True]: + conf["spark.dynamicAllocation.enabled"] = True + conf["spark.dynamicAllocation.minExecutors"] = self.dynamic_allocation_min_executors + conf["spark.dynamicAllocation.maxExecutors"] = self.dynamic_allocation_max_executors + if self.executor_instances is not None: + conf["spark.executor.instances"] = self.executor_instances + + properties = RestSparkJob( + experiment_name=self.experiment_name, + display_name=self.display_name, + description=self.description, + tags=self.tags, + code_id=self.code, + entry=self.entry._to_rest_object() if self.entry is not None and not isinstance(self.entry, dict) else None, + py_files=self.py_files, + jars=self.jars, + files=self.files, + archives=self.archives, + identity=( + self.identity._to_job_rest_object() if self.identity and not isinstance(self.identity, dict) else None + ), + conf=conf, + properties=self.properties_sparkJob, + environment_id=self.environment, + inputs=to_rest_dataset_literal_inputs(self.inputs, job_type=self.type), + outputs=to_rest_data_outputs(self.outputs), + args=self.args, + compute_id=self.compute, + resources=( + self.resources._to_rest_object() if self.resources and not isinstance(self.resources, Dict) else None + ), + ) + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _load_from_dict(cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any) -> "SparkJob": + loaded_data = load_from_dict(SparkJobSchema, data, context, additional_message, **kwargs) + return SparkJob(base_path=context[BASE_PATH_CONTEXT_KEY], **loaded_data) + + @classmethod + def _load_from_rest(cls, obj: JobBase) -> "SparkJob": + rest_spark_job: RestSparkJob = obj.properties + rest_spark_conf = copy.copy(rest_spark_job.conf) or {} + spark_job = SparkJob( + name=obj.name, + entry=SparkJobEntry._from_rest_object(rest_spark_job.entry), + experiment_name=rest_spark_job.experiment_name, + id=obj.id, + display_name=rest_spark_job.display_name, + description=rest_spark_job.description, + tags=rest_spark_job.tags, + properties=rest_spark_job.properties, + services=rest_spark_job.services, + status=rest_spark_job.status, + creation_context=obj.system_data, + code=rest_spark_job.code_id, + compute=rest_spark_job.compute_id, + environment=rest_spark_job.environment_id, + identity=( + _BaseJobIdentityConfiguration._from_rest_object(rest_spark_job.identity) + if rest_spark_job.identity + else None + ), + args=rest_spark_job.args, + conf=rest_spark_conf, + driver_cores=rest_spark_conf.get( + SparkConfKey.DRIVER_CORES, None + ), # copy fields from conf into the promote attribute in spark + driver_memory=rest_spark_conf.get(SparkConfKey.DRIVER_MEMORY, None), + executor_cores=rest_spark_conf.get(SparkConfKey.EXECUTOR_CORES, None), + executor_memory=rest_spark_conf.get(SparkConfKey.EXECUTOR_MEMORY, None), + executor_instances=rest_spark_conf.get(SparkConfKey.EXECUTOR_INSTANCES, None), + 
dynamic_allocation_enabled=rest_spark_conf.get(SparkConfKey.DYNAMIC_ALLOCATION_ENABLED, None), + dynamic_allocation_min_executors=rest_spark_conf.get(SparkConfKey.DYNAMIC_ALLOCATION_MIN_EXECUTORS, None), + dynamic_allocation_max_executors=rest_spark_conf.get(SparkConfKey.DYNAMIC_ALLOCATION_MAX_EXECUTORS, None), + resources=SparkResourceConfiguration._from_rest_object(rest_spark_job.resources), + inputs=from_rest_inputs_to_dataset_literal(rest_spark_job.inputs), + outputs=from_rest_data_outputs(rest_spark_job.outputs), + ) + return spark_job + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> "SparkComponent": + """Translate a spark job to component. + + :param context: Context of spark job YAML file. + :type context: dict + :return: Translated spark component. + :rtype: SparkComponent + """ + from azure.ai.ml.entities import SparkComponent + + pipeline_job_dict = kwargs.get("pipeline_job_dict", {}) + context = context or {BASE_PATH_CONTEXT_KEY: Path("./")} + + # Create anonymous spark component with default version as 1 + return SparkComponent( + tags=self.tags, + is_anonymous=True, + base_path=context[BASE_PATH_CONTEXT_KEY], + description=self.description, + code=self.code, + entry=self.entry, + py_files=self.py_files, + jars=self.jars, + files=self.files, + archives=self.archives, + driver_cores=self.driver_cores, + driver_memory=self.driver_memory, + executor_cores=self.executor_cores, + executor_memory=self.executor_memory, + executor_instances=self.executor_instances, + dynamic_allocation_enabled=self.dynamic_allocation_enabled, + dynamic_allocation_min_executors=self.dynamic_allocation_min_executors, + dynamic_allocation_max_executors=self.dynamic_allocation_max_executors, + conf=self.conf, + properties=self.properties_sparkJob, + environment=self.environment, + inputs=self._to_inputs(inputs=self.inputs, pipeline_job_dict=pipeline_job_dict), + outputs=self._to_outputs(outputs=self.outputs, pipeline_job_dict=pipeline_job_dict), + args=self.args, + ) + + def _to_node(self, context: Optional[Dict] = None, **kwargs: Any) -> "Spark": + """Translate a spark job to a pipeline node. + + :param context: Context of spark job YAML file. + :type context: dict + :return: Translated spark component. + :rtype: Spark + """ + from azure.ai.ml.entities._builders import Spark + + component = self._to_component(context, **kwargs) + + return Spark( + display_name=self.display_name, + description=self.description, + tags=self.tags, + # code, entry, py_files, jars, files, archives, environment and args are static and not allowed to be + # overwritten. And we will always get them from component. + component=component, + identity=self.identity, + driver_cores=self.driver_cores, + driver_memory=self.driver_memory, + executor_cores=self.executor_cores, + executor_memory=self.executor_memory, + executor_instances=self.executor_instances, + dynamic_allocation_enabled=self.dynamic_allocation_enabled, + dynamic_allocation_min_executors=self.dynamic_allocation_min_executors, + dynamic_allocation_max_executors=self.dynamic_allocation_max_executors, + conf=self.conf, + inputs=self.inputs, # type: ignore[arg-type] + outputs=self.outputs, # type: ignore[arg-type] + compute=self.compute, + resources=self.resources, + properties=self.properties_sparkJob, + ) + + def _validate(self) -> None: + # TODO: make spark job schema validatable? 
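# Illustrative usage sketch (not part of the vendored file): _validate (continuing
# below) requires a file entry, exactly one of compute or resources, "direct"
# input/output modes, and a complete Spark configuration when dynamic allocation
# is disabled. A SparkJob sketch that satisfies those checks, with assumed local
# paths, instance type, runtime version, and environment name:
from azure.ai.ml import Input, Output
from azure.ai.ml.entities import SparkJob, SparkResourceConfiguration

sample_job = SparkJob(
    code="./src",  # assumed local folder containing main.py
    entry={"file": "main.py"},  # coerced to a SparkJobEntry by SparkJobEntryMixin
    driver_cores=1,
    driver_memory="2g",
    executor_cores=2,
    executor_memory="2g",
    executor_instances=2,
    resources=SparkResourceConfiguration(instance_type="Standard_E8S_V3", runtime_version="3.3"),
    inputs={"data": Input(type="uri_file", path="./data/sample.csv", mode="direct")},
    outputs={"out": Output(type="uri_folder", mode="direct")},
    environment="my-spark-env@latest",  # assumed environment
)
sample_job._validate()  # raises ValidationException if any of the rules above is violated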
+ if self.resources and not isinstance(self.resources, Dict): + self.resources._validate() + _validate_compute_or_resources(self.compute, self.resources) + _validate_input_output_mode(self.inputs, self.outputs) + _validate_spark_configurations(self) + self._validate_entry() + + if self.args: + validate_inputs_for_args(self.args, self.inputs) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/spark_job_entry.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/spark_job_entry.py new file mode 100644 index 00000000..ed8d3ca7 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/spark_job_entry.py @@ -0,0 +1,59 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +# pylint: disable=redefined-builtin + +from typing import Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import SparkJobEntry as RestSparkJobEntry +from azure.ai.ml._restclient.v2023_04_01_preview.models import SparkJobPythonEntry, SparkJobScalaEntry +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class SparkJobEntryType: + """Type of Spark job entry. Possibilities are Python file entry or Scala class entry.""" + + SPARK_JOB_FILE_ENTRY = "SparkJobPythonEntry" + SPARK_JOB_CLASS_ENTRY = "SparkJobScalaEntry" + + +class SparkJobEntry(RestTranslatableMixin): + """Entry for Spark job. + + :keyword entry: The file or class entry point. + :paramtype entry: str + :keyword type: The entry type. Accepted values are SparkJobEntryType.SPARK_JOB_FILE_ENTRY or + SparkJobEntryType.SPARK_JOB_CLASS_ENTRY. Defaults to SparkJobEntryType.SPARK_JOB_FILE_ENTRY. + :paramtype type: ~azure.ai.ml.entities.SparkJobEntryType + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_spark_configurations.py + :start-after: [START spark_component_definition] + :end-before: [END spark_component_definition] + :language: python + :dedent: 8 + :caption: Creating SparkComponent. + """ + + def __init__(self, *, entry: str, type: str = SparkJobEntryType.SPARK_JOB_FILE_ENTRY) -> None: + self.entry_type = type + self.entry = entry + + @classmethod + def _from_rest_object(cls, obj: Union[SparkJobPythonEntry, SparkJobScalaEntry]) -> Optional["SparkJobEntry"]: + if obj is None: + return None + if isinstance(obj, dict): + obj = RestSparkJobEntry.from_dict(obj) + if obj.spark_job_entry_type == SparkJobEntryType.SPARK_JOB_FILE_ENTRY: + return SparkJobEntry( + entry=obj.__dict__.get("file", None), + type=SparkJobEntryType.SPARK_JOB_FILE_ENTRY, + ) + return SparkJobEntry(entry=obj.class_name, type=SparkJobEntryType.SPARK_JOB_CLASS_ENTRY) + + def _to_rest_object(self) -> Union[SparkJobPythonEntry, SparkJobScalaEntry]: + if self.entry_type == SparkJobEntryType.SPARK_JOB_FILE_ENTRY: + return SparkJobPythonEntry(file=self.entry) + return SparkJobScalaEntry(class_name=self.entry) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/spark_job_entry_mixin.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/spark_job_entry_mixin.py new file mode 100644 index 00000000..2a1ff549 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/spark_job_entry_mixin.py @@ -0,0 +1,64 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +import re +from typing import Any, Dict, Optional, Union, cast + +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + +from .spark_job_entry import SparkJobEntry, SparkJobEntryType + + +class SparkJobEntryMixin: + CODE_ID_RE_PATTERN = re.compile( + ( + r"\/subscriptions\/(?P<subscription>[\w,-]+)\/resourceGroups\/(?P<resource_group>[\w,-]+)" + r"\/providers\/Microsoft\.MachineLearningServices\/workspaces\/(?P<workspace>[\w,-]+)" + r"\/codes\/(?P<code_id>[\w,-]+)" # fmt: skip + ) + ) + + def __init__(self, **kwargs: Any): + self._entry = None + self.entry = kwargs.get("entry", None) + + @property + def entry(self) -> Optional[Union[Dict[str, str], SparkJobEntry]]: + return self._entry + + @entry.setter + def entry(self, value: Optional[Union[Dict[str, str], SparkJobEntry]]) -> None: + if isinstance(value, dict): + if value.get("file", None): + _entry = cast(str, value.get("file")) + self._entry = SparkJobEntry(entry=_entry, type=SparkJobEntryType.SPARK_JOB_FILE_ENTRY) + return + if value.get("class_name", None): + _entry = cast(str, value.get("class_name")) + self._entry = SparkJobEntry(entry=_entry, type=SparkJobEntryType.SPARK_JOB_CLASS_ENTRY) + return + self._entry = value + + def _validate_entry(self) -> None: + if self.entry is None: + # Entry is a required field for local component and when we load a remote job, component now is an arm_id, + # entry is from node level returned from service. Entry is only None when we reference an existing + # component with a function and the referenced component is in remote with name and version. + return + if not isinstance(self.entry, SparkJobEntry): + msg = f"Unsupported type {type(self.entry)} detected when validate entry, entry should be SparkJobEntry." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SPARK_JOB, + error_category=ErrorCategory.USER_ERROR, + ) + if self.entry.entry_type == SparkJobEntryType.SPARK_JOB_CLASS_ENTRY: + msg = "Classpath is not supported, please use 'file' to define the entry file." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SPARK_JOB, + error_category=ErrorCategory.USER_ERROR, + ) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/spark_resource_configuration.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/spark_resource_configuration.py new file mode 100644 index 00000000..138fc7ed --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/spark_resource_configuration.py @@ -0,0 +1,91 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from typing import Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + SparkResourceConfiguration as RestSparkResourceConfiguration, +) +from azure.ai.ml.entities._mixins import DictMixin, RestTranslatableMixin +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +class SparkResourceConfiguration(RestTranslatableMixin, DictMixin): + """Compute resource configuration for Spark component or job. + + :keyword instance_type: The type of VM to be used by the compute target. + :paramtype instance_type: Optional[str] + :keyword runtime_version: The Spark runtime version. + :paramtype runtime_version: Optional[str] + + .. 
admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_spark_configurations.py + :start-after: [START spark_resource_configuration] + :end-before: [END spark_resource_configuration] + :language: python + :dedent: 8 + :caption: Configuring a SparkJob with SparkResourceConfiguration. + """ + + instance_type_list = [ + "standard_e4s_v3", + "standard_e8s_v3", + "standard_e16s_v3", + "standard_e32s_v3", + "standard_e64s_v3", + ] + + def __init__(self, *, instance_type: Optional[str] = None, runtime_version: Optional[str] = None) -> None: + self.instance_type = instance_type + self.runtime_version = runtime_version + + def _to_rest_object(self) -> RestSparkResourceConfiguration: + return RestSparkResourceConfiguration(instance_type=self.instance_type, runtime_version=self.runtime_version) + + @classmethod + def _from_rest_object( + cls, obj: Union[dict, None, RestSparkResourceConfiguration] + ) -> Optional["SparkResourceConfiguration"]: + if obj is None: + return None + if isinstance(obj, dict): + return SparkResourceConfiguration(**obj) + return SparkResourceConfiguration(instance_type=obj.instance_type, runtime_version=obj.runtime_version) + + def _validate(self) -> None: + # TODO: below logic is duplicated to SparkResourceConfigurationSchema, maybe make SparkJob schema validatable + if self.instance_type is None or self.instance_type == "": + msg = "Instance type must be specified for SparkResourceConfiguration" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SPARK_JOB, + error_category=ErrorCategory.USER_ERROR, + ) + if self.instance_type.lower() not in self.instance_type_list: + msg = "Instance type must be specified for the list of {}".format(",".join(self.instance_type_list)) + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SPARK_JOB, + error_category=ErrorCategory.USER_ERROR, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, SparkResourceConfiguration): + return NotImplemented + return self.instance_type == other.instance_type and self.runtime_version == other.runtime_version + + def __ne__(self, other: object) -> bool: + if not isinstance(other, SparkResourceConfiguration): + return NotImplemented + return not self.__eq__(other) + + def _merge_with(self, other: "SparkResourceConfiguration") -> None: + if other: + if other.instance_type: + self.instance_type = other.instance_type + if other.runtime_version: + self.runtime_version = other.runtime_version diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/__init__.py new file mode 100644 index 00000000..fdf8caba --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/__init__.py @@ -0,0 +1,5 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/early_termination_policy.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/early_termination_policy.py new file mode 100644 index 00000000..b1b928fc --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/early_termination_policy.py @@ -0,0 +1,191 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +from abc import ABC +from typing import Any, Optional, cast + +from azure.ai.ml._restclient.v2023_04_01_preview.models import BanditPolicy as RestBanditPolicy +from azure.ai.ml._restclient.v2023_04_01_preview.models import EarlyTerminationPolicy as RestEarlyTerminationPolicy +from azure.ai.ml._restclient.v2023_04_01_preview.models import EarlyTerminationPolicyType +from azure.ai.ml._restclient.v2023_04_01_preview.models import MedianStoppingPolicy as RestMedianStoppingPolicy +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + TruncationSelectionPolicy as RestTruncationSelectionPolicy, +) +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class EarlyTerminationPolicy(ABC, RestTranslatableMixin): + def __init__( + self, + *, + delay_evaluation: int, + evaluation_interval: int, + ): + self.type = None + self.delay_evaluation = delay_evaluation + self.evaluation_interval = evaluation_interval + + @classmethod + def _from_rest_object(cls, obj: RestEarlyTerminationPolicy) -> Optional["EarlyTerminationPolicy"]: + if not obj: + return None + + policy: Any = None + if obj.policy_type == EarlyTerminationPolicyType.BANDIT: + policy = BanditPolicy._from_rest_object(obj) # pylint: disable=protected-access + + if obj.policy_type == EarlyTerminationPolicyType.MEDIAN_STOPPING: + policy = MedianStoppingPolicy._from_rest_object(obj) # pylint: disable=protected-access + + if obj.policy_type == EarlyTerminationPolicyType.TRUNCATION_SELECTION: + policy = TruncationSelectionPolicy._from_rest_object(obj) # pylint: disable=protected-access + + return cast(Optional["EarlyTerminationPolicy"], policy) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, EarlyTerminationPolicy): + raise NotImplementedError + res: bool = self._to_rest_object() == other._to_rest_object() + return res + + +class BanditPolicy(EarlyTerminationPolicy): + """Defines an early termination policy based on slack criteria and a frequency and delay interval for evaluation. + + :keyword delay_evaluation: Number of intervals by which to delay the first evaluation. Defaults to 0. + :paramtype delay_evaluation: int + :keyword evaluation_interval: Interval (number of runs) between policy evaluations. Defaults to 0. + :paramtype evaluation_interval: int + :keyword slack_amount: Absolute distance allowed from the best performing run. Defaults to 0. + :paramtype slack_amount: float + :keyword slack_factor: Ratio of the allowed distance from the best performing run. Defaults to 0. + :paramtype slack_factor: float + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_bandit_policy] + :end-before: [END configure_sweep_job_bandit_policy] + :language: python + :dedent: 8 + :caption: Configuring BanditPolicy early termination of a hyperparameter sweep on a Command job. + """ + + def __init__( + self, + *, + delay_evaluation: int = 0, + evaluation_interval: int = 0, + slack_amount: float = 0, + slack_factor: float = 0, + ) -> None: + super().__init__(delay_evaluation=delay_evaluation, evaluation_interval=evaluation_interval) + self.type = EarlyTerminationPolicyType.BANDIT.lower() + self.slack_factor = slack_factor + self.slack_amount = slack_amount + + def _to_rest_object(self) -> RestBanditPolicy: + return RestBanditPolicy( + delay_evaluation=self.delay_evaluation, + evaluation_interval=self.evaluation_interval, + slack_factor=self.slack_factor, + slack_amount=self.slack_amount, + ) + + @classmethod + def _from_rest_object(cls, obj: RestBanditPolicy) -> "BanditPolicy": + return cls( + delay_evaluation=obj.delay_evaluation, + evaluation_interval=obj.evaluation_interval, + slack_factor=obj.slack_factor, + slack_amount=obj.slack_amount, + ) + + +class MedianStoppingPolicy(EarlyTerminationPolicy): + """Defines an early termination policy based on a running average of the primary metric of all runs. + + :keyword delay_evaluation: Number of intervals by which to delay the first evaluation. Defaults to 0. + :paramtype delay_evaluation: int + :keyword evaluation_interval: Interval (number of runs) between policy evaluations. Defaults to 1. + :paramtype evaluation_interval: int + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_median_stopping_policy] + :end-before: [END configure_sweep_job_median_stopping_policy] + :language: python + :dedent: 8 + :caption: Configuring an early termination policy for a hyperparameter sweep job using MedianStoppingPolicy + """ + + def __init__( + self, + *, + delay_evaluation: int = 0, + evaluation_interval: int = 1, + ) -> None: + super().__init__(delay_evaluation=delay_evaluation, evaluation_interval=evaluation_interval) + self.type = camel_to_snake(EarlyTerminationPolicyType.MEDIAN_STOPPING) + + def _to_rest_object(self) -> RestMedianStoppingPolicy: + return RestMedianStoppingPolicy( + delay_evaluation=self.delay_evaluation, evaluation_interval=self.evaluation_interval + ) + + @classmethod + def _from_rest_object(cls, obj: RestMedianStoppingPolicy) -> "MedianStoppingPolicy": + return cls( + delay_evaluation=obj.delay_evaluation, + evaluation_interval=obj.evaluation_interval, + ) + + +class TruncationSelectionPolicy(EarlyTerminationPolicy): + """Defines an early termination policy that cancels a given percentage of runs at each evaluation interval. + + :keyword delay_evaluation: Number of intervals by which to delay the first evaluation. Defaults to 0. + :paramtype delay_evaluation: int + :keyword evaluation_interval: Interval (number of runs) between policy evaluations. Defaults to 0. + :paramtype evaluation_interval: int + :keyword truncation_percentage: The percentage of runs to cancel at each evaluation interval. Defaults to 0. + :paramtype truncation_percentage: int + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_truncation_selection_policy] + :end-before: [END configure_sweep_job_truncation_selection_policy] + :language: python + :dedent: 8 + :caption: Configuring an early termination policy for a hyperparameter sweep job + using TruncationStoppingPolicy + """ + + def __init__( + self, + *, + delay_evaluation: int = 0, + evaluation_interval: int = 0, + truncation_percentage: int = 0, + ) -> None: + super().__init__(delay_evaluation=delay_evaluation, evaluation_interval=evaluation_interval) + self.type = camel_to_snake(EarlyTerminationPolicyType.TRUNCATION_SELECTION) + self.truncation_percentage = truncation_percentage + + def _to_rest_object(self) -> RestTruncationSelectionPolicy: + return RestTruncationSelectionPolicy( + delay_evaluation=self.delay_evaluation, + evaluation_interval=self.evaluation_interval, + truncation_percentage=self.truncation_percentage, + ) + + @classmethod + def _from_rest_object(cls, obj: RestTruncationSelectionPolicy) -> "TruncationSelectionPolicy": + return cls( + delay_evaluation=obj.delay_evaluation, + evaluation_interval=obj.evaluation_interval, + truncation_percentage=obj.truncation_percentage, + ) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/objective.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/objective.py new file mode 100644 index 00000000..45e13332 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/objective.py @@ -0,0 +1,53 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +from typing import Optional + +from azure.ai.ml._restclient.v2023_08_01_preview.models import Objective as RestObjective +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class Objective(RestTranslatableMixin): + """Optimization objective. + + :param goal: Defines supported metric goals for hyperparameter tuning. Accepted values + are: "minimize", "maximize". + :type goal: str + :param primary_metric: The name of the metric to optimize. + :type primary_metric: str + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_bayesian_sampling_algorithm] + :end-before: [END configure_sweep_job_bayesian_sampling_algorithm] + :language: python + :dedent: 8 + :caption: Assigning an objective to a SweepJob. + """ + + def __init__(self, goal: Optional[str], primary_metric: Optional[str] = None) -> None: + """Optimization objective. + + :param goal: Defines supported metric goals for hyperparameter tuning. Acceptable values + are: "minimize" or "maximize". + :type goal: str + :param primary_metric: The name of the metric to optimize. 
+ :type primary_metric: str + """ + if goal is not None: + self.goal = goal.lower() + self.primary_metric = primary_metric + + def _to_rest_object(self) -> RestObjective: + return RestObjective( + goal=self.goal, + primary_metric=self.primary_metric, + ) + + @classmethod + def _from_rest_object(cls, obj: RestObjective) -> Optional["Objective"]: + if not obj: + return None + + return cls(goal=obj.goal, primary_metric=obj.primary_metric) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/parameterized_sweep.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/parameterized_sweep.py new file mode 100644 index 00000000..5d69201f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/parameterized_sweep.py @@ -0,0 +1,341 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +from typing import Any, Dict, List, Optional, Type, Union + +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationErrorType, ValidationException + +from ..job_limits import SweepJobLimits +from ..job_resource_configuration import JobResourceConfiguration +from ..queue_settings import QueueSettings +from .early_termination_policy import ( + BanditPolicy, + EarlyTerminationPolicy, + EarlyTerminationPolicyType, + MedianStoppingPolicy, + TruncationSelectionPolicy, +) +from .objective import Objective +from .sampling_algorithm import ( + BayesianSamplingAlgorithm, + GridSamplingAlgorithm, + RandomSamplingAlgorithm, + RestBayesianSamplingAlgorithm, + RestGridSamplingAlgorithm, + RestRandomSamplingAlgorithm, + RestSamplingAlgorithm, + SamplingAlgorithm, + SamplingAlgorithmType, +) + +SAMPLING_ALGORITHM_TO_REST_CONSTRUCTOR: Dict[SamplingAlgorithmType, Type[RestSamplingAlgorithm]] = { + SamplingAlgorithmType.RANDOM: RestRandomSamplingAlgorithm, + SamplingAlgorithmType.GRID: RestGridSamplingAlgorithm, + SamplingAlgorithmType.BAYESIAN: RestBayesianSamplingAlgorithm, +} + +SAMPLING_ALGORITHM_CONSTRUCTOR: Dict[SamplingAlgorithmType, Type[SamplingAlgorithm]] = { + SamplingAlgorithmType.RANDOM: RandomSamplingAlgorithm, + SamplingAlgorithmType.GRID: GridSamplingAlgorithm, + SamplingAlgorithmType.BAYESIAN: BayesianSamplingAlgorithm, +} + + +class ParameterizedSweep: # pylint:disable=too-many-instance-attributes + """Shared logic for standalone and pipeline sweep job.""" + + def __init__( + self, + limits: Optional[SweepJobLimits] = None, + sampling_algorithm: Optional[Union[str, SamplingAlgorithm]] = None, + objective: Optional[Union[Dict, Objective]] = None, + early_termination: Optional[Any] = None, + search_space: Optional[Dict] = None, + queue_settings: Optional[QueueSettings] = None, + resources: Optional[Union[dict, JobResourceConfiguration]] = None, + ) -> None: + """ + :param limits: Limits for sweep job. + :type limits: ~azure.ai.ml.sweep.SweepJobLimits + :param sampling_algorithm: Sampling algorithm for sweep job. + :type sampling_algorithm: ~azure.ai.ml.sweep.SamplingAlgorithm + :param objective: Objective for sweep job. + :type objective: ~azure.ai.ml.sweep.Objective + :param early_termination: Early termination policy for sweep job. + :type early_termination: ~azure.ai.ml.entities._job.sweep.early_termination_policy.EarlyTerminationPolicy + :param search_space: Search space for sweep job. 
+ :type search_space: Dict[str, Union[ + ~azure.ai.ml.sweep.Choice, + ~azure.ai.ml.sweep.LogNormal, + ~azure.ai.ml.sweep.LogUniform, + ~azure.ai.ml.sweep.Normal, + ~azure.ai.ml.sweep.QLogNormal, + ~azure.ai.ml.sweep.QLogUniform, + ~azure.ai.ml.sweep.QNormal, + ~azure.ai.ml.sweep.QUniform, + ~azure.ai.ml.sweep.Randint, + ~azure.ai.ml.sweep.Uniform + + ]] + :param queue_settings: Queue settings for sweep job. + :type queue_settings: ~azure.ai.ml.entities.QueueSettings + :param resources: Compute Resource configuration for the job. + :type resources: ~azure.ai.ml.entities.ResourceConfiguration + """ + self.sampling_algorithm = sampling_algorithm + self.early_termination = early_termination # type: ignore[assignment] + self._limits = limits + self.search_space = search_space + self.queue_settings = queue_settings + self.objective: Optional[Objective] = None + self.resources = resources + + if isinstance(objective, Dict): + self.objective = Objective(**objective) + else: + self.objective = objective + + @property + def resources(self) -> Optional[Union[dict, JobResourceConfiguration]]: + """Resources for sweep job. + + :returns: Resources for sweep job. + :rtype: ~azure.ai.ml.entities.ResourceConfiguration + """ + return self._resources + + @resources.setter + def resources(self, value: Optional[Union[dict, JobResourceConfiguration]]) -> None: + """Set Resources for sweep job. + + :param value: Compute Resource configuration for the job. + :type value: ~azure.ai.ml.entities.ResourceConfiguration + """ + if isinstance(value, dict): + value = JobResourceConfiguration(**value) + self._resources = value + + @property + def limits(self) -> Optional[SweepJobLimits]: + """Limits for sweep job. + + :returns: Limits for sweep job. + :rtype: ~azure.ai.ml.sweep.SweepJobLimits + """ + return self._limits + + @limits.setter + def limits(self, value: SweepJobLimits) -> None: + """Set limits for sweep job. + + :param value: Limits for sweep job. + :type value: ~azure.ai.ml.sweep.SweepJobLimits + """ + if not isinstance(value, SweepJobLimits): + msg = f"limits must be SweepJobLimits but get {type(value)} instead" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SWEEP_JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) + self._limits = value + + def set_resources( + self, + *, + instance_type: Optional[Union[str, List[str]]] = None, + instance_count: Optional[int] = None, + locations: Optional[List[str]] = None, + properties: Optional[Dict] = None, + docker_args: Optional[str] = None, + shm_size: Optional[str] = None, + ) -> None: + """Set resources for Sweep. + + :keyword instance_type: The instance type to use for the job. + :paramtype instance_type: Optional[Union[str, List[str]]] + :keyword instance_count: The number of instances to use for the job. + :paramtype instance_count: Optional[int] + :keyword locations: The locations to use for the job. + :paramtype locations: Optional[List[str]] + :keyword properties: The properties for the job. + :paramtype properties: Optional[Dict] + :keyword docker_args: The docker arguments for the job. + :paramtype docker_args: Optional[str] + :keyword shm_size: The shared memory size for the job. 
+ :paramtype shm_size: Optional[str] + """ + if self.resources is None: + self.resources = JobResourceConfiguration() + + if not isinstance(self.resources, dict): + if locations is not None: + self.resources.locations = locations + if instance_type is not None: + self.resources.instance_type = instance_type + if instance_count is not None: + self.resources.instance_count = instance_count + if properties is not None: + self.resources.properties = properties + if docker_args is not None: + self.resources.docker_args = docker_args + if shm_size is not None: + self.resources.shm_size = shm_size + + def set_limits( + self, + *, + max_concurrent_trials: Optional[int] = None, + max_total_trials: Optional[int] = None, + timeout: Optional[int] = None, + trial_timeout: Optional[int] = None, + ) -> None: + """Set limits for Sweep node. Leave parameters as None if you don't want to update corresponding values. + + :keyword max_concurrent_trials: maximum concurrent trial number. + :paramtype max_concurrent_trials: int + :keyword max_total_trials: maximum total trial number. + :paramtype max_total_trials: int + :keyword timeout: total timeout in seconds for sweep node + :paramtype timeout: int + :keyword trial_timeout: timeout in seconds for each trial + :paramtype trial_timeout: int + """ + # Looks related to https://github.com/pylint-dev/pylint/issues/3502, still an open issue + # pylint:disable=attribute-defined-outside-init + if self._limits is None: + self._limits = SweepJobLimits( + max_concurrent_trials=max_concurrent_trials, + max_total_trials=max_total_trials, + timeout=timeout, + trial_timeout=trial_timeout, + ) + else: + if self.limits is not None: + if max_concurrent_trials is not None: + self.limits.max_concurrent_trials = max_concurrent_trials + if max_total_trials is not None: + self.limits.max_total_trials = max_total_trials + if timeout is not None: + self.limits.timeout = timeout + if trial_timeout is not None: + self.limits.trial_timeout = trial_timeout + + def set_objective(self, *, goal: Optional[str] = None, primary_metric: Optional[str] = None) -> None: + """Set the sweep object.. Leave parameters as None if you don't want to update corresponding values. + + :keyword goal: Defines supported metric goals for hyperparameter tuning. Acceptable values are: + "minimize" and "maximize". + :paramtype goal: str + :keyword primary_metric: Name of the metric to optimize. + :paramtype primary_metric: str + """ + + if self.objective is not None: + if goal: + self.objective.goal = goal + if primary_metric: + self.objective.primary_metric = primary_metric + else: + self.objective = Objective(goal=goal, primary_metric=primary_metric) + + @property + def sampling_algorithm(self) -> Optional[Union[str, SamplingAlgorithm]]: + """Sampling algorithm for sweep job. + + :returns: Sampling algorithm for sweep job. + :rtype: ~azure.ai.ml.sweep.SamplingAlgorithm + """ + return self._sampling_algorithm + + @sampling_algorithm.setter + def sampling_algorithm(self, value: Optional[Union[SamplingAlgorithm, str]] = None) -> None: + """Set sampling algorithm for sweep job. + + :param value: Sampling algorithm for sweep job. 
+ :type value: ~azure.ai.ml.sweep.SamplingAlgorithm + """ + if value is None: + self._sampling_algorithm = None + elif isinstance(value, SamplingAlgorithm) or ( + isinstance(value, str) and value.lower().capitalize() in SAMPLING_ALGORITHM_CONSTRUCTOR + ): + self._sampling_algorithm = value + else: + msg = f"unsupported sampling algorithm: {value}" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SWEEP_JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) + + def _get_rest_sampling_algorithm(self) -> RestSamplingAlgorithm: + # TODO: self.sampling_algorithm will always return SamplingAlgorithm + if isinstance(self.sampling_algorithm, SamplingAlgorithm): + return self.sampling_algorithm._to_rest_object() # pylint: disable=protected-access + + if isinstance(self.sampling_algorithm, str): + return SAMPLING_ALGORITHM_CONSTRUCTOR[ # pylint: disable=protected-access + SamplingAlgorithmType(self.sampling_algorithm.lower().capitalize()) + ]()._to_rest_object() + + msg = f"Received unsupported value {self._sampling_algorithm} as the sampling algorithm" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SWEEP_JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) + + @property + def early_termination(self) -> Optional[Union[str, EarlyTerminationPolicy]]: + """Early termination policy for sweep job. + + :returns: Early termination policy for sweep job. + :rtype: ~azure.ai.ml.entities._job.sweep.early_termination_policy.EarlyTerminationPolicy + """ + return self._early_termination + + @early_termination.setter + def early_termination(self, value: Any) -> None: + """Set early termination policy for sweep job. + + :param value: Early termination policy for sweep job. 
+ :type value: ~azure.ai.ml.entities._job.sweep.early_termination_policy.EarlyTerminationPolicy + """ + self._early_termination: Optional[Union[str, EarlyTerminationPolicy]] + if value is None: + self._early_termination = None + elif isinstance(value, EarlyTerminationPolicy): + self._early_termination = value + elif isinstance(value, str): + value = value.lower().capitalize() + if value == EarlyTerminationPolicyType.BANDIT: + self._early_termination = BanditPolicy() + elif value == EarlyTerminationPolicyType.MEDIAN_STOPPING: + self._early_termination = MedianStoppingPolicy() + elif value == EarlyTerminationPolicyType.TRUNCATION_SELECTION: + self._early_termination = TruncationSelectionPolicy() + else: + msg = f"Received unsupported value {value} as the early termination policy" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SWEEP_JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) + else: + msg = f"Received unsupported value of type {type(value)} as the early termination policy" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SWEEP_JOB, + error_category=ErrorCategory.USER_ERROR, + error_type=ValidationErrorType.INVALID_VALUE, + ) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/sampling_algorithm.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/sampling_algorithm.py new file mode 100644 index 00000000..d0bf795d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/sampling_algorithm.py @@ -0,0 +1,141 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +from abc import ABC +from typing import Any, Optional, Union, cast + +from azure.ai.ml._restclient.v2023_08_01_preview.models import ( + BayesianSamplingAlgorithm as RestBayesianSamplingAlgorithm, +) +from azure.ai.ml._restclient.v2023_08_01_preview.models import GridSamplingAlgorithm as RestGridSamplingAlgorithm +from azure.ai.ml._restclient.v2023_08_01_preview.models import RandomSamplingAlgorithm as RestRandomSamplingAlgorithm +from azure.ai.ml._restclient.v2023_08_01_preview.models import SamplingAlgorithm as RestSamplingAlgorithm +from azure.ai.ml._restclient.v2023_08_01_preview.models import SamplingAlgorithmType +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class SamplingAlgorithm(ABC, RestTranslatableMixin): + """Base class for sampling algorithms. + + This class should not be instantiated directly. Instead, use one of its subclasses. 
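+
+    The available subclasses are ``RandomSamplingAlgorithm``, ``GridSamplingAlgorithm``,
+    and ``BayesianSamplingAlgorithm``.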
+ """ + + def __init__(self) -> None: + self.type = None + + @classmethod + def _from_rest_object(cls, obj: RestSamplingAlgorithm) -> Optional["SamplingAlgorithm"]: + if not obj: + return None + + sampling_algorithm: Any = None + if obj.sampling_algorithm_type == SamplingAlgorithmType.RANDOM: + sampling_algorithm = RandomSamplingAlgorithm._from_rest_object(obj) # pylint: disable=protected-access + + if obj.sampling_algorithm_type == SamplingAlgorithmType.GRID: + sampling_algorithm = GridSamplingAlgorithm._from_rest_object(obj) # pylint: disable=protected-access + + if obj.sampling_algorithm_type == SamplingAlgorithmType.BAYESIAN: + sampling_algorithm = BayesianSamplingAlgorithm._from_rest_object(obj) # pylint: disable=protected-access + + return cast(Optional["SamplingAlgorithm"], sampling_algorithm) + + +class RandomSamplingAlgorithm(SamplingAlgorithm): + """Random Sampling Algorithm. + + :keyword rule: The specific type of random algorithm. Accepted values are: "random" and "sobol". + :type rule: str + :keyword seed: The seed for random number generation. + :paramtype seed: int + :keyword logbase: A positive number or the number "e" in string format to be used as the base for log + based random sampling. + :paramtype logbase: Union[float, str] + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_random_sampling_algorithm] + :end-before: [END configure_sweep_job_random_sampling_algorithm] + :language: python + :dedent: 8 + :caption: Assigning a random sampling algorithm for a SweepJob + """ + + def __init__( + self, + *, + rule: Optional[str] = None, + seed: Optional[int] = None, + logbase: Optional[Union[float, str]] = None, + ) -> None: + super().__init__() + self.type = SamplingAlgorithmType.RANDOM.lower() + self.rule = rule + self.seed = seed + self.logbase = logbase + + def _to_rest_object(self) -> RestRandomSamplingAlgorithm: + return RestRandomSamplingAlgorithm( + rule=self.rule, + seed=self.seed, + logbase=self.logbase, + ) + + @classmethod + def _from_rest_object(cls, obj: RestRandomSamplingAlgorithm) -> "RandomSamplingAlgorithm": + return cls( + rule=obj.rule, + seed=obj.seed, + logbase=obj.logbase, + ) + + +class GridSamplingAlgorithm(SamplingAlgorithm): + """Grid Sampling Algorithm. + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_grid_sampling_algorithm] + :end-before: [END configure_sweep_job_grid_sampling_algorithm] + :language: python + :dedent: 8 + :caption: Assigning a grid sampling algorithm for a SweepJob + """ + + def __init__(self) -> None: + super().__init__() + self.type = SamplingAlgorithmType.GRID.lower() + + def _to_rest_object(self) -> RestGridSamplingAlgorithm: + return RestGridSamplingAlgorithm() + + @classmethod + def _from_rest_object(cls, obj: RestGridSamplingAlgorithm) -> "GridSamplingAlgorithm": + return cls() + + +class BayesianSamplingAlgorithm(SamplingAlgorithm): + """Bayesian Sampling Algorithm. + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_bayesian_sampling_algorithm] + :end-before: [END configure_sweep_job_bayesian_sampling_algorithm] + :language: python + :dedent: 8 + :caption: Assigning a Bayesian sampling algorithm for a SweepJob + """ + + def __init__(self) -> None: + super().__init__() + self.type = SamplingAlgorithmType.BAYESIAN.lower() + + def _to_rest_object(self) -> RestBayesianSamplingAlgorithm: + return RestBayesianSamplingAlgorithm() + + @classmethod + def _from_rest_object(cls, obj: RestBayesianSamplingAlgorithm) -> "BayesianSamplingAlgorithm": + return cls() diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/search_space.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/search_space.py new file mode 100644 index 00000000..bbc08d98 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/search_space.py @@ -0,0 +1,393 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +from abc import ABC +from typing import Any, List, Optional, Union + +from azure.ai.ml.constants._common import TYPE +from azure.ai.ml.constants._job.sweep import SearchSpace +from azure.ai.ml.entities._mixins import RestTranslatableMixin +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, JobException + + +class SweepDistribution(ABC, RestTranslatableMixin): + """Base class for sweep distribution configuration. + + This class should not be instantiated directly. Instead, use one of its subclasses. + + :keyword type: Type of distribution. + :paramtype type: str + """ + + def __init__(self, *, type: Optional[str] = None) -> None: # pylint: disable=redefined-builtin + self.type = type + + @classmethod + def _from_rest_object(cls, obj: List) -> "SweepDistribution": + mapping = { + SearchSpace.CHOICE: Choice, + SearchSpace.NORMAL: Normal, + SearchSpace.LOGNORMAL: LogNormal, + SearchSpace.QNORMAL: QNormal, + SearchSpace.QLOGNORMAL: QLogNormal, + SearchSpace.RANDINT: Randint, + SearchSpace.UNIFORM: Uniform, + SearchSpace.QUNIFORM: QUniform, + SearchSpace.LOGUNIFORM: LogUniform, + SearchSpace.QLOGUNIFORM: QLogUniform, + } + + ss_class: Any = mapping.get(obj[0], None) + if ss_class: + res: SweepDistribution = ss_class._from_rest_object(obj) + return res + + msg = f"Unknown search space type: {obj[0]}" + raise JobException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SWEEP_JOB, + error_category=ErrorCategory.SYSTEM_ERROR, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, SweepDistribution): + return NotImplemented + res: bool = self._to_rest_object() == other._to_rest_object() + return res + + +class Choice(SweepDistribution): + """Choice distribution configuration. + + :param values: List of values to choose from. + :type values: list[Union[float, str, dict]] + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_choice_loguniform] + :end-before: [END configure_sweep_job_choice_loguniform] + :language: python + :dedent: 8 + :caption: Using Choice distribution to set values for a hyperparameter sweep + """ + + def __init__(self, values: Optional[List[Union[float, str, dict]]] = None, **kwargs: Any) -> None: + kwargs.setdefault(TYPE, SearchSpace.CHOICE) + super().__init__(**kwargs) + self.values = values + + def _to_rest_object(self) -> List: + items: List = [] + if self.values is not None: + for value in self.values: + if isinstance(value, dict): + rest_dict = {} + for k, v in value.items(): + if isinstance(v, SweepDistribution): + rest_dict[k] = v._to_rest_object() + else: + rest_dict[k] = v + items.append(rest_dict) + else: + items.append(value) + return [self.type, [items]] + + @classmethod + def _from_rest_object(cls, obj: List) -> "Choice": + rest_values = obj[1][0] + from_rest_values = [] + for rest_value in rest_values: + if isinstance(rest_value, dict): + from_rest_dict = {} + for k, v in rest_value.items(): + try: + # first assume that any dictionary value is a valid distribution (i.e. normal, uniform, etc) + # and try to deserialize it into a the correct SDK distribution object + from_rest_dict[k] = SweepDistribution._from_rest_object(v) + except Exception: # pylint: disable=W0718 + # if an exception is raised, assume that the value was not a valid distribution and use the + # value as it is for deserialization + from_rest_dict[k] = v + from_rest_values.append(from_rest_dict) + else: + from_rest_values.append(rest_value) + return Choice(values=from_rest_values) # type: ignore[arg-type] + + +class Normal(SweepDistribution): + """Normal distribution configuration. + + :param mu: Mean of the distribution. + :type mu: float + :param sigma: Standard deviation of the distribution. + :type sigma: float + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_randint_normal] + :end-before: [END configure_sweep_job_randint_normal] + :language: python + :dedent: 8 + :caption: Configuring Normal distributions for a hyperparameter sweep on a Command job. + """ + + def __init__(self, mu: Optional[float] = None, sigma: Optional[float] = None, **kwargs: Any) -> None: + kwargs.setdefault(TYPE, SearchSpace.NORMAL) + super().__init__(**kwargs) + self.mu = mu + self.sigma = sigma + + def _to_rest_object(self) -> List: + return [self.type, [self.mu, self.sigma]] + + @classmethod + def _from_rest_object(cls, obj: List) -> "Normal": + return cls(mu=obj[1][0], sigma=obj[1][1]) + + +class LogNormal(Normal): + """LogNormal distribution configuration. + + :param mu: Mean of the log of the distribution. + :type mu: float + :param sigma: Standard deviation of the log of the distribution. + :type sigma: float + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_lognormal_qlognormal] + :end-before: [END configure_sweep_job_lognormal_qlognormal] + :language: python + :dedent: 8 + :caption: Configuring LogNormal distributions for a hyperparameter sweep on a Command job. + """ + + def __init__(self, mu: Optional[float] = None, sigma: Optional[float] = None, **kwargs: Any) -> None: + kwargs.setdefault(TYPE, SearchSpace.LOGNORMAL) + super().__init__(mu=mu, sigma=sigma, **kwargs) + + +class QNormal(Normal): + """QNormal distribution configuration. 
+ + :param mu: Mean of the distribution. + :type mu: float + :param sigma: Standard deviation of the distribution. + :type sigma: float + :param q: Quantization factor. + :type q: int + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_qloguniform_qnormal] + :end-before: [END configure_sweep_job_qloguniform_qnormal] + :language: python + :dedent: 8 + :caption: Configuring QNormal distributions for a hyperparameter sweep on a Command job. + """ + + def __init__( + self, mu: Optional[float] = None, sigma: Optional[float] = None, q: Optional[int] = None, **kwargs: Any + ) -> None: + kwargs.setdefault(TYPE, SearchSpace.QNORMAL) + super().__init__(mu=mu, sigma=sigma, **kwargs) + self.q = q + + def _to_rest_object(self) -> List: + return [self.type, [self.mu, self.sigma, self.q]] + + @classmethod + def _from_rest_object(cls, obj: List) -> "QNormal": + return cls(mu=obj[1][0], sigma=obj[1][1], q=obj[1][2]) + + +class QLogNormal(QNormal): + """QLogNormal distribution configuration. + + :param mu: Mean of the log of the distribution. + :type mu: Optional[float] + :param sigma: Standard deviation of the log of the distribution. + :type sigma: Optional[float] + :param q: Quantization factor. + :type q: Optional[int] + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_lognormal_qlognormal] + :end-before: [END configure_sweep_job_lognormal_qlognormal] + :language: python + :dedent: 8 + :caption: Configuring QLogNormal distributions for a hyperparameter sweep on a Command job. + """ + + def __init__( + self, mu: Optional[float] = None, sigma: Optional[float] = None, q: Optional[int] = None, **kwargs: Any + ) -> None: + kwargs.setdefault(TYPE, SearchSpace.QLOGNORMAL) + super().__init__(mu=mu, sigma=sigma, q=q, **kwargs) + + +class Randint(SweepDistribution): + """Randint distribution configuration. + + :param upper: Upper bound of the distribution. + :type upper: int + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_randint_normal] + :end-before: [END configure_sweep_job_randint_normal] + :language: python + :dedent: 8 + :caption: Configuring Randint distributions for a hyperparameter sweep on a Command job. + """ + + def __init__(self, upper: Optional[int] = None, **kwargs: Any) -> None: + kwargs.setdefault(TYPE, SearchSpace.RANDINT) + super().__init__(**kwargs) + self.upper = upper + + def _to_rest_object(self) -> List: + return [self.type, [self.upper]] + + @classmethod + def _from_rest_object(cls, obj: List) -> "Randint": + return cls(upper=obj[1][0]) + + +class Uniform(SweepDistribution): + """ + + Uniform distribution configuration. + + :param min_value: Minimum value of the distribution. + :type min_value: float + :param max_value: Maximum value of the distribution. + :type max_value: float + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_uniform] + :end-before: [END configure_sweep_job_uniform] + :language: python + :dedent: 8 + :caption: Configuring Uniform distributions for learning rates and momentum + during a hyperparameter sweep on a Command job. 
+ """ + + def __init__(self, min_value: Optional[float] = None, max_value: Optional[float] = None, **kwargs: Any) -> None: + kwargs.setdefault(TYPE, SearchSpace.UNIFORM) + super().__init__(**kwargs) + self.min_value = min_value + self.max_value = max_value + + def _to_rest_object(self) -> List: + return [self.type, [self.min_value, self.max_value]] + + @classmethod + def _from_rest_object(cls, obj: List) -> "Uniform": + return cls(min_value=obj[1][0], max_value=obj[1][1]) + + +class LogUniform(Uniform): + """LogUniform distribution configuration. + + :param min_value: Minimum value of the log of the distribution. + :type min_value: float + :param max_value: Maximum value of the log of the distribution. + :type max_value: float + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_choice_loguniform] + :end-before: [END configure_sweep_job_choice_loguniform] + :language: python + :dedent: 8 + :caption: Configuring a LogUniform distribution for a hyperparameter sweep job learning rate + """ + + def __init__(self, min_value: Optional[float] = None, max_value: Optional[float] = None, **kwargs: Any) -> None: + kwargs.setdefault(TYPE, SearchSpace.LOGUNIFORM) + super().__init__(min_value=min_value, max_value=max_value, **kwargs) + + +class QUniform(Uniform): + """QUniform distribution configuration. + + :param min_value: Minimum value of the distribution. + :type min_value: Optional[Union[int, float]] + :param max_value: Maximum value of the distribution. + :type max_value: Optional[Union[int, float]] + :param q: Quantization factor. + :type q: Optional[int] + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_truncation_selection_policy] + :end-before: [END configure_sweep_job_truncation_selection_policy] + :language: python + :dedent: 8 + :caption: Configuring QUniform distributions for a hyperparameter sweep on a Command job. + """ + + def __init__( + self, + min_value: Optional[Union[int, float]] = None, + max_value: Optional[Union[int, float]] = None, + q: Optional[int] = None, + **kwargs: Any, + ) -> None: + kwargs.setdefault(TYPE, SearchSpace.QUNIFORM) + super().__init__(min_value=min_value, max_value=max_value, **kwargs) + self.q = q + + def _to_rest_object(self) -> List: + return [self.type, [self.min_value, self.max_value, self.q]] + + @classmethod + def _from_rest_object(cls, obj: List) -> "QUniform": + return cls(min_value=obj[1][0], max_value=obj[1][1], q=obj[1][2]) + + +class QLogUniform(QUniform): + """QLogUniform distribution configuration. + + :param min_value: Minimum value of the log of the distribution. + :type min_value: Optional[float] + :param max_value: Maximum value of the log of the distribution. + :type max_value: Optional[float] + :param q: Quantization factor. + :type q: Optional[int] + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_qloguniform_qnormal] + :end-before: [END configure_sweep_job_qloguniform_qnormal] + :language: python + :dedent: 8 + :caption: Configuring QLogUniform distributions for a hyperparameter sweep on a Command job. 
+ """ + + def __init__( + self, + min_value: Optional[float] = None, + max_value: Optional[float] = None, + q: Optional[int] = None, + **kwargs: Any, + ) -> None: + kwargs.setdefault(TYPE, SearchSpace.QLOGUNIFORM) + super().__init__(min_value=min_value, max_value=max_value, q=q, **kwargs) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/sweep_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/sweep_job.py new file mode 100644 index 00000000..0a99bb39 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/sweep/sweep_job.py @@ -0,0 +1,361 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +import logging +from typing import Any, Dict, NoReturn, Optional, Union + +from azure.ai.ml._restclient.v2023_08_01_preview.models import JobBase +from azure.ai.ml._restclient.v2023_08_01_preview.models import SweepJob as RestSweepJob +from azure.ai.ml._restclient.v2023_08_01_preview.models import TrialComponent +from azure.ai.ml._schema._sweep.sweep_job import SweepJobSchema +from azure.ai.ml._utils.utils import map_single_brackets_and_warn +from azure.ai.ml.constants import JobType +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, TYPE +from azure.ai.ml.entities._component.command_component import CommandComponent +from azure.ai.ml.entities._credentials import ( + AmlTokenConfiguration, + ManagedIdentityConfiguration, + UserIdentityConfiguration, + _BaseJobIdentityConfiguration, +) +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job._input_output_helpers import ( + from_rest_data_outputs, + from_rest_inputs_to_dataset_literal, + to_rest_data_outputs, + to_rest_dataset_literal_inputs, + validate_inputs_for_command, + validate_key_contains_allowed_characters, +) +from azure.ai.ml.entities._job.command_job import CommandJob +from azure.ai.ml.entities._job.job import Job +from azure.ai.ml.entities._job.job_io_mixin import JobIOMixin +from azure.ai.ml.entities._job.job_resource_configuration import JobResourceConfiguration +from azure.ai.ml.entities._job.sweep.sampling_algorithm import SamplingAlgorithm +from azure.ai.ml.entities._system_data import SystemData +from azure.ai.ml.entities._util import load_from_dict +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, JobException + +# from ..identity import AmlToken, Identity, ManagedIdentity, UserIdentity +from ..job_limits import SweepJobLimits +from ..parameterized_command import ParameterizedCommand +from ..queue_settings import QueueSettings +from .early_termination_policy import ( + BanditPolicy, + EarlyTerminationPolicy, + MedianStoppingPolicy, + TruncationSelectionPolicy, +) +from .objective import Objective +from .parameterized_sweep import ParameterizedSweep +from .search_space import ( + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + SweepDistribution, + Uniform, +) + +module_logger = logging.getLogger(__name__) + + +class SweepJob(Job, ParameterizedSweep, JobIOMixin): + """Sweep job for hyperparameter tuning. + + .. note:: + For sweep jobs, inputs, outputs, and parameters are accessible as environment variables using the prefix + ``AZUREML_SWEEP_``. For example, if you have a parameter named "learning_rate", you can access it as + ``AZUREML_SWEEP_learning_rate``. 
+ + :keyword name: Name of the job. + :paramtype name: str + :keyword display_name: Display name of the job. + :paramtype display_name: str + :keyword description: Description of the job. + :paramtype description: str + :keyword tags: Tag dictionary. Tags can be added, removed, and updated. + :paramtype tags: dict[str, str] + :keyword properties: The asset property dictionary. + :paramtype properties: dict[str, str] + :keyword experiment_name: Name of the experiment the job will be created under. If None is provided, + job will be created under experiment 'Default'. + :paramtype experiment_name: str + :keyword identity: Identity that the training job will use while running on compute. + :paramtype identity: Union[ + ~azure.ai.ml.ManagedIdentityConfiguration, + ~azure.ai.ml.AmlTokenConfiguration, + ~azure.ai.ml.UserIdentityConfiguration + + ] + + :keyword inputs: Inputs to the command. + :paramtype inputs: dict + :keyword outputs: Mapping of output data bindings used in the job. + :paramtype outputs: dict[str, ~azure.ai.ml.Output] + :keyword sampling_algorithm: The hyperparameter sampling algorithm to use over the `search_space`. Defaults to + "random". + + :paramtype sampling_algorithm: str + :keyword search_space: Dictionary of the hyperparameter search space. The key is the name of the hyperparameter + and the value is the parameter expression. + + :paramtype search_space: Dict + :keyword objective: Metric to optimize for. + :paramtype objective: Objective + :keyword compute: The compute target the job runs on. + :paramtype compute: str + :keyword trial: The job configuration for each trial. Each trial will be provided with a different combination + of hyperparameter values that the system samples from the search_space. + + :paramtype trial: Union[ + ~azure.ai.ml.entities.CommandJob, + ~azure.ai.ml.entities.CommandComponent + + ] + + :keyword early_termination: The early termination policy to use. A trial job is canceled + when the criteria of the specified policy are met. If omitted, no early termination policy will be applied. + + :paramtype early_termination: Union[ + ~azure.mgmt.machinelearningservices.models.BanditPolicy, + ~azure.mgmt.machinelearningservices.models.MedianStoppingPolicy, + ~azure.mgmt.machinelearningservices.models.TruncationSelectionPolicy + + ] + + :keyword limits: Limits for the sweep job. + :paramtype limits: ~azure.ai.ml.entities.SweepJobLimits + :keyword queue_settings: Queue settings for the job. + :paramtype queue_settings: ~azure.ai.ml.entities.QueueSettings + :keyword resources: Compute Resource configuration for the job. + :paramtype resources: Optional[Union[~azure.ai.ml.entities.ResourceConfiguration] + :keyword kwargs: A dictionary of additional configuration parameters. + :paramtype kwargs: dict + + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_sweep_configurations.py + :start-after: [START configure_sweep_job_bayesian_sampling_algorithm] + :end-before: [END configure_sweep_job_bayesian_sampling_algorithm] + :language: python + :dedent: 8 + :caption: Creating a SweepJob + """ + + def __init__( + self, + *, + name: Optional[str] = None, + description: Optional[str] = None, + tags: Optional[Dict] = None, + display_name: Optional[str] = None, + experiment_name: Optional[str] = None, + identity: Optional[ + Union[ManagedIdentityConfiguration, AmlTokenConfiguration, UserIdentityConfiguration] + ] = None, + inputs: Optional[Dict[str, Union[Input, str, bool, int, float]]] = None, + outputs: Optional[Dict] = None, + compute: Optional[str] = None, + limits: Optional[SweepJobLimits] = None, + sampling_algorithm: Optional[Union[str, SamplingAlgorithm]] = None, + search_space: Optional[ + Dict[ + str, + Union[ + Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ], + ] + ] = None, + objective: Optional[Objective] = None, + trial: Optional[Union[CommandJob, CommandComponent]] = None, + early_termination: Optional[ + Union[EarlyTerminationPolicy, BanditPolicy, MedianStoppingPolicy, TruncationSelectionPolicy] + ] = None, + queue_settings: Optional[QueueSettings] = None, + resources: Optional[Union[dict, JobResourceConfiguration]] = None, + **kwargs: Any, + ) -> None: + kwargs[TYPE] = JobType.SWEEP + + Job.__init__( + self, + name=name, + description=description, + tags=tags, + display_name=display_name, + experiment_name=experiment_name, + compute=compute, + **kwargs, + ) + self.inputs = inputs # type: ignore[assignment] + self.outputs = outputs # type: ignore[assignment] + self.trial = trial + self.identity = identity + + ParameterizedSweep.__init__( + self, + limits=limits, + sampling_algorithm=sampling_algorithm, + objective=objective, + early_termination=early_termination, + search_space=search_space, + queue_settings=queue_settings, + resources=resources, + ) + + def _to_dict(self) -> Dict: + res: dict = SweepJobSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + def _to_rest_object(self) -> JobBase: + self._override_missing_properties_from_trial() + if self.trial is not None: + self.trial.command = map_single_brackets_and_warn(self.trial.command) + + if self.search_space is not None: + search_space = {param: space._to_rest_object() for (param, space) in self.search_space.items()} + + if self.trial is not None: + validate_inputs_for_command(self.trial.command, self.inputs) + for key in search_space.keys(): # pylint: disable=possibly-used-before-assignment + validate_key_contains_allowed_characters(key) + + if self.trial is not None: + trial_component = TrialComponent( + code_id=self.trial.code, + distribution=( + self.trial.distribution._to_rest_object() + if self.trial.distribution and not isinstance(self.trial.distribution, Dict) + else None + ), + environment_id=self.trial.environment, + command=self.trial.command, + environment_variables=self.trial.environment_variables, + resources=( + self.trial.resources._to_rest_object() + if self.trial.resources and not isinstance(self.trial.resources, Dict) + else None + ), + ) + + sweep_job = RestSweepJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + search_space=search_space, + sampling_algorithm=self._get_rest_sampling_algorithm() if self.sampling_algorithm else None, + limits=self.limits._to_rest_object() if self.limits 
else None, + early_termination=( + self.early_termination._to_rest_object() + if self.early_termination and not isinstance(self.early_termination, str) + else None + ), + properties=self.properties, + compute_id=self.compute, + objective=self.objective._to_rest_object() if self.objective else None, + trial=trial_component, # pylint: disable=possibly-used-before-assignment + tags=self.tags, + inputs=to_rest_dataset_literal_inputs(self.inputs, job_type=self.type), + outputs=to_rest_data_outputs(self.outputs), + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings._to_rest_object() if self.queue_settings else None, + resources=( + self.resources._to_rest_object() if self.resources and not isinstance(self.resources, dict) else None + ), + ) + + if not sweep_job.resources and sweep_job.trial.resources: + sweep_job.resources = sweep_job.trial.resources + + sweep_job_resource = JobBase(properties=sweep_job) + sweep_job_resource.name = self.name + return sweep_job_resource + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> NoReturn: + msg = "no sweep component entity" + raise JobException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.SWEEP_JOB, + error_category=ErrorCategory.USER_ERROR, + ) + + @classmethod + def _load_from_dict(cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any) -> "SweepJob": + loaded_schema = load_from_dict(SweepJobSchema, data, context, additional_message, **kwargs) + loaded_schema["trial"] = ParameterizedCommand(**(loaded_schema["trial"])) + sweep_job = SweepJob(base_path=context[BASE_PATH_CONTEXT_KEY], **loaded_schema) + return sweep_job + + @classmethod + def _load_from_rest(cls, obj: JobBase) -> "SweepJob": + properties: RestSweepJob = obj.properties + + # Unpack termination schema + early_termination = EarlyTerminationPolicy._from_rest_object(properties.early_termination) + + # Unpack sampling algorithm + sampling_algorithm = SamplingAlgorithm._from_rest_object(properties.sampling_algorithm) + + trial = ParameterizedCommand._load_from_sweep_job(obj.properties) + # Compute also appears in both layers of the yaml, but only one of the REST. 
+ # This should be a required field in one place, but cannot be if its optional in two + + _search_space = {} + for param, dist in properties.search_space.items(): + _search_space[param] = SweepDistribution._from_rest_object(dist) + + return SweepJob( + name=obj.name, + id=obj.id, + display_name=properties.display_name, + description=properties.description, + properties=properties.properties, + tags=properties.tags, + experiment_name=properties.experiment_name, + services=properties.services, + status=properties.status, + creation_context=SystemData._from_rest_object(obj.system_data) if obj.system_data else None, + trial=trial, # type: ignore[arg-type] + compute=properties.compute_id, + sampling_algorithm=sampling_algorithm, + search_space=_search_space, # type: ignore[arg-type] + limits=SweepJobLimits._from_rest_object(properties.limits), + early_termination=early_termination, + objective=Objective._from_rest_object(properties.objective) if properties.objective else None, + inputs=from_rest_inputs_to_dataset_literal(properties.inputs), + outputs=from_rest_data_outputs(properties.outputs), + identity=( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + queue_settings=properties.queue_settings, + resources=properties.resources if hasattr(properties, "resources") else None, + ) + + def _override_missing_properties_from_trial(self) -> None: + if not isinstance(self.trial, CommandJob): + return + + if not self.compute: + self.compute = self.trial.compute + if not self.inputs: + self.inputs = self.trial.inputs + if not self.outputs: + self.outputs = self.trial.outputs + + has_trial_limits_timeout = self.trial.limits and self.trial.limits.timeout + if has_trial_limits_timeout and not self.limits: + time_out = self.trial.limits.timeout if self.trial.limits is not None else None + self.limits = SweepJobLimits(trial_timeout=time_out) + elif has_trial_limits_timeout and self.limits is not None and not self.limits.trial_timeout: + self.limits.trial_timeout = self.trial.limits.timeout if self.trial.limits is not None else None diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/to_rest_functions.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/to_rest_functions.py new file mode 100644 index 00000000..472cbc91 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/to_rest_functions.py @@ -0,0 +1,82 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +from functools import singledispatch +from pathlib import Path +from typing import Any + +from azure.ai.ml._restclient.v2023_08_01_preview.models import JobBase as JobBaseData +from azure.ai.ml._restclient.v2025_01_01_preview.models import JobBase as JobBaseData202501 +from azure.ai.ml.constants._common import DEFAULT_EXPERIMENT_NAME +from azure.ai.ml.entities._builders.command import Command +from azure.ai.ml.entities._builders.pipeline import Pipeline +from azure.ai.ml.entities._builders.spark import Spark +from azure.ai.ml.entities._builders.sweep import Sweep +from azure.ai.ml.entities._job.job_name_generator import generate_job_name + +from .import_job import ImportJob +from .job import Job + + +def generate_defaults(job: Job, rest_job: JobBaseData) -> None: + # Default name to a generated user friendly name. 
+ if not job.name: + rest_job.name = generate_job_name() + + if not job.display_name: + rest_job.properties.display_name = rest_job.name + + # Default experiment to current folder name or "Default" + if not job.experiment_name: + rest_job.properties.experiment_name = Path("./").resolve().stem.replace(" ", "") or DEFAULT_EXPERIMENT_NAME + + +@singledispatch +def to_rest_job_object(something: Any) -> JobBaseData: + raise NotImplementedError() + + +@to_rest_job_object.register(Job) +def _(job: Job) -> JobBaseData: + # TODO: Bug Item number: 2883432 + rest_job = job._to_rest_object() # type: ignore + generate_defaults(job, rest_job) + return rest_job + + +@to_rest_job_object.register(Command) +def _(command: Command) -> JobBaseData202501: + rest_job = command._to_job()._to_rest_object() + generate_defaults(command, rest_job) + return rest_job + + +@to_rest_job_object.register(Sweep) +def _(sweep: Sweep) -> JobBaseData: + rest_job = sweep._to_job()._to_rest_object() + generate_defaults(sweep, rest_job) + return rest_job + + +@to_rest_job_object.register(Pipeline) +def _(pipeline: Pipeline) -> JobBaseData: + rest_job = pipeline._to_job()._to_rest_object() + generate_defaults(pipeline, rest_job) + return rest_job + + +@to_rest_job_object.register(Spark) +def _(spark: Spark) -> JobBaseData: + rest_job = spark._to_job()._to_rest_object() + generate_defaults(spark, rest_job) + return rest_job + + +@to_rest_job_object.register(ImportJob) +def _(importJob: ImportJob) -> JobBaseData: + rest_job = importJob._to_rest_object() + generate_defaults(importJob, rest_job) + return rest_job |
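The to_rest_functions.py module above routes each job/builder type to its own REST conversion with functools.singledispatch, then lets generate_defaults backfill the name, display name, and experiment name on the resulting payload. Below is a minimal, self-contained sketch of that dispatch-plus-defaulting pattern; the FakeJob/FakeCommand classes and the plain-dict payload are hypothetical stand-ins for illustration, not the azure.ai.ml entity or REST types.

# Minimal sketch of the singledispatch + generate_defaults pattern above.
# FakeJob / FakeCommand and the dict payload are hypothetical stand-ins.
from dataclasses import dataclass
from functools import singledispatch
from pathlib import Path
from typing import Any, Dict, Optional
import uuid


@dataclass
class FakeJob:
    name: Optional[str] = None
    display_name: Optional[str] = None
    experiment_name: Optional[str] = None


@dataclass
class FakeCommand(FakeJob):
    command: str = "echo hello"


def generate_defaults(job: FakeJob, rest: Dict[str, Any]) -> None:
    # Same defaulting order as the SDK helper: generated name, display name
    # copied from the name, experiment name derived from the current folder.
    if not job.name:
        rest["name"] = "job-" + uuid.uuid4().hex[:8]
    if not job.display_name:
        rest["display_name"] = rest.get("name") or job.name
    if not job.experiment_name:
        rest["experiment_name"] = Path(".").resolve().stem.replace(" ", "") or "Default"


@singledispatch
def to_rest_job_object(entity: Any) -> Dict[str, Any]:
    # Fallback when no converter is registered for the concrete type.
    raise NotImplementedError(f"no REST conversion registered for {type(entity).__name__}")


@to_rest_job_object.register(FakeJob)
def _(job: FakeJob) -> Dict[str, Any]:
    rest = {"name": job.name, "display_name": job.display_name,
            "experiment_name": job.experiment_name}
    generate_defaults(job, rest)
    return rest


@to_rest_job_object.register(FakeCommand)
def _(cmd: FakeCommand) -> Dict[str, Any]:
    # More specific types get their own converter; singledispatch picks the
    # closest registered class in the type's MRO.
    rest = {"name": cmd.name, "display_name": cmd.display_name,
            "experiment_name": cmd.experiment_name, "command": cmd.command}
    generate_defaults(cmd, rest)
    return rest


if __name__ == "__main__":
    print(to_rest_job_object(FakeCommand(name="train")))

Registering converters against the concrete builder types keeps the single to_rest_job_object entry point stable while each entity continues to own the details of its _to_rest_object conversion.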

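Stepping back to sweep_job.py above, here is a hedged usage sketch of how a SweepJob wrapping a CommandJob trial is typically assembled before _to_rest_object() runs. The import paths follow the public azure.ai.ml surface as I understand it; the compute target, environment string, metric name, and the ${{search_space.*}} parameter references in the command are illustrative assumptions, not values taken from this diff.

# Hedged sketch, assuming the public azure.ai.ml.entities / azure.ai.ml.sweep
# import paths; "cpu-cluster", the environment string, and "accuracy" are
# illustrative placeholders.
from azure.ai.ml.entities import CommandJob
from azure.ai.ml.sweep import Choice, Objective, SweepJob, SweepJobLimits, Uniform

trial = CommandJob(
    code="./src",
    command="python train.py --lr ${{search_space.lr}} --batch ${{search_space.batch}}",
    environment="AzureML-sklearn-1.5:1",   # assumed environment name
    compute="cpu-cluster",                 # assumed compute target
    experiment_name="sweep-demo",
)

sweep_job = SweepJob(
    trial=trial,
    sampling_algorithm="bayesian",         # string form; a SamplingAlgorithm object also works
    search_space={
        "lr": Uniform(min_value=0.001, max_value=0.1),
        "batch": Choice(values=[16, 32, 64]),
    },
    objective=Objective(goal="maximize", primary_metric="accuracy"),
    limits=SweepJobLimits(max_total_trials=20, trial_timeout=3600),
)

# Note: compute, inputs/outputs, and the per-trial timeout can be left off the
# SweepJob itself; _override_missing_properties_from_trial() (called at the
# top of _to_rest_object above) copies them from the CommandJob trial, so the
# serialized job still carries a compute target and trial timeout.

Submitting the job (e.g. via MLClient.jobs.create_or_update(sweep_job)) is what ultimately drives the _to_rest_object() path shown in the diff, including search-space key validation and the merge of trial-level resources into the sweep-level payload.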