Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl')
40 files changed, 10147 insertions, 0 deletions
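The files in this diff implement the entity layer that the public azure.ai.ml.automl factory functions return. As orientation before the listing, here is a minimal sketch of how the tabular entities below are typically driven; the compute target, experiment name, and MLTable paths are placeholders, and the factory/setter calls follow the public azure-ai-ml API:

from azure.ai.ml import Input, automl
from azure.ai.ml.constants import AssetTypes

# automl.classification returns a ClassificationJob, a tabular AutoMLVertical subclass.
classification_job = automl.classification(
    compute="cpu-cluster",  # placeholder compute target
    experiment_name="automl-classification-demo",  # placeholder experiment name
    training_data=Input(type=AssetTypes.MLTABLE, path="./train-mltable-folder"),
    validation_data=Input(type=AssetTypes.MLTABLE, path="./valid-mltable-folder"),
    target_column_name="label",
    primary_metric="accuracy",
)

# Limits and training behavior are configured through the entity's setter helpers
# rather than through constructor arguments.
classification_job.set_limits(timeout_minutes=60, max_trials=20, max_concurrent_trials=4)
classification_job.set_training(enable_onnx_compatible_models=True)

The configured entity is then submitted with MLClient.jobs.create_or_update(classification_job).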
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/__init__.py new file mode 100644 index 00000000..e99e9321 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/__init__.py @@ -0,0 +1,16 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +__path__ = __import__("pkgutil").extend_path(__path__, __name__) + +from .search_space import SearchSpace +from .stack_ensemble_settings import StackEnsembleSettings +from .training_settings import ClassificationTrainingSettings, TrainingSettings + +__all__ = [ + "ClassificationTrainingSettings", + "TrainingSettings", + "SearchSpace", + "StackEnsembleSettings", +] diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/automl_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/automl_job.py new file mode 100644 index 00000000..9e1b4d05 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/automl_job.py @@ -0,0 +1,283 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +import logging +from abc import ABC, abstractmethod +from typing import Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2024_01_01_preview.models import ( + JobBase, + MLTableJobInput, + QueueSettings, + ResourceConfiguration, + TaskType, +) +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants import JobType +from azure.ai.ml.constants._common import TYPE, AssetTypes +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import ( + AmlTokenConfiguration, + ManagedIdentityConfiguration, + UserIdentityConfiguration, +) +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job.job import Job +from azure.ai.ml.entities._job.job_io_mixin import JobIOMixin +from azure.ai.ml.entities._job.pipeline._io import AutoMLNodeIOMixin +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + +module_logger = logging.getLogger(__name__) + + +class AutoMLJob(Job, JobIOMixin, AutoMLNodeIOMixin, ABC): + """Initialize an AutoML job entity. + + Constructor for an AutoMLJob. 
+ + :keyword resources: Resource configuration for the AutoML job, defaults to None + :paramtype resources: typing.Optional[ResourceConfiguration] + :keyword identity: Identity that training job will use while running on compute, defaults to None + :paramtype identity: typing.Optional[ typing.Union[ManagedIdentityConfiguration, AmlTokenConfiguration + , UserIdentityConfiguration] ] + :keyword environment_id: The environment id for the AutoML job, defaults to None + :paramtype environment_id: typing.Optional[str] + :keyword environment_variables: The environment variables for the AutoML job, defaults to None + :paramtype environment_variables: typing.Optional[Dict[str, str]] + :keyword outputs: The outputs for the AutoML job, defaults to None + :paramtype outputs: typing.Optional[Dict[str, str]] + :keyword queue_settings: The queue settings for the AutoML job, defaults to None + :paramtype queue_settings: typing.Optional[QueueSettings] + :raises ValidationException: task type validation error + :raises NotImplementedError: Raises NotImplementedError + :return: An AutoML Job + :rtype: AutoMLJob + """ + + def __init__( + self, + *, + resources: Optional[ResourceConfiguration] = None, + identity: Optional[ + Union[ManagedIdentityConfiguration, AmlTokenConfiguration, UserIdentityConfiguration] + ] = None, + queue_settings: Optional[QueueSettings] = None, + **kwargs: Any, + ) -> None: + """Initialize an AutoML job entity. + + Constructor for an AutoMLJob. + + :keyword resources: Resource configuration for the AutoML job, defaults to None + :paramtype resources: typing.Optional[ResourceConfiguration] + :keyword identity: Identity that training job will use while running on compute, defaults to None + :paramtype identity: typing.Optional[ typing.Union[ManagedIdentityConfiguration, AmlTokenConfiguration + , UserIdentityConfiguration] ] + :keyword environment_id: The environment id for the AutoML job, defaults to None + :paramtype environment_id: typing.Optional[str] + :keyword environment_variables: The environment variables for the AutoML job, defaults to None + :paramtype environment_variables: typing.Optional[Dict[str, str]] + :keyword outputs: The outputs for the AutoML job, defaults to None + :paramtype outputs: typing.Optional[Dict[str, str]] + :keyword queue_settings: The queue settings for the AutoML job, defaults to None + :paramtype queue_settings: typing.Optional[QueueSettings] + :raises ValidationException: task type validation error + :raises NotImplementedError: Raises NotImplementedError + """ + kwargs[TYPE] = JobType.AUTOML + self.environment_id = kwargs.pop("environment_id", None) + self.environment_variables = kwargs.pop("environment_variables", None) + self.outputs = kwargs.pop("outputs", None) + + super().__init__(**kwargs) + + self.resources = resources + self.identity = identity + self.queue_settings = queue_settings + + @property + @abstractmethod + def training_data(self) -> Input: + """The training data for the AutoML job. + + :raises NotImplementedError: Raises NotImplementedError + :return: Returns the training data for the AutoML job. + :rtype: Input + """ + raise NotImplementedError() + + @training_data.setter + def training_data(self, value: Any) -> None: + self.training_data = value + + @property + @abstractmethod + def validation_data(self) -> Input: + """The validation data for the AutoML job. + + :raises NotImplementedError: Raises NotImplementedError + :return: Returns the validation data for the AutoML job. 
+ :rtype: Input + """ + raise NotImplementedError() + + @validation_data.setter + def validation_data(self, value: Any) -> None: + self.validation_data = value + + @property + @abstractmethod + def test_data(self) -> Input: + """The test data for the AutoML job. + + :raises NotImplementedError: Raises NotImplementedError + :return: Returns the test data for the AutoML job. + :rtype: Input + """ + raise NotImplementedError() + + @test_data.setter + def test_data(self, value: Any) -> None: + self.test_data = value + + @classmethod + def _load_from_rest(cls, obj: JobBase) -> "AutoMLJob": + """Loads the rest object to a dict containing items to init the AutoMLJob objects. + + :param obj: Azure Resource Manager resource envelope. + :type obj: JobBase + :raises ValidationException: task type validation error + :return: An AutoML Job + :rtype: AutoMLJob + """ + task_type = ( + camel_to_snake(obj.properties.task_details.task_type) if obj.properties.task_details.task_type else None + ) + class_type = cls._get_task_mapping().get(task_type, None) + if class_type: + res: AutoMLJob = class_type._from_rest_object(obj) + return res + msg = f"Unsupported task type: {obj.properties.task_details.task_type}" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.SYSTEM_ERROR, + ) + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "AutoMLJob": + """Loads the dictionary objects to an AutoMLJob object. + + :param data: A data dictionary. + :type data: typing.Dict + :param context: A context dictionary. + :type context: typing.Dict + :param additional_message: An additional message to be logged in the ValidationException. + :type additional_message: str + + :raises ValidationException: task type validation error + :return: An AutoML Job + :rtype: AutoMLJob + """ + task_type = data.get(AutoMLConstants.TASK_TYPE_YAML) + class_type = cls._get_task_mapping().get(task_type, None) + if class_type: + res: AutoMLJob = class_type._load_from_dict( + data, + context, + additional_message, + **kwargs, + ) + return res + msg = f"Unsupported task type: {task_type}" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "AutoMLJob": + """Create an automl job instance from schema parsed dict. + + :param loaded_data: A loaded_data dictionary. + :type loaded_data: typing.Dict + :raises ValidationException: task type validation error + :return: An AutoML Job + :rtype: AutoMLJob + """ + task_type = loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML) + class_type = cls._get_task_mapping().get(task_type, None) + if class_type: + res: AutoMLJob = class_type._create_instance_from_schema_dict(loaded_data=loaded_data) + return res + msg = f"Unsupported task type: {task_type}" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + @classmethod + def _get_task_mapping(cls) -> Dict: + """Create a mapping of task type to job class. + + :return: An AutoMLVertical object containing the task type to job class mapping. 
+ :rtype: AutoMLVertical + """ + from .image import ( + ImageClassificationJob, + ImageClassificationMultilabelJob, + ImageInstanceSegmentationJob, + ImageObjectDetectionJob, + ) + from .nlp import TextClassificationJob, TextClassificationMultilabelJob, TextNerJob + from .tabular import ClassificationJob, ForecastingJob, RegressionJob + + # create a mapping of task type to job class + return { + camel_to_snake(TaskType.CLASSIFICATION): ClassificationJob, + camel_to_snake(TaskType.REGRESSION): RegressionJob, + camel_to_snake(TaskType.FORECASTING): ForecastingJob, + camel_to_snake(TaskType.IMAGE_CLASSIFICATION): ImageClassificationJob, + camel_to_snake(TaskType.IMAGE_CLASSIFICATION_MULTILABEL): ImageClassificationMultilabelJob, + camel_to_snake(TaskType.IMAGE_OBJECT_DETECTION): ImageObjectDetectionJob, + camel_to_snake(TaskType.IMAGE_INSTANCE_SEGMENTATION): ImageInstanceSegmentationJob, + camel_to_snake(TaskType.TEXT_NER): TextNerJob, + camel_to_snake(TaskType.TEXT_CLASSIFICATION): TextClassificationJob, + camel_to_snake(TaskType.TEXT_CLASSIFICATION_MULTILABEL): TextClassificationMultilabelJob, + } + + def _resolve_data_inputs(self, rest_job: "AutoMLJob") -> None: + """Resolve JobInputs to MLTableJobInputs within data_settings. + + :param rest_job: The rest job object. + :type rest_job: AutoMLJob + """ + if isinstance(rest_job.training_data, Input): + rest_job.training_data = MLTableJobInput(uri=rest_job.training_data.path) + if isinstance(rest_job.validation_data, Input): + rest_job.validation_data = MLTableJobInput(uri=rest_job.validation_data.path) + if hasattr(rest_job, "test_data") and isinstance(rest_job.test_data, Input): + rest_job.test_data = MLTableJobInput(uri=rest_job.test_data.path) + + def _restore_data_inputs(self) -> None: + """Restore MLTableJobInputs to JobInputs within data_settings.""" + if isinstance(self.training_data, MLTableJobInput): + self.training_data = Input(type=AssetTypes.MLTABLE, path=self.training_data.uri) + if isinstance(self.validation_data, MLTableJobInput): + self.validation_data = Input(type=AssetTypes.MLTABLE, path=self.validation_data.uri) + if hasattr(self, "test_data") and isinstance(self.test_data, MLTableJobInput): + self.test_data = Input(type=AssetTypes.MLTABLE, path=self.test_data.uri) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/automl_vertical.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/automl_vertical.py new file mode 100644 index 00000000..f11be81c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/automl_vertical.py @@ -0,0 +1,134 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from abc import abstractmethod +from typing import Any, Optional + +from azure.ai.ml import Input + +from .automl_job import AutoMLJob + + +class AutoMLVertical(AutoMLJob): + """Abstract class for AutoML verticals. + + :param task_type: The type of task to run. Possible values include: "classification", "regression", "forecasting". + :type task_type: str + :param training_data: Training data input + :type training_data: Input + :param validation_data: Validation data input + :type validation_data: Input + :param test_data: Test data input, defaults to None + :type test_data: typing.Optional[Input] + :raises ValueError: If task_type is not one of "classification", "regression", "forecasting". 
+ :raises ValueError: If training_data is not of type Input. + :raises ValueError: If validation_data is not of type Input. + :raises ValueError: If test_data is not of type Input. + """ + + @abstractmethod + def __init__( + self, + task_type: str, + training_data: Input, + validation_data: Input, + test_data: Optional[Input] = None, + **kwargs: Any + ) -> None: + """Initialize AutoMLVertical. + + Constructor for AutoMLVertical. + + :param task_type: The type of task to run. Possible values include: "classification", "regression" + , "forecasting". + :type task_type: str + :param training_data: Training data input + :type training_data: Input + :param validation_data: Validation data input + :type validation_data: Input + :param test_data: Test data input, defaults to None + :type test_data: typing.Optional[Input] + :raises ValueError: If task_type is not one of "classification", "regression", "forecasting". + :raises ValueError: If training_data is not of type Input. + :raises ValueError: If validation_data is not of type Input. + :raises ValueError: If test_data is not of type Input. + """ + self._task_type = task_type + self.training_data = training_data + self.validation_data = validation_data + self.test_data = test_data # type: ignore + super().__init__(**kwargs) + + @property + def task_type(self) -> str: + """Get task type. + + :return: The type of task to run. Possible values include: "classification", "regression", "forecasting". + :rtype: str + """ + return self._task_type + + @task_type.setter + def task_type(self, task_type: str) -> None: + """Set task type. + + :param task_type: The type of task to run. Possible values include: "classification", "regression" + , "forecasting". + :type task_type: str + """ + self._task_type = task_type + + @property + def training_data(self) -> Input: + """Get training data. + + :return: Training data input + :rtype: Input + """ + return self._training_data + + @training_data.setter + def training_data(self, training_data: Input) -> None: + """Set training data. + + :param training_data: Training data input + :type training_data: Input + """ + self._training_data = training_data + + @property + def validation_data(self) -> Input: + """Get validation data. + + :return: Validation data input + :rtype: Input + """ + return self._validation_data + + @validation_data.setter + def validation_data(self, validation_data: Input) -> None: + """Set validation data. + + :param validation_data: Validation data input + :type validation_data: Input + """ + self._validation_data = validation_data + + @property + def test_data(self) -> Input: + """Get test data. + + :return: Test data input + :rtype: Input + """ + return self._test_data + + @test_data.setter + def test_data(self, test_data: Input) -> None: + """Set test data. + + :param test_data: Test data input + :type test_data: Input + """ + self._test_data = test_data diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/featurization_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/featurization_settings.py new file mode 100644 index 00000000..c9e73d21 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/featurization_settings.py @@ -0,0 +1,32 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +from typing import Optional + +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class FeaturizationSettings(RestTranslatableMixin): + """Base Featurization settings.""" + + def __init__( + self, + *, + dataset_language: Optional[str] = None, + ): + self.dataset_language = dataset_language + + def __eq__(self, other: object) -> bool: + if not isinstance(other, FeaturizationSettings): + return NotImplemented + + return self.dataset_language == other.dataset_language + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) + + +class FeaturizationSettingsType: + NLP = "nlp" + TABULAR = "tabular" diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/__init__.py new file mode 100644 index 00000000..46964086 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/__init__.py @@ -0,0 +1,35 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from .automl_image import AutoMLImage +from .image_classification_job import ImageClassificationJob +from .image_classification_multilabel_job import ImageClassificationMultilabelJob +from .image_classification_search_space import ImageClassificationSearchSpace +from .image_instance_segmentation_job import ImageInstanceSegmentationJob +from .image_limit_settings import ImageLimitSettings +from .image_model_settings import ( + ImageModelSettingsClassification, + ImageModelSettingsObjectDetection, + LogTrainingMetrics, + LogValidationLoss, +) +from .image_object_detection_job import ImageObjectDetectionJob +from .image_object_detection_search_space import ImageObjectDetectionSearchSpace +from .image_sweep_settings import ImageSweepSettings + +__all__ = [ + "AutoMLImage", + "LogTrainingMetrics", + "LogValidationLoss", + "ImageClassificationJob", + "ImageClassificationMultilabelJob", + "ImageClassificationSearchSpace", + "ImageInstanceSegmentationJob", + "ImageLimitSettings", + "ImageObjectDetectionJob", + "ImageObjectDetectionSearchSpace", + "ImageSweepSettings", + "ImageModelSettingsClassification", + "ImageModelSettingsObjectDetection", +] diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image.py new file mode 100644 index 00000000..a07bba4a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image.py @@ -0,0 +1,244 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +from abc import ABC +from typing import Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import LogVerbosity, SamplingAlgorithmType +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job.automl.automl_vertical import AutoMLVertical +from azure.ai.ml.entities._job.automl.image.image_limit_settings import ImageLimitSettings +from azure.ai.ml.entities._job.automl.image.image_sweep_settings import ImageSweepSettings +from azure.ai.ml.entities._job.sweep.early_termination_policy import ( + BanditPolicy, + MedianStoppingPolicy, + TruncationSelectionPolicy, +) +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +class AutoMLImage(AutoMLVertical, ABC): + """Base class for all AutoML Image jobs. + You should not instantiate this class directly. + Instead you should create classes for specific AutoML Image tasks. + + :keyword task_type: Required. Type of task to run. + Possible values include: "ImageClassification", "ImageClassificationMultilabel", + "ImageObjectDetection", "ImageInstanceSegmentation" + :paramtype task_type: str + :keyword limits: Limit settings for all AutoML Image jobs. Defaults to None. + :paramtype limits: Optional[~azure.ai.ml.automl.ImageLimitSettings] + :keyword sweep: Sweep settings for all AutoML Image jobs. Defaults to None. + :paramtype sweep: Optional[~azure.ai.ml.automl.ImageSweepSettings] + :keyword kwargs: Additional keyword arguments for AutoMLImage. + :paramtype kwargs: Dict[str, Any] + """ + + def __init__( + self, + *, + task_type: str, + limits: Optional[ImageLimitSettings] = None, + sweep: Optional[ImageSweepSettings] = None, + **kwargs: Any, + ) -> None: + self.log_verbosity = kwargs.pop("log_verbosity", LogVerbosity.INFO) + self.target_column_name = kwargs.pop("target_column_name", None) + self.validation_data_size = kwargs.pop("validation_data_size", None) + + super().__init__( + task_type=task_type, + training_data=kwargs.pop("training_data", None), + validation_data=kwargs.pop("validation_data", None), + **kwargs, + ) + + # Set default value for self._limits as it is a required property in rest object. + self._limits = limits or ImageLimitSettings() + self._sweep = sweep + + @property + def log_verbosity(self) -> LogVerbosity: + """Returns the verbosity of the logger. + + :return: The log verbosity. + :rtype: ~azure.ai.ml._restclient.v2023_04_01_preview.models.LogVerbosity + """ + return self._log_verbosity + + @log_verbosity.setter + def log_verbosity(self, value: Union[str, LogVerbosity]) -> None: + """Sets the verbosity of the logger. + + :param value: The value to set the log verbosity to. + Possible values include: "NotSet", "Debug", "Info", "Warning", "Error", "Critical". + :type value: Union[str, ~azure.ai.ml._restclient.v2023_04_01_preview.models.LogVerbosity] + """ + self._log_verbosity = None if value is None else LogVerbosity[camel_to_snake(value).upper()] + + @property + def limits(self) -> ImageLimitSettings: + """Returns the limit settings for all AutoML Image jobs. + + :return: The limit settings. + :rtype: ~azure.ai.ml.automl.ImageLimitSettings + """ + return self._limits + + @limits.setter + def limits(self, value: Union[Dict, ImageLimitSettings]) -> None: + if isinstance(value, ImageLimitSettings): + self._limits = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for limit settings." 
+ raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_limits(**value) + + @property + def sweep(self) -> Optional[ImageSweepSettings]: + """Returns the sweep settings for all AutoML Image jobs. + + :return: The sweep settings. + :rtype: ~azure.ai.ml.automl.ImageSweepSettings + """ + return self._sweep + + @sweep.setter + def sweep(self, value: Union[Dict, ImageSweepSettings]) -> None: + """Sets the sweep settings for all AutoML Image jobs. + + :param value: The value to set the sweep settings to. + :type value: Union[Dict, ~azure.ai.ml.automl.ImageSweepSettings] + :raises ~azure.ai.ml.exceptions.ValidationException: If value is not a dictionary. + :return: None + """ + if isinstance(value, ImageSweepSettings): + self._sweep = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for sweep settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_sweep(**value) + + def set_data( + self, + *, + training_data: Input, + target_column_name: str, + validation_data: Optional[Input] = None, + validation_data_size: Optional[float] = None, + ) -> None: + """Data settings for all AutoML Image jobs. + + :keyword training_data: Required. Training data. + :type training_data: ~azure.ai.ml.entities.Input + :keyword target_column_name: Required. Target column name. + :type target_column_name: str + :keyword validation_data: Optional. Validation data. + :type validation_data: Optional[~azure.ai.ml.entities.Input] + :keyword validation_data_size: Optional. The fraction of training dataset that needs to be set aside for + validation purpose. Values should be in range (0.0 , 1.0). + Applied only when validation dataset is not provided. + :type validation_data_size: Optional[float] + :return: None + """ + self.target_column_name = self.target_column_name if target_column_name is None else target_column_name + self.training_data = self.training_data if training_data is None else training_data + self.validation_data = self.validation_data if validation_data is None else validation_data + self.validation_data_size = self.validation_data_size if validation_data_size is None else validation_data_size + + def set_limits( + self, + *, + max_concurrent_trials: Optional[int] = None, + max_trials: Optional[int] = None, + timeout_minutes: Optional[int] = None, + ) -> None: + """Limit settings for all AutoML Image Jobs. + + :keyword max_concurrent_trials: Maximum number of trials to run concurrently. + :type max_concurrent_trials: Optional[int]. Defaults to None. + :keyword max_trials: Maximum number of trials to run. Defaults to None. + :type max_trials: Optional[int] + :keyword timeout_minutes: AutoML job timeout. 
+ :type timeout_minutes: ~datetime.timedelta + :return: None + """ + self._limits = self._limits or ImageLimitSettings() + self._limits.max_concurrent_trials = ( + max_concurrent_trials if max_concurrent_trials is not None else self._limits.max_concurrent_trials + ) + self._limits.max_trials = max_trials if max_trials is not None else self._limits.max_trials + self._limits.timeout_minutes = timeout_minutes if timeout_minutes is not None else self._limits.timeout_minutes + + def set_sweep( + self, + *, + sampling_algorithm: Union[ + str, SamplingAlgorithmType.RANDOM, SamplingAlgorithmType.GRID, SamplingAlgorithmType.BAYESIAN + ], + early_termination: Optional[Union[BanditPolicy, MedianStoppingPolicy, TruncationSelectionPolicy]] = None, + ) -> None: + """Sweep settings for all AutoML Image jobs. + + :keyword sampling_algorithm: Required. Type of the hyperparameter sampling + algorithms. Possible values include: "Grid", "Random", "Bayesian". + :type sampling_algorithm: Union[str, ~azure.mgmt.machinelearningservices.models.SamplingAlgorithmType.RANDOM, + ~azure.mgmt.machinelearningservices.models.SamplingAlgorithmType.GRID, + ~azure.mgmt.machinelearningservices.models.SamplingAlgorithmType.BAYESIAN] + :keyword early_termination: Type of early termination policy. + :type early_termination: Union[ + ~azure.mgmt.machinelearningservices.models.BanditPolicy, + ~azure.mgmt.machinelearningservices.models.MedianStoppingPolicy, + ~azure.mgmt.machinelearningservices.models.TruncationSelectionPolicy] + :return: None + """ + if self._sweep: + self._sweep.sampling_algorithm = sampling_algorithm + else: + self._sweep = ImageSweepSettings(sampling_algorithm=sampling_algorithm) + + self._sweep.early_termination = early_termination or self._sweep.early_termination + + def __eq__(self, other: object) -> bool: + """Compares two AutoMLImage objects for equality. + + :param other: The other AutoMLImage object to compare to. + :type other: ~azure.ai.ml.automl.AutoMLImage + :return: True if the two AutoMLImage objects are equal; False otherwise. + :rtype: bool + """ + if not isinstance(other, AutoMLImage): + return NotImplemented + + return ( + self.target_column_name == other.target_column_name + and self.training_data == other.training_data + and self.validation_data == other.validation_data + and self.validation_data_size == other.validation_data_size + and self._limits == other._limits + and self._sweep == other._sweep + ) + + def __ne__(self, other: object) -> bool: + """Compares two AutoMLImage objects for inequality. + + :param other: The other AutoMLImage object to compare to. + :type other: ~azure.ai.ml.automl.AutoMLImage + :return: True if the two AutoMLImage objects are not equal; False otherwise. + :rtype: bool + """ + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image_classification_base.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image_classification_base.py new file mode 100644 index 00000000..ef0c8a2d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image_classification_base.py @@ -0,0 +1,439 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Any, Dict, List, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import LearningRateScheduler, StochasticOptimizer +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.entities._job.automl.image.automl_image import AutoMLImage +from azure.ai.ml.entities._job.automl.image.image_classification_search_space import ImageClassificationSearchSpace +from azure.ai.ml.entities._job.automl.image.image_limit_settings import ImageLimitSettings +from azure.ai.ml.entities._job.automl.image.image_model_settings import ImageModelSettingsClassification +from azure.ai.ml.entities._job.automl.image.image_sweep_settings import ImageSweepSettings +from azure.ai.ml.entities._job.automl.search_space import SearchSpace +from azure.ai.ml.entities._job.automl.utils import cast_to_specific_search_space +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +class AutoMLImageClassificationBase(AutoMLImage): + """Base class for AutoML Image Classification and Image Classification Multilabel tasks. + Please do not instantiate this class directly. Instantiate one of the child classes instead. + + :keyword task_type: Type of task to run. + Possible values include: "ImageClassification", "ImageClassificationMultilabel". + :paramtype task_type: str + :keyword limits: Limits for Automl image classification jobs. Defaults to None. + :paramtype limits: Optional[~azure.ai.ml.automl.ImageLimitSettings] + :keyword sweep: Sweep settings for Automl image classification jobs. Defaults to None. + :paramtype sweep: Optional[~azure.ai.ml.automl.ImageSweepSettings] + :keyword training_parameters: Training parameters for Automl image classification jobs. Defaults to None. + :paramtype training_parameters: Optional[~azure.ai.ml.automl.ImageModelSettingsClassification] + :keyword search_space: Search space for Automl image classification jobs. Defaults to None. + :paramtype search_space: Optional[List[~azure.ai.ml.automl.ImageClassificationSearchSpace]] + :keyword kwargs: Other Keyword arguments for AutoMLImageClassificationBase class. + :paramtype kwargs: Dict[str, Any] + """ + + def __init__( + self, + *, + task_type: str, + limits: Optional[ImageLimitSettings] = None, + sweep: Optional[ImageSweepSettings] = None, + training_parameters: Optional[ImageModelSettingsClassification] = None, + search_space: Optional[List[ImageClassificationSearchSpace]] = None, + **kwargs: Any, + ) -> None: + self._training_parameters: Optional[ImageModelSettingsClassification] = None + + super().__init__(task_type=task_type, limits=limits, sweep=sweep, **kwargs) + self.training_parameters = training_parameters # Assigning training_parameters through setter method. + self._search_space = search_space + + @property + def training_parameters(self) -> Optional[ImageModelSettingsClassification]: + """ + :rtype: ~azure.ai.ml.automl.ImageModelSettingsClassification + :return: Training parameters for AutoML Image Classification and Image Classification Multilabel tasks. + """ + return self._training_parameters + + @training_parameters.setter + def training_parameters(self, value: Union[Dict, ImageModelSettingsClassification]) -> None: + """Setting Image training parameters for AutoML Image Classification and Image Classification Multilabel tasks. + + :param value: Training parameters for AutoML Image Classification and Image Classification Multilabel tasks. 
+ :type value: Union[Dict, ~azure.ai.ml.automl.ImageModelSettingsClassification] + :raises ~azure.ml.exceptions.ValidationException if value is not a dictionary or + ImageModelSettingsClassification. + :return: None + """ + if value is None: + self._training_parameters = None + elif isinstance(value, ImageModelSettingsClassification): + self._training_parameters = value + # set_training_parameters convert parameter values from snake case str to enum. + # We need to add any future enum parameters in this call to support snake case str. + self.set_training_parameters( + optimizer=value.optimizer, + learning_rate_scheduler=value.learning_rate_scheduler, + ) + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for model settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_training_parameters(**value) + + @property + def search_space(self) -> Optional[List[ImageClassificationSearchSpace]]: + """ + :rtype: List[~azure.ai.ml.automl.ImageClassificationSearchSpace] + :return: Search space for AutoML Image Classification and Image Classification Multilabel tasks. + """ + return self._search_space + + @search_space.setter + def search_space(self, value: Union[List[Dict], List[SearchSpace]]) -> None: + """Setting Image search space for AutoML Image Classification and Image Classification Multilabel tasks. + + :param value: Search space for AutoML Image Classification and Image Classification Multilabel tasks. + :type value: Union[List[Dict], List[~azure.ai.ml.automl.ImageClassificationSearchSpace]] + :raises ~azure.ml.exceptions.ValidationException if value is not a list of dictionaries or + ImageClassificationSearchSpace. + """ + if not isinstance(value, list): + msg = "Expected a list for search space." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + all_dict_type = all(isinstance(item, dict) for item in value) + all_search_space_type = all(isinstance(item, SearchSpace) for item in value) + + if all_search_space_type or all_dict_type: + self._search_space = [ + cast_to_specific_search_space(item, ImageClassificationSearchSpace, self.task_type) # type: ignore + for item in value + ] + else: + msg = "Expected all items in the list to be either dictionaries or ImageClassificationSearchSpace objects." 
+ raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + # pylint: disable=too-many-locals + def set_training_parameters( + self, + *, + advanced_settings: Optional[str] = None, + ams_gradient: Optional[bool] = None, + beta1: Optional[float] = None, + beta2: Optional[float] = None, + checkpoint_frequency: Optional[int] = None, + checkpoint_run_id: Optional[str] = None, + distributed: Optional[bool] = None, + early_stopping: Optional[bool] = None, + early_stopping_delay: Optional[int] = None, + early_stopping_patience: Optional[int] = None, + enable_onnx_normalization: Optional[bool] = None, + evaluation_frequency: Optional[int] = None, + gradient_accumulation_step: Optional[int] = None, + layers_to_freeze: Optional[int] = None, + learning_rate: Optional[float] = None, + learning_rate_scheduler: Optional[Union[str, LearningRateScheduler]] = None, + model_name: Optional[str] = None, + momentum: Optional[float] = None, + nesterov: Optional[bool] = None, + number_of_epochs: Optional[int] = None, + number_of_workers: Optional[int] = None, + optimizer: Optional[Union[str, StochasticOptimizer]] = None, + random_seed: Optional[int] = None, + step_lr_gamma: Optional[float] = None, + step_lr_step_size: Optional[int] = None, + training_batch_size: Optional[int] = None, + validation_batch_size: Optional[int] = None, + warmup_cosine_lr_cycles: Optional[float] = None, + warmup_cosine_lr_warmup_epochs: Optional[int] = None, + weight_decay: Optional[float] = None, + training_crop_size: Optional[int] = None, + validation_crop_size: Optional[int] = None, + validation_resize_size: Optional[int] = None, + weighted_loss: Optional[int] = None, + ) -> None: + """Setting Image training parameters for AutoML Image Classification and Image Classification Multilabel tasks. + + :keyword advanced_settings: Settings for advanced scenarios. + :paramtype advanced_settings: str + :keyword ams_gradient: Enable AMSGrad when optimizer is 'adam' or 'adamw'. + :paramtype ams_gradient: bool + :keyword beta1: Value of 'beta1' when optimizer is 'adam' or 'adamw'. Must be a float in the + range [0, 1]. + :paramtype beta1: float + :keyword beta2: Value of 'beta2' when optimizer is 'adam' or 'adamw'. Must be a float in the + range [0, 1]. + :paramtype beta2: float + :keyword checkpoint_frequency: Frequency to store model checkpoints. Must be a positive + integer. + :paramtype checkpoint_frequency: int + :keyword checkpoint_run_id: The id of a previous run that has a pretrained checkpoint for + incremental training. + :paramtype checkpoint_run_id: str + :keyword distributed: Whether to use distributed training. + :paramtype distributed: bool + :keyword early_stopping: Enable early stopping logic during training. + :paramtype early_stopping: bool + :keyword early_stopping_delay: Minimum number of epochs or validation evaluations to wait + before primary metric improvement + is tracked for early stopping. Must be a positive integer. + :paramtype early_stopping_delay: int + :keyword early_stopping_patience: Minimum number of epochs or validation evaluations with no + primary metric improvement before + the run is stopped. Must be a positive integer. + :paramtype early_stopping_patience: int + :keyword enable_onnx_normalization: Enable normalization when exporting ONNX model. + :paramtype enable_onnx_normalization: bool + :keyword evaluation_frequency: Frequency to evaluate validation dataset to get metric scores. 
+ Must be a positive integer. + :paramtype evaluation_frequency: int + :keyword gradient_accumulation_step: Gradient accumulation means running a configured number of + "GradAccumulationStep" steps without + updating the model weights while accumulating the gradients of those steps, and then using + the accumulated gradients to compute the weight updates. Must be a positive integer. + :paramtype gradient_accumulation_step: int + :keyword layers_to_freeze: Number of layers to freeze for the model. Must be a positive + integer. + For instance, passing 2 as value for 'seresnext' means + freezing layer0 and layer1. For a full list of models supported and details on layer freeze, + please + see: https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters#model-agnostic-hyperparameters. # pylint: disable=line-too-long + :type layers_to_freeze: int + :keyword learning_rate: Initial learning rate. Must be a float in the range [0, 1]. + :paramtype learning_rate: float + :keyword learning_rate_scheduler: Type of learning rate scheduler. Must be 'warmup_cosine' or + 'step'. Possible values include: "None", "WarmupCosine", "Step". + :type learning_rate_scheduler: str or + ~azure.mgmt.machinelearningservices.models.LearningRateScheduler + :keyword model_name: Name of the model to use for training. + For more information on the available models please visit the official documentation: + https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type model_name: str + :keyword momentum: Value of momentum when optimizer is 'sgd'. Must be a float in the range [0, + 1]. + :paramtype momentum: float + :keyword nesterov: Enable nesterov when optimizer is 'sgd'. + :paramtype nesterov: bool + :keyword number_of_epochs: Number of training epochs. Must be a positive integer. + :paramtype number_of_epochs: int + :keyword number_of_workers: Number of data loader workers. Must be a non-negative integer. + :paramtype number_of_workers: int + :keyword optimizer: Type of optimizer. Possible values include: "None", "Sgd", "Adam", "Adamw". + :type optimizer: str or ~azure.mgmt.machinelearningservices.models.StochasticOptimizer + :keyword random_seed: Random seed to be used when using deterministic training. + :paramtype random_seed: int + :keyword step_lr_gamma: Value of gamma when learning rate scheduler is 'step'. Must be a float + in the range [0, 1]. + :paramtype step_lr_gamma: float + :keyword step_lr_step_size: Value of step size when learning rate scheduler is 'step'. Must be + a positive integer. + :paramtype step_lr_step_size: int + :keyword training_batch_size: Training batch size. Must be a positive integer. + :paramtype training_batch_size: int + :keyword validation_batch_size: Validation batch size. Must be a positive integer. + :paramtype validation_batch_size: int + :keyword warmup_cosine_lr_cycles: Value of cosine cycle when learning rate scheduler is + 'warmup_cosine'. Must be a float in the range [0, 1]. + :paramtype warmup_cosine_lr_cycles: float + :keyword warmup_cosine_lr_warmup_epochs: Value of warmup epochs when learning rate scheduler is + 'warmup_cosine'. Must be a positive integer. + :paramtype warmup_cosine_lr_warmup_epochs: int + :keyword weight_decay: Value of weight decay when optimizer is 'sgd', 'adam', or 'adamw'. Must + be a float in the range[0, 1]. + :paramtype weight_decay: float + :keyword training_crop_size: Image crop size that is input to the neural network for the + training dataset. Must be a positive integer. 
+ :paramtype training_crop_size: int + :keyword validation_crop_size: Image crop size that is input to the neural network for the + validation dataset. Must be a positive integer. + :paramtype validation_crop_size: int + :keyword validation_resize_size: Image size to which to resize before cropping for validation + dataset. Must be a positive integer. + :paramtype validation_resize_size: int + :keyword weighted_loss: Weighted loss. The accepted values are 0 for no weighted loss. + 1 for weighted loss with sqrt.(class_weights). 2 for weighted loss with class_weights. Must be + 0 or 1 or 2. + :paramtype weighted_loss: int + """ + self._training_parameters = self._training_parameters or ImageModelSettingsClassification() + + self._training_parameters.advanced_settings = ( + advanced_settings if advanced_settings is not None else self._training_parameters.advanced_settings + ) + self._training_parameters.ams_gradient = ( + ams_gradient if ams_gradient is not None else self._training_parameters.ams_gradient + ) + self._training_parameters.beta1 = beta1 if beta1 is not None else self._training_parameters.beta1 + self._training_parameters.beta2 = beta2 if beta2 is not None else self._training_parameters.beta2 + self._training_parameters.checkpoint_frequency = ( + checkpoint_frequency if checkpoint_frequency is not None else self._training_parameters.checkpoint_frequency + ) + self._training_parameters.checkpoint_run_id = ( + checkpoint_run_id if checkpoint_run_id is not None else self._training_parameters.checkpoint_run_id + ) + self._training_parameters.distributed = ( + distributed if distributed is not None else self._training_parameters.distributed + ) + self._training_parameters.early_stopping = ( + early_stopping if early_stopping is not None else self._training_parameters.early_stopping + ) + self._training_parameters.early_stopping_delay = ( + early_stopping_delay if early_stopping_delay is not None else self._training_parameters.early_stopping_delay + ) + self._training_parameters.early_stopping_patience = ( + early_stopping_patience + if early_stopping_patience is not None + else self._training_parameters.early_stopping_patience + ) + self._training_parameters.enable_onnx_normalization = ( + enable_onnx_normalization + if enable_onnx_normalization is not None + else self._training_parameters.enable_onnx_normalization + ) + self._training_parameters.evaluation_frequency = ( + evaluation_frequency if evaluation_frequency is not None else self._training_parameters.evaluation_frequency + ) + self._training_parameters.gradient_accumulation_step = ( + gradient_accumulation_step + if gradient_accumulation_step is not None + else self._training_parameters.gradient_accumulation_step + ) + self._training_parameters.layers_to_freeze = ( + layers_to_freeze if layers_to_freeze is not None else self._training_parameters.layers_to_freeze + ) + self._training_parameters.learning_rate = ( + learning_rate if learning_rate is not None else self._training_parameters.learning_rate + ) + self._training_parameters.learning_rate_scheduler = ( + LearningRateScheduler[camel_to_snake(learning_rate_scheduler).upper()] + if learning_rate_scheduler is not None + else self._training_parameters.learning_rate_scheduler + ) + self._training_parameters.model_name = ( + model_name if model_name is not None else self._training_parameters.model_name + ) + self._training_parameters.momentum = momentum if momentum is not None else self._training_parameters.momentum + self._training_parameters.nesterov = nesterov if nesterov 
is not None else self._training_parameters.nesterov + self._training_parameters.number_of_epochs = ( + number_of_epochs if number_of_epochs is not None else self._training_parameters.number_of_epochs + ) + self._training_parameters.number_of_workers = ( + number_of_workers if number_of_workers is not None else self._training_parameters.number_of_workers + ) + self._training_parameters.optimizer = ( + StochasticOptimizer[camel_to_snake(optimizer).upper()] + if optimizer is not None + else self._training_parameters.optimizer + ) + self._training_parameters.random_seed = ( + random_seed if random_seed is not None else self._training_parameters.random_seed + ) + self._training_parameters.step_lr_gamma = ( + step_lr_gamma if step_lr_gamma is not None else self._training_parameters.step_lr_gamma + ) + self._training_parameters.step_lr_step_size = ( + step_lr_step_size if step_lr_step_size is not None else self._training_parameters.step_lr_step_size + ) + self._training_parameters.training_batch_size = ( + training_batch_size if training_batch_size is not None else self._training_parameters.training_batch_size + ) + self._training_parameters.validation_batch_size = ( + validation_batch_size + if validation_batch_size is not None + else self._training_parameters.validation_batch_size + ) + self._training_parameters.warmup_cosine_lr_cycles = ( + warmup_cosine_lr_cycles + if warmup_cosine_lr_cycles is not None + else self._training_parameters.warmup_cosine_lr_cycles + ) + self._training_parameters.warmup_cosine_lr_warmup_epochs = ( + warmup_cosine_lr_warmup_epochs + if warmup_cosine_lr_warmup_epochs is not None + else self._training_parameters.warmup_cosine_lr_warmup_epochs + ) + self._training_parameters.weight_decay = ( + weight_decay if weight_decay is not None else self._training_parameters.weight_decay + ) + self._training_parameters.training_crop_size = ( + training_crop_size if training_crop_size is not None else self._training_parameters.training_crop_size + ) + self._training_parameters.validation_crop_size = ( + validation_crop_size if validation_crop_size is not None else self._training_parameters.validation_crop_size + ) + self._training_parameters.validation_resize_size = ( + validation_resize_size + if validation_resize_size is not None + else self._training_parameters.validation_resize_size + ) + self._training_parameters.weighted_loss = ( + weighted_loss if weighted_loss is not None else self._training_parameters.weighted_loss + ) + + # pylint: enable=too-many-locals + + def extend_search_space( + self, + value: Union[SearchSpace, List[SearchSpace]], + ) -> None: + """Add Search space for AutoML Image Classification and Image Classification Multilabel tasks. 
+ + :param value: specify either an instance of ImageClassificationSearchSpace or list of + ImageClassificationSearchSpace for searching through the parameter space + :type value: Union[ImageClassificationSearchSpace, List[ImageClassificationSearchSpace]] + """ + self._search_space = self._search_space or [] + + if isinstance(value, list): + self._search_space.extend( + [ + cast_to_specific_search_space(item, ImageClassificationSearchSpace, self.task_type) # type: ignore + for item in value + ] + ) + else: + self._search_space.append( + cast_to_specific_search_space(value, ImageClassificationSearchSpace, self.task_type) # type: ignore + ) + + @classmethod + def _get_search_space_from_str(cls, search_space_str: str) -> Optional[List[ImageClassificationSearchSpace]]: + return ( + [ImageClassificationSearchSpace._from_rest_object(entry) for entry in search_space_str if entry is not None] + if search_space_str is not None + else None + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, AutoMLImageClassificationBase): + return NotImplemented + + if not super().__eq__(other): + return False + + return self._training_parameters == other._training_parameters and self._search_space == other._search_space + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image_object_detection_base.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image_object_detection_base.py new file mode 100644 index 00000000..db0c7bc6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/automl_image_object_detection_base.py @@ -0,0 +1,524 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Any, Dict, List, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + LearningRateScheduler, + LogTrainingMetrics, + LogValidationLoss, + ModelSize, + StochasticOptimizer, + ValidationMetricType, +) +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.entities._job.automl import SearchSpace +from azure.ai.ml.entities._job.automl.image.automl_image import AutoMLImage +from azure.ai.ml.entities._job.automl.image.image_limit_settings import ImageLimitSettings +from azure.ai.ml.entities._job.automl.image.image_model_settings import ImageModelSettingsObjectDetection +from azure.ai.ml.entities._job.automl.image.image_object_detection_search_space import ImageObjectDetectionSearchSpace +from azure.ai.ml.entities._job.automl.image.image_sweep_settings import ImageSweepSettings +from azure.ai.ml.entities._job.automl.utils import cast_to_specific_search_space +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +class AutoMLImageObjectDetectionBase(AutoMLImage): + """Base class for AutoML Image Object Detection and Image Instance Segmentation tasks. + + :keyword task_type: Type of task to run. Possible values include: "ImageObjectDetection", + "ImageInstanceSegmentation". + :paramtype task_type: str + :keyword limits: The resource limits for the job. + :paramtype limits: Optional[~azure.ai.ml.entities._job.automl.image.image_limit_settings.ImageLimitSettings] + :keyword sweep: The sweep settings for the job. 
+ :paramtype sweep: Optional[~azure.ai.ml.entities._job.automl.image.image_sweep_settings.ImageSweepSettings] + :keyword training_parameters: The training parameters for the job. + :paramtype training_parameters: Optional[~azure.ai.ml.automl.ImageModelSettingsObjectDetection] + :keyword search_space: The search space for the job. + :paramtype search_space: Optional[List[~azure.ai.ml.automl.ImageObjectDetectionSearchSpace]] + """ + + def __init__( + self, + *, + task_type: str, + limits: Optional[ImageLimitSettings] = None, + sweep: Optional[ImageSweepSettings] = None, + training_parameters: Optional[ImageModelSettingsObjectDetection] = None, + search_space: Optional[List[ImageObjectDetectionSearchSpace]] = None, + **kwargs: Any, + ) -> None: + self._training_parameters: Optional[ImageModelSettingsObjectDetection] = None + + super().__init__(task_type=task_type, limits=limits, sweep=sweep, **kwargs) + + self.training_parameters = training_parameters # Assigning training_parameters through setter method. + + self._search_space = search_space + + @property + def training_parameters(self) -> Optional[ImageModelSettingsObjectDetection]: + return self._training_parameters + + @training_parameters.setter + def training_parameters(self, value: Union[Dict, ImageModelSettingsObjectDetection]) -> None: + if value is None: + self._training_parameters = None + elif isinstance(value, ImageModelSettingsObjectDetection): + self._training_parameters = value + # set_training_parameters convert parameter values from snake case str to enum. + # We need to add any future enum parameters in this call to support snake case str. + self.set_training_parameters( + optimizer=value.optimizer, + learning_rate_scheduler=value.learning_rate_scheduler, + model_size=value.model_size, + validation_metric_type=value.validation_metric_type, + log_training_metrics=value.log_training_metrics, + log_validation_loss=value.log_validation_loss, + ) + elif value is None: + self._training_parameters = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for model settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_training_parameters(**value) + + @property + def search_space(self) -> Optional[List[ImageObjectDetectionSearchSpace]]: + return self._search_space + + @search_space.setter + def search_space(self, value: Union[List[Dict], List[SearchSpace]]) -> None: + if not isinstance(value, list): + msg = "Expected a list for search space." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + all_dict_type = all(isinstance(item, dict) for item in value) + all_search_space_type = all(isinstance(item, SearchSpace) for item in value) + + if all_search_space_type or all_dict_type: + self._search_space = [ + cast_to_specific_search_space(item, ImageObjectDetectionSearchSpace, self.task_type) # type: ignore + for item in value + ] + else: + msg = "Expected all items in the list to be either dictionaries or SearchSpace objects." 
+ raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + # pylint: disable=too-many-locals + def set_training_parameters( + self, + *, + advanced_settings: Optional[str] = None, + ams_gradient: Optional[bool] = None, + beta1: Optional[float] = None, + beta2: Optional[float] = None, + checkpoint_frequency: Optional[int] = None, + checkpoint_run_id: Optional[str] = None, + distributed: Optional[bool] = None, + early_stopping: Optional[bool] = None, + early_stopping_delay: Optional[int] = None, + early_stopping_patience: Optional[int] = None, + enable_onnx_normalization: Optional[bool] = None, + evaluation_frequency: Optional[int] = None, + gradient_accumulation_step: Optional[int] = None, + layers_to_freeze: Optional[int] = None, + learning_rate: Optional[float] = None, + learning_rate_scheduler: Optional[Union[str, LearningRateScheduler]] = None, + model_name: Optional[str] = None, + momentum: Optional[float] = None, + nesterov: Optional[bool] = None, + number_of_epochs: Optional[int] = None, + number_of_workers: Optional[int] = None, + optimizer: Optional[Union[str, StochasticOptimizer]] = None, + random_seed: Optional[int] = None, + step_lr_gamma: Optional[float] = None, + step_lr_step_size: Optional[int] = None, + training_batch_size: Optional[int] = None, + validation_batch_size: Optional[int] = None, + warmup_cosine_lr_cycles: Optional[float] = None, + warmup_cosine_lr_warmup_epochs: Optional[int] = None, + weight_decay: Optional[float] = None, + box_detections_per_image: Optional[int] = None, + box_score_threshold: Optional[float] = None, + image_size: Optional[int] = None, + max_size: Optional[int] = None, + min_size: Optional[int] = None, + model_size: Optional[Union[str, ModelSize]] = None, + multi_scale: Optional[bool] = None, + nms_iou_threshold: Optional[float] = None, + tile_grid_size: Optional[str] = None, + tile_overlap_ratio: Optional[float] = None, + tile_predictions_nms_threshold: Optional[float] = None, + validation_iou_threshold: Optional[float] = None, + validation_metric_type: Optional[Union[str, ValidationMetricType]] = None, + log_training_metrics: Optional[Union[str, LogTrainingMetrics]] = None, + log_validation_loss: Optional[Union[str, LogValidationLoss]] = None, + ) -> None: + """Setting Image training parameters for for AutoML Image Object Detection and Image Instance Segmentation + tasks. + + :keyword advanced_settings: Settings for advanced scenarios. + :paramtype advanced_settings: str + :keyword ams_gradient: Enable AMSGrad when optimizer is 'adam' or 'adamw'. + :paramtype ams_gradient: bool + :keyword beta1: Value of 'beta1' when optimizer is 'adam' or 'adamw'. Must be a float in the + range [0, 1]. + :paramtype beta1: float + :keyword beta2: Value of 'beta2' when optimizer is 'adam' or 'adamw'. Must be a float in the + range [0, 1]. + :paramtype beta2: float + :keyword checkpoint_frequency: Frequency to store model checkpoints. Must be a positive + integer. + :paramtype checkpoint_frequency: int + :keyword checkpoint_run_id: The id of a previous run that has a pretrained checkpoint for + incremental training. + :paramtype checkpoint_run_id: str + :keyword distributed: Whether to use distributed training. + :paramtype distributed: bool + :keyword early_stopping: Enable early stopping logic during training. 
+ :paramtype early_stopping: bool + :keyword early_stopping_delay: Minimum number of epochs or validation evaluations to wait + before primary metric improvement + is tracked for early stopping. Must be a positive integer. + :paramtype early_stopping_delay: int + :keyword early_stopping_patience: Minimum number of epochs or validation evaluations with no + primary metric improvement before + the run is stopped. Must be a positive integer. + :paramtype early_stopping_patience: int + :keyword enable_onnx_normalization: Enable normalization when exporting ONNX model. + :paramtype enable_onnx_normalization: bool + :keyword evaluation_frequency: Frequency to evaluate validation dataset to get metric scores. + Must be a positive integer. + :paramtype evaluation_frequency: int + :keyword gradient_accumulation_step: Gradient accumulation means running a configured number of + "GradAccumulationStep" steps without + updating the model weights while accumulating the gradients of those steps, and then using + the accumulated gradients to compute the weight updates. Must be a positive integer. + :paramtype gradient_accumulation_step: int + :keyword layers_to_freeze: Number of layers to freeze for the model. Must be a positive + integer. + For instance, passing 2 as value for 'seresnext' means + freezing layer0 and layer1. For a full list of models supported and details on layer freeze, + please + see: https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters#model-agnostic-hyperparameters. # pylint: disable=line-too-long + :type layers_to_freeze: int + :keyword learning_rate: Initial learning rate. Must be a float in the range [0, 1]. + :paramtype learning_rate: float + :keyword learning_rate_scheduler: Type of learning rate scheduler. Must be 'warmup_cosine' or + 'step'. Possible values include: "None", "WarmupCosine", "Step". + :type learning_rate_scheduler: str or + ~azure.mgmt.machinelearningservices.models.LearningRateScheduler + :keyword model_name: Name of the model to use for training. + For more information on the available models please visit the official documentation: + https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type model_name: str + :keyword momentum: Value of momentum when optimizer is 'sgd'. Must be a float in the range [0, + 1]. + :paramtype momentum: float + :keyword nesterov: Enable nesterov when optimizer is 'sgd'. + :paramtype nesterov: bool + :keyword number_of_epochs: Number of training epochs. Must be a positive integer. + :paramtype number_of_epochs: int + :keyword number_of_workers: Number of data loader workers. Must be a non-negative integer. + :paramtype number_of_workers: int + :keyword optimizer: Type of optimizer. Possible values include: "None", "Sgd", "Adam", "Adamw". + :type optimizer: str or ~azure.mgmt.machinelearningservices.models.StochasticOptimizer + :keyword random_seed: Random seed to be used when using deterministic training. + :paramtype random_seed: int + :keyword step_lr_gamma: Value of gamma when learning rate scheduler is 'step'. Must be a float + in the range [0, 1]. + :paramtype step_lr_gamma: float + :keyword step_lr_step_size: Value of step size when learning rate scheduler is 'step'. Must be + a positive integer. + :paramtype step_lr_step_size: int + :keyword training_batch_size: Training batch size. Must be a positive integer. + :paramtype training_batch_size: int + :keyword validation_batch_size: Validation batch size. Must be a positive integer. 
+        :paramtype validation_batch_size: int
+        :keyword warmup_cosine_lr_cycles: Value of cosine cycle when learning rate scheduler is
+            'warmup_cosine'. Must be a float in the range [0, 1].
+        :paramtype warmup_cosine_lr_cycles: float
+        :keyword warmup_cosine_lr_warmup_epochs: Value of warmup epochs when learning rate scheduler is
+            'warmup_cosine'. Must be a positive integer.
+        :paramtype warmup_cosine_lr_warmup_epochs: int
+        :keyword weight_decay: Value of weight decay when optimizer is 'sgd', 'adam', or 'adamw'. Must
+            be a float in the range [0, 1].
+        :paramtype weight_decay: float
+        :keyword box_detections_per_image: Maximum number of detections per image, for all classes.
+            Must be a positive integer.
+            Note: This setting is not supported for the 'yolov5' algorithm.
+        :type box_detections_per_image: int
+        :keyword box_score_threshold: During inference, only return proposals with a classification
+            score greater than BoxScoreThreshold. Must be a float in the range [0, 1].
+        :paramtype box_score_threshold: float
+        :keyword image_size: Image size for training and validation. Must be a positive integer.
+            Note: The training run may get into CUDA OOM if the size is too big.
+            Note: This setting is only supported for the 'yolov5' algorithm.
+        :type image_size: int
+        :keyword max_size: Maximum size of the image to be rescaled before feeding it to the backbone.
+            Must be a positive integer. Note: training run may get into CUDA OOM if the size is too big.
+            Note: This setting is not supported for the 'yolov5' algorithm.
+        :type max_size: int
+        :keyword min_size: Minimum size of the image to be rescaled before feeding it to the backbone.
+            Must be a positive integer. Note: training run may get into CUDA OOM if the size is too big.
+            Note: This setting is not supported for the 'yolov5' algorithm.
+        :type min_size: int
+        :keyword model_size: Model size. Must be 'small', 'medium', 'large', or 'extra_large'.
+            Note: training run may get into CUDA OOM if the model size is too big.
+            Note: This setting is only supported for the 'yolov5' algorithm.
+        :type model_size: str or ~azure.mgmt.machinelearningservices.models.ModelSize
+        :keyword multi_scale: Enable multi-scale image by varying image size by +/- 50%.
+            Note: training run may get into CUDA OOM if there is not enough GPU memory.
+            Note: This setting is only supported for the 'yolov5' algorithm.
+        :type multi_scale: bool
+        :keyword nms_iou_threshold: IOU threshold used during inference in NMS post processing. Must be
+            a float in the range [0, 1].
+        :paramtype nms_iou_threshold: float
+        :keyword tile_grid_size: The grid size to use for tiling each image. Note: TileGridSize must not
+            be None to enable small object detection logic. A string containing two integers in mxn format.
+        :type tile_grid_size: str
+        :keyword tile_overlap_ratio: Overlap ratio between adjacent tiles in each dimension. Must be
+            a float in the range [0, 1).
+        :paramtype tile_overlap_ratio: float
+        :keyword tile_predictions_nms_threshold: The IOU threshold to use to perform NMS while merging
+            predictions from tiles and image. Used in validation/inference. Must be a float in the
+            range [0, 1]. NMS: Non-maximum suppression.
+        :type tile_predictions_nms_threshold: float
+        :keyword validation_iou_threshold: IOU threshold to use when computing validation metric. Must
+            be a float in the range [0, 1].
+        :paramtype validation_iou_threshold: float
+        :keyword validation_metric_type: Metric computation method to use for validation metrics. Must
+            be 'none', 'coco', 'voc', or 'coco_voc'.
+        :paramtype validation_metric_type: str or ~azure.mgmt.machinelearningservices.models.ValidationMetricType
+        :keyword log_training_metrics: Indicates whether to log training metrics. Must
+            be 'Enable' or 'Disable'.
+        :paramtype log_training_metrics: str or ~azure.mgmt.machinelearningservices.models.LogTrainingMetrics
+        :keyword log_validation_loss: Indicates whether to log validation loss. Must
+            be 'Enable' or 'Disable'.
+        :paramtype log_validation_loss: str or ~azure.mgmt.machinelearningservices.models.LogValidationLoss
+        """
+        self._training_parameters = self._training_parameters or ImageModelSettingsObjectDetection()
+
+        self._training_parameters.advanced_settings = (
+            advanced_settings if advanced_settings is not None else self._training_parameters.advanced_settings
+        )
+        self._training_parameters.ams_gradient = (
+            ams_gradient if ams_gradient is not None else self._training_parameters.ams_gradient
+        )
+        self._training_parameters.beta1 = beta1 if beta1 is not None else self._training_parameters.beta1
+        self._training_parameters.beta2 = beta2 if beta2 is not None else self._training_parameters.beta2
+        self._training_parameters.checkpoint_frequency = (
+            checkpoint_frequency if checkpoint_frequency is not None else self._training_parameters.checkpoint_frequency
+        )
+        self._training_parameters.checkpoint_run_id = (
+            checkpoint_run_id if checkpoint_run_id is not None else self._training_parameters.checkpoint_run_id
+        )
+        self._training_parameters.distributed = (
+            distributed if distributed is not None else self._training_parameters.distributed
+        )
+        self._training_parameters.early_stopping = (
+            early_stopping if early_stopping is not None else self._training_parameters.early_stopping
+        )
+        self._training_parameters.early_stopping_delay = (
+            early_stopping_delay if early_stopping_delay is not None else self._training_parameters.early_stopping_delay
+        )
+        self._training_parameters.early_stopping_patience = (
+            early_stopping_patience
+            if early_stopping_patience is not None
+            else self._training_parameters.early_stopping_patience
+        )
+        self._training_parameters.enable_onnx_normalization = (
+            enable_onnx_normalization
+            if enable_onnx_normalization is not None
+            else self._training_parameters.enable_onnx_normalization
+        )
+        self._training_parameters.evaluation_frequency = (
+            evaluation_frequency if evaluation_frequency is not None else self._training_parameters.evaluation_frequency
+        )
+        self._training_parameters.gradient_accumulation_step = (
+            gradient_accumulation_step
+            if gradient_accumulation_step is not None
+            else self._training_parameters.gradient_accumulation_step
+        )
+        self._training_parameters.layers_to_freeze = (
+            layers_to_freeze if layers_to_freeze is not None else self._training_parameters.layers_to_freeze
+        )
+        self._training_parameters.learning_rate = (
+            learning_rate if learning_rate is not None else self._training_parameters.learning_rate
+        )
+        self._training_parameters.learning_rate_scheduler = (
+            LearningRateScheduler[camel_to_snake(learning_rate_scheduler)]
+            if learning_rate_scheduler is not None
+            else self._training_parameters.learning_rate_scheduler
+        )
+        self._training_parameters.model_name = (
+            model_name if model_name is not None else self._training_parameters.model_name
+        )
+        self._training_parameters.momentum = momentum if momentum is not None else self._training_parameters.momentum
+        self._training_parameters.nesterov = nesterov if nesterov is not None else self._training_parameters.nesterov
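+        # The remaining fields below follow the same None-coalescing pattern: a keyword
+        # only overwrites the stored value when it is explicitly passed, so repeated
+        # calls to set_training_parameters() merge new settings into the existing ones
+        # instead of resetting them. Enum-valued fields go through camel_to_snake()
+        # before the enum lookup, so callers may pass either enum members or strings.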
self._training_parameters.number_of_epochs = ( + number_of_epochs if number_of_epochs is not None else self._training_parameters.number_of_epochs + ) + self._training_parameters.number_of_workers = ( + number_of_workers if number_of_workers is not None else self._training_parameters.number_of_workers + ) + self._training_parameters.optimizer = ( + StochasticOptimizer[camel_to_snake(optimizer)] + if optimizer is not None + else self._training_parameters.optimizer + ) + self._training_parameters.random_seed = ( + random_seed if random_seed is not None else self._training_parameters.random_seed + ) + self._training_parameters.step_lr_gamma = ( + step_lr_gamma if step_lr_gamma is not None else self._training_parameters.step_lr_gamma + ) + self._training_parameters.step_lr_step_size = ( + step_lr_step_size if step_lr_step_size is not None else self._training_parameters.step_lr_step_size + ) + self._training_parameters.training_batch_size = ( + training_batch_size if training_batch_size is not None else self._training_parameters.training_batch_size + ) + self._training_parameters.validation_batch_size = ( + validation_batch_size + if validation_batch_size is not None + else self._training_parameters.validation_batch_size + ) + self._training_parameters.warmup_cosine_lr_cycles = ( + warmup_cosine_lr_cycles + if warmup_cosine_lr_cycles is not None + else self._training_parameters.warmup_cosine_lr_cycles + ) + self._training_parameters.warmup_cosine_lr_warmup_epochs = ( + warmup_cosine_lr_warmup_epochs + if warmup_cosine_lr_warmup_epochs is not None + else self._training_parameters.warmup_cosine_lr_warmup_epochs + ) + self._training_parameters.weight_decay = ( + weight_decay if weight_decay is not None else self._training_parameters.weight_decay + ) + self._training_parameters.box_detections_per_image = ( + box_detections_per_image + if box_detections_per_image is not None + else self._training_parameters.box_detections_per_image + ) + self._training_parameters.box_score_threshold = ( + box_score_threshold if box_score_threshold is not None else self._training_parameters.box_score_threshold + ) + self._training_parameters.image_size = ( + image_size if image_size is not None else self._training_parameters.image_size + ) + self._training_parameters.max_size = max_size if max_size is not None else self._training_parameters.max_size + self._training_parameters.min_size = min_size if min_size is not None else self._training_parameters.min_size + self._training_parameters.model_size = ( + ModelSize[camel_to_snake(model_size)] if model_size is not None else self._training_parameters.model_size + ) + self._training_parameters.multi_scale = ( + multi_scale if multi_scale is not None else self._training_parameters.multi_scale + ) + self._training_parameters.nms_iou_threshold = ( + nms_iou_threshold if nms_iou_threshold is not None else self._training_parameters.nms_iou_threshold + ) + self._training_parameters.tile_grid_size = ( + tile_grid_size if tile_grid_size is not None else self._training_parameters.tile_grid_size + ) + self._training_parameters.tile_overlap_ratio = ( + tile_overlap_ratio if tile_overlap_ratio is not None else self._training_parameters.tile_overlap_ratio + ) + self._training_parameters.tile_predictions_nms_threshold = ( + tile_predictions_nms_threshold + if tile_predictions_nms_threshold is not None + else self._training_parameters.tile_predictions_nms_threshold + ) + self._training_parameters.validation_iou_threshold = ( + validation_iou_threshold + if validation_iou_threshold is not 
None + else self._training_parameters.validation_iou_threshold + ) + self._training_parameters.validation_metric_type = ( + ValidationMetricType[camel_to_snake(validation_metric_type)] + if validation_metric_type is not None + else self._training_parameters.validation_metric_type + ) + self._training_parameters.log_training_metrics = ( + LogTrainingMetrics[camel_to_snake(log_training_metrics)] + if log_training_metrics is not None + else self._training_parameters.log_training_metrics + ) + self._training_parameters.log_validation_loss = ( + LogValidationLoss[camel_to_snake(log_validation_loss)] + if log_validation_loss is not None + else self._training_parameters.log_validation_loss + ) + + # pylint: enable=too-many-locals + + def extend_search_space( + self, + value: Union[SearchSpace, List[SearchSpace]], + ) -> None: + """Add search space for AutoML Image Object Detection and Image Instance Segmentation tasks. + + :param value: Search through the parameter space + :type value: Union[SearchSpace, List[SearchSpace]] + """ + self._search_space = self._search_space or [] + + if isinstance(value, list): + self._search_space.extend( + [ + cast_to_specific_search_space(item, ImageObjectDetectionSearchSpace, self.task_type) # type: ignore + for item in value + ] + ) + else: + self._search_space.append( + cast_to_specific_search_space(value, ImageObjectDetectionSearchSpace, self.task_type) # type: ignore + ) + + @classmethod + def _get_search_space_from_str(cls, search_space_str: str) -> Optional[List[ImageObjectDetectionSearchSpace]]: + return ( + [ + ImageObjectDetectionSearchSpace._from_rest_object(entry) + for entry in search_space_str + if entry is not None + ] + if search_space_str is not None + else None + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, AutoMLImageObjectDetectionBase): + return NotImplemented + + if not super().__eq__(other): + return False + + return self._training_parameters == other._training_parameters and self._search_space == other._search_space + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_job.py new file mode 100644 index 00000000..a1b9dbc3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_job.py @@ -0,0 +1,244 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# ---------------------------------------------------------
+
+# pylint: disable=protected-access
+
+from typing import Any, Dict, Optional, Union
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob
+from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationPrimaryMetrics
+from azure.ai.ml._restclient.v2023_04_01_preview.models import ImageClassification as RestImageClassification
+from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase, TaskType
+from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression
+from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY
+from azure.ai.ml.constants._job.automl import AutoMLConstants
+from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration
+from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs
+from azure.ai.ml.entities._job.automl.image.automl_image_classification_base import AutoMLImageClassificationBase
+from azure.ai.ml.entities._job.automl.image.image_limit_settings import ImageLimitSettings
+from azure.ai.ml.entities._job.automl.image.image_model_settings import ImageModelSettingsClassification
+from azure.ai.ml.entities._job.automl.image.image_sweep_settings import ImageSweepSettings
+from azure.ai.ml.entities._util import load_from_dict
+
+
+class ImageClassificationJob(AutoMLImageClassificationBase):
+    """Configuration for AutoML multi-class Image Classification job.
+
+    :param primary_metric: The primary metric to use for optimization.
+    :type primary_metric: Optional[Union[str, ~azure.ai.ml.automl.ClassificationPrimaryMetrics]]
+    :param kwargs: Job-specific arguments.
+    :type kwargs: Dict[str, Any]
+
+    .. admonition:: Example:
+
+        .. 
literalinclude:: ../samples/ml_samples_automl_image.py + :start-after: [START automl.automl_image_job.image_classification_job] + :end-before: [END automl.automl_image_job.image_classification_job] + :language: python + :dedent: 8 + :caption: creating an automl image classification job + """ + + _DEFAULT_PRIMARY_METRIC = ClassificationPrimaryMetrics.ACCURACY + + def __init__( + self, + *, + primary_metric: Optional[Union[str, ClassificationPrimaryMetrics]] = None, + **kwargs: Any, + ) -> None: + + # Extract any super class init settings + limits = kwargs.pop("limits", None) + sweep = kwargs.pop("sweep", None) + training_parameters = kwargs.pop("training_parameters", None) + search_space = kwargs.pop("search_space", None) + + super().__init__( + task_type=TaskType.IMAGE_CLASSIFICATION, + limits=limits, + sweep=sweep, + training_parameters=training_parameters, + search_space=search_space, + **kwargs, + ) + + self.primary_metric = primary_metric or ImageClassificationJob._DEFAULT_PRIMARY_METRIC + + @property + def primary_metric(self) -> Optional[Union[str, ClassificationPrimaryMetrics]]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ClassificationPrimaryMetrics]) -> None: + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + self._primary_metric = ( + ImageClassificationJob._DEFAULT_PRIMARY_METRIC + if value is None + else ClassificationPrimaryMetrics[camel_to_snake(value).upper()] + ) + + def _to_rest_object(self) -> JobBase: + image_classification_task = RestImageClassification( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + validation_data_size=self.validation_data_size, + limit_settings=self._limits._to_rest_object() if self._limits else None, + sweep_settings=self._sweep._to_rest_object() if self._sweep else None, + model_settings=self._training_parameters._to_rest_object() if self._training_parameters else None, + search_space=( + [entry._to_rest_object() for entry in self._search_space if entry is not None] + if self._search_space is not None + else None + ), + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + # resolve data inputs in rest obj + self._resolve_data_inputs(image_classification_task) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=image_classification_task, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "ImageClassificationJob": + properties: RestAutoMLJob = obj.properties + task_details: RestImageClassification = properties.task_details + + job_args_dict = { + "id": obj.id, + "name": obj.name, + "description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "experiment_name": properties.experiment_name, + "services": properties.services, + "status": properties.status, + "creation_context": obj.system_data, + "display_name": 
properties.display_name, + "compute": properties.compute_id, + "outputs": from_rest_data_outputs(properties.outputs), + "resources": properties.resources, + "identity": ( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + "queue_settings": properties.queue_settings, + } + + image_classification_job = cls( + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + validation_data_size=task_details.validation_data_size, + limits=( + ImageLimitSettings._from_rest_object(task_details.limit_settings) + if task_details.limit_settings + else None + ), + sweep=( + ImageSweepSettings._from_rest_object(task_details.sweep_settings) + if task_details.sweep_settings + else None + ), + training_parameters=( + ImageModelSettingsClassification._from_rest_object(task_details.model_settings) + if task_details.model_settings + else None + ), + search_space=cls._get_search_space_from_str(task_details.search_space), + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + **job_args_dict, + ) + + image_classification_job._restore_data_inputs() + + return image_classification_job + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "ImageClassificationJob": + from azure.ai.ml._schema.automl.image_vertical.image_classification import ImageClassificationSchema + from azure.ai.ml._schema.pipeline.automl_node import ImageClassificationMulticlassNodeSchema + + inside_pipeline = kwargs.pop("inside_pipeline", False) + if inside_pipeline: + if context.get("inside_pipeline", None) is None: + context["inside_pipeline"] = True + loaded_data = load_from_dict( + ImageClassificationMulticlassNodeSchema, + data, + context, + additional_message, + **kwargs, + ) + else: + loaded_data = load_from_dict(ImageClassificationSchema, data, context, additional_message, **kwargs) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "ImageClassificationJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + data_settings = { + "training_data": loaded_data.pop("training_data"), + "target_column_name": loaded_data.pop("target_column_name"), + "validation_data": loaded_data.pop("validation_data", None), + "validation_data_size": loaded_data.pop("validation_data_size", None), + } + job = ImageClassificationJob(**loaded_data) + job.set_data(**data_settings) + return job + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.image_vertical.image_classification import ImageClassificationSchema + from azure.ai.ml._schema.pipeline.automl_node import ImageClassificationMulticlassNodeSchema + + schema_dict: dict = {} + if inside_pipeline: + schema_dict = ImageClassificationMulticlassNodeSchema( + context={BASE_PATH_CONTEXT_KEY: "./", "inside_pipeline": True} + ).dump(self) + else: + schema_dict = ImageClassificationSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + + return schema_dict + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageClassificationJob): + return NotImplemented + + if not super().__eq__(other): + return False + + return self.primary_metric == other.primary_metric + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git 
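A minimal usage sketch for the ImageClassificationJob entity defined above; it assumes the public factory azure.ai.ml.automl.image_classification, and the datastore paths, compute name, and target column are illustrative placeholders rather than values from this diff:

    from azure.ai.ml import Input, automl
    from azure.ai.ml.constants import AssetTypes

    # Placeholder MLTable inputs; point these at real datastore paths.
    training_data = Input(type=AssetTypes.MLTABLE, path="azureml://datastores/workspaceblobstore/paths/train")
    validation_data = Input(type=AssetTypes.MLTABLE, path="azureml://datastores/workspaceblobstore/paths/valid")

    # Returns an ImageClassificationJob; when primary_metric is omitted it falls back
    # to _DEFAULT_PRIMARY_METRIC (accuracy), as in the class definition above.
    image_job = automl.image_classification(
        training_data=training_data,
        validation_data=validation_data,
        target_column_name="label",
        primary_metric="accuracy",
        compute="gpu-cluster",
    )
    image_job.set_limits(timeout_minutes=60, max_trials=10, max_concurrent_trials=2)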
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_multilabel_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_multilabel_job.py
new file mode 100644
index 00000000..541f41c7
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_multilabel_job.py
@@ -0,0 +1,252 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=protected-access
+
+from typing import Any, Dict, Optional, Union
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob
+from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationMultilabelPrimaryMetrics
+from azure.ai.ml._restclient.v2023_04_01_preview.models import (
+    ImageClassificationMultilabel as RestImageClassificationMultilabel,
+)
+from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase, TaskType
+from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression
+from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY
+from azure.ai.ml.constants._job.automl import AutoMLConstants
+from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration
+from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs
+from azure.ai.ml.entities._job.automl.image.automl_image_classification_base import AutoMLImageClassificationBase
+from azure.ai.ml.entities._job.automl.image.image_limit_settings import ImageLimitSettings
+from azure.ai.ml.entities._job.automl.image.image_model_settings import ImageModelSettingsClassification
+from azure.ai.ml.entities._job.automl.image.image_sweep_settings import ImageSweepSettings
+from azure.ai.ml.entities._util import load_from_dict
+
+
+class ImageClassificationMultilabelJob(AutoMLImageClassificationBase):
+    """Configuration for AutoML multi-label Image Classification job.
+
+    :param primary_metric: The primary metric to use for optimization.
+    :type primary_metric: Optional[Union[str, ~azure.ai.ml.automl.ClassificationMultilabelPrimaryMetrics]]
+    :param kwargs: Job-specific arguments.
+    :type kwargs: Dict[str, Any]
+
+    .. admonition:: Example:
+
+        .. 
literalinclude:: ../samples/ml_samples_automl_image.py + :start-after: [START automl.automl_image_job.image_classification_multilabel_job] + :end-before: [END automl.automl_image_job.image_classification_multilabel_job] + :language: python + :dedent: 8 + :caption: creating an automl image classification multilabel job + """ + + _DEFAULT_PRIMARY_METRIC = ClassificationMultilabelPrimaryMetrics.IOU + + def __init__( + self, + *, + primary_metric: Optional[Union[str, ClassificationMultilabelPrimaryMetrics]] = None, + **kwargs: Any, + ) -> None: + + # Extract any super class init settings + limits = kwargs.pop("limits", None) + sweep = kwargs.pop("sweep", None) + training_parameters = kwargs.pop("training_parameters", None) + search_space = kwargs.pop("search_space", None) + + super().__init__( + task_type=TaskType.IMAGE_CLASSIFICATION_MULTILABEL, + limits=limits, + sweep=sweep, + training_parameters=training_parameters, + search_space=search_space, + **kwargs, + ) + + self.primary_metric = primary_metric or ImageClassificationMultilabelJob._DEFAULT_PRIMARY_METRIC + + @property + def primary_metric(self) -> Union[str, ClassificationMultilabelPrimaryMetrics]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ClassificationMultilabelPrimaryMetrics]) -> None: + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + self._primary_metric = ( + ImageClassificationMultilabelJob._DEFAULT_PRIMARY_METRIC + if value is None + else ClassificationMultilabelPrimaryMetrics[camel_to_snake(value).upper()] + ) + + def _to_rest_object(self) -> JobBase: + image_classification_multilabel_task = RestImageClassificationMultilabel( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + validation_data_size=self.validation_data_size, + limit_settings=self._limits._to_rest_object() if self._limits else None, + sweep_settings=self._sweep._to_rest_object() if self._sweep else None, + model_settings=self._training_parameters._to_rest_object() if self._training_parameters else None, + search_space=( + [entry._to_rest_object() for entry in self._search_space if entry is not None] + if self._search_space is not None + else None + ), + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + # resolve data inputs in rest obj + self._resolve_data_inputs(image_classification_multilabel_task) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=image_classification_multilabel_task, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "ImageClassificationMultilabelJob": + properties: RestAutoMLJob = obj.properties + task_details: RestImageClassificationMultilabel = properties.task_details + + job_args_dict = { + "id": obj.id, + "name": obj.name, + "description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "experiment_name": 
properties.experiment_name, + "services": properties.services, + "status": properties.status, + "creation_context": obj.system_data, + "display_name": properties.display_name, + "compute": properties.compute_id, + "outputs": from_rest_data_outputs(properties.outputs), + "resources": properties.resources, + "identity": ( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + "queue_settings": properties.queue_settings, + } + + image_classification_multilabel_job = cls( + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + validation_data_size=task_details.validation_data_size, + limits=( + ImageLimitSettings._from_rest_object(task_details.limit_settings) + if task_details.limit_settings + else None + ), + sweep=( + ImageSweepSettings._from_rest_object(task_details.sweep_settings) + if task_details.sweep_settings + else None + ), + training_parameters=( + ImageModelSettingsClassification._from_rest_object(task_details.model_settings) + if task_details.model_settings + else None + ), + search_space=cls._get_search_space_from_str(task_details.search_space), + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + **job_args_dict, + ) + + image_classification_multilabel_job._restore_data_inputs() + + return image_classification_multilabel_job + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "ImageClassificationMultilabelJob": + from azure.ai.ml._schema.automl.image_vertical.image_classification import ImageClassificationMultilabelSchema + from azure.ai.ml._schema.pipeline.automl_node import ImageClassificationMultilabelNodeSchema + + inside_pipeline = kwargs.pop("inside_pipeline", False) + if inside_pipeline: + if context.get("inside_pipeline", None) is None: + context["inside_pipeline"] = True + loaded_data = load_from_dict( + ImageClassificationMultilabelNodeSchema, + data, + context, + additional_message, + **kwargs, + ) + else: + loaded_data = load_from_dict( + ImageClassificationMultilabelSchema, + data, + context, + additional_message, + **kwargs, + ) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "ImageClassificationMultilabelJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + data_settings = { + "training_data": loaded_data.pop("training_data"), + "target_column_name": loaded_data.pop("target_column_name"), + "validation_data": loaded_data.pop("validation_data", None), + "validation_data_size": loaded_data.pop("validation_data_size", None), + } + job = ImageClassificationMultilabelJob(**loaded_data) + job.set_data(**data_settings) + return job + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.image_vertical.image_classification import ImageClassificationMultilabelSchema + from azure.ai.ml._schema.pipeline.automl_node import ImageClassificationMultilabelNodeSchema + + schema_dict: dict = {} + if inside_pipeline: + schema_dict = ImageClassificationMultilabelNodeSchema( + context={BASE_PATH_CONTEXT_KEY: "./", "inside_pipeline": True} + ).dump(self) + else: + schema_dict = ImageClassificationMultilabelSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + + return schema_dict + + def __eq__(self, other: object) -> bool: + if not 
isinstance(other, ImageClassificationMultilabelJob):
+            return NotImplemented
+
+        if not super().__eq__(other):
+            return False
+
+        return self.primary_metric == other.primary_metric
+
+    def __ne__(self, other: object) -> bool:
+        return not self.__eq__(other)
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_search_space.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_search_space.py
new file mode 100644
index 00000000..0691f243
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_classification_search_space.py
@@ -0,0 +1,437 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=R0902,too-many-locals
+
+from typing import Optional, Union
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import ImageModelDistributionSettingsClassification
+from azure.ai.ml.entities._job.automl.search_space import SearchSpace
+from azure.ai.ml.entities._job.automl.search_space_utils import _convert_from_rest_object, _convert_to_rest_object
+from azure.ai.ml.entities._job.sweep.search_space import SweepDistribution
+from azure.ai.ml.entities._mixins import RestTranslatableMixin
+
+
+class ImageClassificationSearchSpace(RestTranslatableMixin):
+    """Search space for AutoML Image Classification and Image Classification
+    Multilabel tasks.
+
+    :param ams_gradient: Enable AMSGrad when optimizer is 'adam' or 'adamw'.
+    :type ams_gradient: bool or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution
+    :param beta1: Value of 'beta1' when optimizer is 'adam' or 'adamw'. Must be a float in the
+        range [0, 1].
+    :type beta1: float or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution
+    :param beta2: Value of 'beta2' when optimizer is 'adam' or 'adamw'. Must be a float in the
+        range [0, 1].
+    :type beta2: float or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution
+    :param distributed: Whether to use distributed training.
+    :type distributed: bool or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution
+    :param early_stopping: Enable early stopping logic during training.
+    :type early_stopping: bool or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution
+    :param early_stopping_delay: Minimum number of epochs or validation evaluations to wait
+        before primary metric improvement is tracked for early stopping. Must be a positive integer.
+    :type early_stopping_delay: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution
+    :param early_stopping_patience: Minimum number of epochs or validation evaluations with no
+        primary metric improvement before the run is stopped. Must be a positive integer.
+    :type early_stopping_patience: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution
+    :param enable_onnx_normalization: Enable normalization when exporting ONNX model.
+    :type enable_onnx_normalization: bool or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution
+    :param evaluation_frequency: Frequency to evaluate validation dataset to get metric scores.
+        Must be a positive integer.
+ :type evaluation_frequency: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param gradient_accumulation_step: Gradient accumulation means running a configured number of + "GradAccumulationStep" steps without + updating the model weights while accumulating the gradients of those steps, and then using + the accumulated gradients to compute the weight updates. Must be a positive integer. + :type gradient_accumulation_step: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param layers_to_freeze: Number of layers to freeze for the model. Must be a positive + integer. + For instance, passing 2 as value for 'seresnext' means + freezing layer0 and layer1. For a full list of models supported and details on layer freeze, + please + see: https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters#model-agnostic-hyperparameters. # pylint: disable=line-too-long + :type layers_to_freeze: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param learning_rate: Initial learning rate. Must be a float in the range [0, 1]. + :type learning_rate: float or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param learning_rate_scheduler: Type of learning rate scheduler. Must be 'warmup_cosine' or + 'step'. + :type learning_rate_scheduler: str or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param model_name: Name of the model to use for training. + For more information on the available models please visit the official documentation: + https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type model_name: str or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param momentum: Value of momentum when optimizer is 'sgd'. Must be a float in the range [0, + 1]. + :type momentum: float or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param nesterov: Enable nesterov when optimizer is 'sgd'. + :type nesterov: bool or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param number_of_epochs: Number of training epochs. Must be a positive integer. + :type number_of_epochs: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param number_of_workers: Number of data loader workers. Must be a non-negative integer. + :type number_of_workers: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param optimizer: Type of optimizer. Must be either 'sgd', 'adam', or 'adamw'. + :type optimizer: str or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param random_seed: Random seed to be used when using deterministic training. + :type random_seed: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param step_lr_gamma: Value of gamma when learning rate scheduler is 'step'. Must be a float + in the range [0, 1]. + :type step_lr_gamma: float or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param step_lr_step_size: Value of step size when learning rate scheduler is 'step'. Must be + a positive integer. + :type step_lr_step_size: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param training_batch_size: Training batch size. Must be a positive integer. + :type training_batch_size: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution + :param validation_batch_size: Validation batch size. Must be a positive integer. 
+    :type validation_batch_size: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution
+    :param warmup_cosine_lr_cycles: Value of cosine cycle when learning rate scheduler is
+        'warmup_cosine'. Must be a float in the range [0, 1].
+    :type warmup_cosine_lr_cycles: float or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution
+    :param warmup_cosine_lr_warmup_epochs: Value of warmup epochs when learning rate scheduler is
+        'warmup_cosine'. Must be a positive integer.
+    :type warmup_cosine_lr_warmup_epochs: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution
+    :param weight_decay: Value of weight decay when optimizer is 'sgd', 'adam', or 'adamw'. Must
+        be a float in the range [0, 1].
+    :type weight_decay: float or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution
+    :param training_crop_size: Image crop size that is input to the neural network for the
+        training dataset. Must be a positive integer.
+    :type training_crop_size: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution
+    :param validation_crop_size: Image crop size that is input to the neural network for the
+        validation dataset. Must be a positive integer.
+    :type validation_crop_size: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution
+    :param validation_resize_size: Image size to which to resize before cropping for validation
+        dataset. Must be a positive integer.
+    :type validation_resize_size: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution
+    :param weighted_loss: Weighted loss. The accepted values are 0 for no weighted loss,
+        1 for weighted loss with sqrt(class_weights), and 2 for weighted loss with class_weights.
+        Must be 0, 1, or 2.
+    :type weighted_loss: int or ~azure.ai.ml.entities._job.sweep.search_space.SweepDistribution
+
+    .. admonition:: Example:
+
+        .. 
literalinclude:: ../samples/ml_samples_automl_image.py + :start-after: [START automl.automl_image_job.image_classification_search_space] + :end-before: [END automl.automl_image_job.image_classification_search_space] + :language: python + :dedent: 8 + :caption: Defining an automl image classification search space + """ + + def __init__( + self, + *, + ams_gradient: Optional[Union[bool, SweepDistribution]] = None, + beta1: Optional[Union[float, SweepDistribution]] = None, + beta2: Optional[Union[float, SweepDistribution]] = None, + distributed: Optional[Union[bool, SweepDistribution]] = None, + early_stopping: Optional[Union[bool, SweepDistribution]] = None, + early_stopping_delay: Optional[Union[int, SweepDistribution]] = None, + early_stopping_patience: Optional[Union[int, SweepDistribution]] = None, + enable_onnx_normalization: Optional[Union[bool, SweepDistribution]] = None, + evaluation_frequency: Optional[Union[int, SweepDistribution]] = None, + gradient_accumulation_step: Optional[Union[int, SweepDistribution]] = None, + layers_to_freeze: Optional[Union[int, SweepDistribution]] = None, + learning_rate: Optional[Union[float, SweepDistribution]] = None, + learning_rate_scheduler: Optional[Union[str, SweepDistribution]] = None, + model_name: Optional[Union[str, SweepDistribution]] = None, + momentum: Optional[Union[float, SweepDistribution]] = None, + nesterov: Optional[Union[bool, SweepDistribution]] = None, + number_of_epochs: Optional[Union[int, SweepDistribution]] = None, + number_of_workers: Optional[Union[int, SweepDistribution]] = None, + optimizer: Optional[Union[str, SweepDistribution]] = None, + random_seed: Optional[Union[int, SweepDistribution]] = None, + step_lr_gamma: Optional[Union[float, SweepDistribution]] = None, + step_lr_step_size: Optional[Union[int, SweepDistribution]] = None, + training_batch_size: Optional[Union[int, SweepDistribution]] = None, + validation_batch_size: Optional[Union[int, SweepDistribution]] = None, + warmup_cosine_lr_cycles: Optional[Union[float, SweepDistribution]] = None, + warmup_cosine_lr_warmup_epochs: Optional[Union[int, SweepDistribution]] = None, + weight_decay: Optional[Union[float, SweepDistribution]] = None, + training_crop_size: Optional[Union[int, SweepDistribution]] = None, + validation_crop_size: Optional[Union[int, SweepDistribution]] = None, + validation_resize_size: Optional[Union[int, SweepDistribution]] = None, + weighted_loss: Optional[Union[int, SweepDistribution]] = None, + ) -> None: + self.ams_gradient = ams_gradient + self.beta1 = beta1 + self.beta2 = beta2 + self.distributed = distributed + self.early_stopping = early_stopping + self.early_stopping_delay = early_stopping_delay + self.early_stopping_patience = early_stopping_patience + self.enable_onnx_normalization = enable_onnx_normalization + self.evaluation_frequency = evaluation_frequency + self.gradient_accumulation_step = gradient_accumulation_step + self.layers_to_freeze = layers_to_freeze + self.learning_rate = learning_rate + self.learning_rate_scheduler = learning_rate_scheduler + self.model_name = model_name + self.momentum = momentum + self.nesterov = nesterov + self.number_of_epochs = number_of_epochs + self.number_of_workers = number_of_workers + self.optimizer = optimizer + self.random_seed = random_seed + self.step_lr_gamma = step_lr_gamma + self.step_lr_step_size = step_lr_step_size + self.training_batch_size = training_batch_size + self.validation_batch_size = validation_batch_size + self.warmup_cosine_lr_cycles = warmup_cosine_lr_cycles + 
self.warmup_cosine_lr_warmup_epochs = warmup_cosine_lr_warmup_epochs + self.weight_decay = weight_decay + self.training_crop_size = training_crop_size + self.validation_crop_size = validation_crop_size + self.validation_resize_size = validation_resize_size + self.weighted_loss = weighted_loss + + def _to_rest_object(self) -> ImageModelDistributionSettingsClassification: + return ImageModelDistributionSettingsClassification( + ams_gradient=_convert_to_rest_object(self.ams_gradient) if self.ams_gradient is not None else None, + beta1=_convert_to_rest_object(self.beta1) if self.beta1 is not None else None, + beta2=_convert_to_rest_object(self.beta2) if self.beta2 is not None else None, + distributed=_convert_to_rest_object(self.distributed) if self.distributed is not None else None, + early_stopping=_convert_to_rest_object(self.early_stopping) if self.early_stopping is not None else None, + early_stopping_delay=( + _convert_to_rest_object(self.early_stopping_delay) if self.early_stopping_delay is not None else None + ), + early_stopping_patience=( + _convert_to_rest_object(self.early_stopping_patience) + if self.early_stopping_patience is not None + else None + ), + enable_onnx_normalization=( + _convert_to_rest_object(self.enable_onnx_normalization) + if self.enable_onnx_normalization is not None + else None + ), + evaluation_frequency=( + _convert_to_rest_object(self.evaluation_frequency) if self.evaluation_frequency is not None else None + ), + gradient_accumulation_step=( + _convert_to_rest_object(self.gradient_accumulation_step) + if self.gradient_accumulation_step is not None + else None + ), + layers_to_freeze=( + _convert_to_rest_object(self.layers_to_freeze) if self.layers_to_freeze is not None else None + ), + learning_rate=_convert_to_rest_object(self.learning_rate) if self.learning_rate is not None else None, + learning_rate_scheduler=( + _convert_to_rest_object(self.learning_rate_scheduler) + if self.learning_rate_scheduler is not None + else None + ), + model_name=_convert_to_rest_object(self.model_name) if self.model_name is not None else None, + momentum=_convert_to_rest_object(self.momentum) if self.momentum is not None else None, + nesterov=_convert_to_rest_object(self.nesterov) if self.nesterov is not None else None, + number_of_epochs=( + _convert_to_rest_object(self.number_of_epochs) if self.number_of_epochs is not None else None + ), + number_of_workers=( + _convert_to_rest_object(self.number_of_workers) if self.number_of_workers is not None else None + ), + optimizer=_convert_to_rest_object(self.optimizer) if self.optimizer is not None else None, + random_seed=_convert_to_rest_object(self.random_seed) if self.random_seed is not None else None, + step_lr_gamma=_convert_to_rest_object(self.step_lr_gamma) if self.step_lr_gamma is not None else None, + step_lr_step_size=( + _convert_to_rest_object(self.step_lr_step_size) if self.step_lr_step_size is not None else None + ), + training_batch_size=( + _convert_to_rest_object(self.training_batch_size) if self.training_batch_size is not None else None + ), + validation_batch_size=( + _convert_to_rest_object(self.validation_batch_size) if self.validation_batch_size is not None else None + ), + warmup_cosine_lr_cycles=( + _convert_to_rest_object(self.warmup_cosine_lr_cycles) + if self.warmup_cosine_lr_cycles is not None + else None + ), + warmup_cosine_lr_warmup_epochs=( + _convert_to_rest_object(self.warmup_cosine_lr_warmup_epochs) + if self.warmup_cosine_lr_warmup_epochs is not None + else None + ), + 
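+            # The remaining fields are serialized identically: _convert_to_rest_object
+            # renders either a fixed value or a SweepDistribution as its REST form.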
weight_decay=_convert_to_rest_object(self.weight_decay) if self.weight_decay is not None else None, + training_crop_size=( + _convert_to_rest_object(self.training_crop_size) if self.training_crop_size is not None else None + ), + validation_crop_size=( + _convert_to_rest_object(self.validation_crop_size) if self.validation_crop_size is not None else None + ), + validation_resize_size=( + _convert_to_rest_object(self.validation_resize_size) + if self.validation_resize_size is not None + else None + ), + weighted_loss=_convert_to_rest_object(self.weighted_loss) if self.weighted_loss is not None else None, + ) + + @classmethod + def _from_rest_object(cls, obj: ImageModelDistributionSettingsClassification) -> "ImageClassificationSearchSpace": + return cls( + ams_gradient=_convert_from_rest_object(obj.ams_gradient) if obj.ams_gradient is not None else None, + beta1=_convert_from_rest_object(obj.beta1) if obj.beta1 is not None else None, + beta2=_convert_from_rest_object(obj.beta2) if obj.beta2 is not None else None, + distributed=_convert_from_rest_object(obj.distributed) if obj.distributed is not None else None, + early_stopping=_convert_from_rest_object(obj.early_stopping) if obj.early_stopping is not None else None, + early_stopping_delay=( + _convert_from_rest_object(obj.early_stopping_delay) if obj.early_stopping_delay is not None else None + ), + early_stopping_patience=( + _convert_from_rest_object(obj.early_stopping_patience) + if obj.early_stopping_patience is not None + else None + ), + enable_onnx_normalization=( + _convert_from_rest_object(obj.enable_onnx_normalization) + if obj.enable_onnx_normalization is not None + else None + ), + evaluation_frequency=( + _convert_from_rest_object(obj.evaluation_frequency) if obj.evaluation_frequency is not None else None + ), + gradient_accumulation_step=( + _convert_from_rest_object(obj.gradient_accumulation_step) + if obj.gradient_accumulation_step is not None + else None + ), + layers_to_freeze=( + _convert_from_rest_object(obj.layers_to_freeze) if obj.layers_to_freeze is not None else None + ), + learning_rate=_convert_from_rest_object(obj.learning_rate) if obj.learning_rate is not None else None, + learning_rate_scheduler=( + _convert_from_rest_object(obj.learning_rate_scheduler) + if obj.learning_rate_scheduler is not None + else None + ), + model_name=_convert_from_rest_object(obj.model_name) if obj.model_name is not None else None, + momentum=_convert_from_rest_object(obj.momentum) if obj.momentum is not None else None, + nesterov=_convert_from_rest_object(obj.nesterov) if obj.nesterov is not None else None, + number_of_epochs=( + _convert_from_rest_object(obj.number_of_epochs) if obj.number_of_epochs is not None else None + ), + number_of_workers=( + _convert_from_rest_object(obj.number_of_workers) if obj.number_of_workers is not None else None + ), + optimizer=_convert_from_rest_object(obj.optimizer) if obj.optimizer is not None else None, + random_seed=_convert_from_rest_object(obj.random_seed) if obj.random_seed is not None else None, + step_lr_gamma=_convert_from_rest_object(obj.step_lr_gamma) if obj.step_lr_gamma is not None else None, + step_lr_step_size=( + _convert_from_rest_object(obj.step_lr_step_size) if obj.step_lr_step_size is not None else None + ), + training_batch_size=( + _convert_from_rest_object(obj.training_batch_size) if obj.training_batch_size is not None else None + ), + validation_batch_size=( + _convert_from_rest_object(obj.validation_batch_size) if obj.validation_batch_size is not None else None + ), + 
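+            # Inverse of _to_rest_object: _convert_from_rest_object parses the REST
+            # distribution strings back into plain values or SweepDistribution objects.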
warmup_cosine_lr_cycles=( + _convert_from_rest_object(obj.warmup_cosine_lr_cycles) + if obj.warmup_cosine_lr_cycles is not None + else None + ), + warmup_cosine_lr_warmup_epochs=( + _convert_from_rest_object(obj.warmup_cosine_lr_warmup_epochs) + if obj.warmup_cosine_lr_warmup_epochs is not None + else None + ), + weight_decay=_convert_from_rest_object(obj.weight_decay) if obj.weight_decay is not None else None, + training_crop_size=( + _convert_from_rest_object(obj.training_crop_size) if obj.training_crop_size is not None else None + ), + validation_crop_size=( + _convert_from_rest_object(obj.validation_crop_size) if obj.validation_crop_size is not None else None + ), + validation_resize_size=( + _convert_from_rest_object(obj.validation_resize_size) + if obj.validation_resize_size is not None + else None + ), + weighted_loss=_convert_from_rest_object(obj.weighted_loss) if obj.weighted_loss is not None else None, + ) + + @classmethod + def _from_search_space_object(cls, obj: SearchSpace) -> "ImageClassificationSearchSpace": + return cls( + ams_gradient=obj.ams_gradient if hasattr(obj, "ams_gradient") else None, + beta1=obj.beta1 if hasattr(obj, "beta1") else None, + beta2=obj.beta2 if hasattr(obj, "beta2") else None, + distributed=obj.distributed if hasattr(obj, "distributed") else None, + early_stopping=obj.early_stopping if hasattr(obj, "early_stopping") else None, + early_stopping_delay=obj.early_stopping_delay if hasattr(obj, "early_stopping_delay") else None, + early_stopping_patience=obj.early_stopping_patience if hasattr(obj, "early_stopping_patience") else None, + enable_onnx_normalization=( + obj.enable_onnx_normalization if hasattr(obj, "enable_onnx_normalization") else None + ), + evaluation_frequency=obj.evaluation_frequency if hasattr(obj, "evaluation_frequency") else None, + gradient_accumulation_step=( + obj.gradient_accumulation_step if hasattr(obj, "gradient_accumulation_step") else None + ), + layers_to_freeze=obj.layers_to_freeze if hasattr(obj, "layers_to_freeze") else None, + learning_rate=obj.learning_rate if hasattr(obj, "learning_rate") else None, + learning_rate_scheduler=obj.learning_rate_scheduler if hasattr(obj, "learning_rate_scheduler") else None, + model_name=obj.model_name if hasattr(obj, "model_name") else None, + momentum=obj.momentum if hasattr(obj, "momentum") else None, + nesterov=obj.nesterov if hasattr(obj, "nesterov") else None, + number_of_epochs=obj.number_of_epochs if hasattr(obj, "number_of_epochs") else None, + number_of_workers=obj.number_of_workers if hasattr(obj, "number_of_workers") else None, + optimizer=obj.optimizer if hasattr(obj, "optimizer") else None, + random_seed=obj.random_seed if hasattr(obj, "random_seed") else None, + step_lr_gamma=obj.step_lr_gamma if hasattr(obj, "step_lr_gamma") else None, + step_lr_step_size=obj.step_lr_step_size if hasattr(obj, "step_lr_step_size") else None, + training_batch_size=obj.training_batch_size if hasattr(obj, "training_batch_size") else None, + validation_batch_size=obj.validation_batch_size if hasattr(obj, "validation_batch_size") else None, + warmup_cosine_lr_cycles=obj.warmup_cosine_lr_cycles if hasattr(obj, "warmup_cosine_lr_cycles") else None, + warmup_cosine_lr_warmup_epochs=( + obj.warmup_cosine_lr_warmup_epochs if hasattr(obj, "warmup_cosine_lr_warmup_epochs") else None + ), + weight_decay=obj.weight_decay if hasattr(obj, "weight_decay") else None, + training_crop_size=obj.training_crop_size if hasattr(obj, "training_crop_size") else None, + validation_crop_size=obj.validation_crop_size if 
hasattr(obj, "validation_crop_size") else None, + validation_resize_size=obj.validation_resize_size if hasattr(obj, "validation_resize_size") else None, + weighted_loss=obj.weighted_loss if hasattr(obj, "weighted_loss") else None, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageClassificationSearchSpace): + return NotImplemented + + return ( + self.ams_gradient == other.ams_gradient + and self.beta1 == other.beta1 + and self.beta2 == other.beta2 + and self.distributed == other.distributed + and self.early_stopping == other.early_stopping + and self.early_stopping_delay == other.early_stopping_delay + and self.early_stopping_patience == other.early_stopping_patience + and self.enable_onnx_normalization == other.enable_onnx_normalization + and self.evaluation_frequency == other.evaluation_frequency + and self.gradient_accumulation_step == other.gradient_accumulation_step + and self.layers_to_freeze == other.layers_to_freeze + and self.learning_rate == other.learning_rate + and self.learning_rate_scheduler == other.learning_rate_scheduler + and self.model_name == other.model_name + and self.momentum == other.momentum + and self.nesterov == other.nesterov + and self.number_of_epochs == other.number_of_epochs + and self.number_of_workers == other.number_of_workers + and self.optimizer == other.optimizer + and self.random_seed == other.random_seed + and self.step_lr_gamma == other.step_lr_gamma + and self.step_lr_step_size == other.step_lr_step_size + and self.training_batch_size == other.training_batch_size + and self.validation_batch_size == other.validation_batch_size + and self.warmup_cosine_lr_cycles == other.warmup_cosine_lr_cycles + and self.warmup_cosine_lr_warmup_epochs == other.warmup_cosine_lr_warmup_epochs + and self.weight_decay == other.weight_decay + and self.training_crop_size == other.training_crop_size + and self.validation_crop_size == other.validation_crop_size + and self.validation_resize_size == other.validation_resize_size + and self.weighted_loss == other.weighted_loss + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_instance_segmentation_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_instance_segmentation_job.py new file mode 100644 index 00000000..c97d3c11 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_instance_segmentation_job.py @@ -0,0 +1,249 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# ---------------------------------------------------------
+
+# pylint: disable=protected-access
+
+from typing import Any, Dict, Optional, Union
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob
+from azure.ai.ml._restclient.v2023_04_01_preview.models import (
+ ImageInstanceSegmentation as RestImageInstanceSegmentation,
+)
+from azure.ai.ml._restclient.v2023_04_01_preview.models import InstanceSegmentationPrimaryMetrics, JobBase, TaskType
+from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression
+from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY
+from azure.ai.ml.constants._job.automl import AutoMLConstants
+from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration
+from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs
+from azure.ai.ml.entities._job.automl.image.automl_image_object_detection_base import AutoMLImageObjectDetectionBase
+from azure.ai.ml.entities._job.automl.image.image_limit_settings import ImageLimitSettings
+from azure.ai.ml.entities._job.automl.image.image_model_settings import ImageModelSettingsObjectDetection
+from azure.ai.ml.entities._job.automl.image.image_sweep_settings import ImageSweepSettings
+from azure.ai.ml.entities._util import load_from_dict
+
+
+class ImageInstanceSegmentationJob(AutoMLImageObjectDetectionBase):
+ """Configuration for AutoML Image Instance Segmentation job.
+
+ :keyword primary_metric: The primary metric to use for optimization.
+ :paramtype primary_metric: Optional[Union[str, ~azure.ai.ml.automl.InstanceSegmentationPrimaryMetrics]]
+ :keyword kwargs: Job-specific arguments.
+ :paramtype kwargs: Dict[str, Any]
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/ml_samples_automl_image.py
+ :start-after: [START automl.automl_image_job.image_instance_segmentation_job]
+ :end-before: [END automl.automl_image_job.image_instance_segmentation_job]
+ :language: python
+ :dedent: 8
+ :caption: Creating an automl image instance segmentation job.
+ """
+
+ _DEFAULT_PRIMARY_METRIC = InstanceSegmentationPrimaryMetrics.MEAN_AVERAGE_PRECISION
+
+ def __init__(
+ self,
+ *,
+ primary_metric: Optional[Union[str, InstanceSegmentationPrimaryMetrics]] = None,
+ **kwargs: Any,
+ ) -> None:
+ # Extract any superclass init settings
+ limits = kwargs.pop("limits", None)
+ sweep = kwargs.pop("sweep", None)
+ training_parameters = kwargs.pop("training_parameters", None)
+ search_space = kwargs.pop("search_space", None)
+
+ super().__init__(
+ task_type=TaskType.IMAGE_INSTANCE_SEGMENTATION,
+ limits=limits,
+ sweep=sweep,
+ training_parameters=training_parameters,
+ search_space=search_space,
+ **kwargs,
+ )
+ self.primary_metric = primary_metric or ImageInstanceSegmentationJob._DEFAULT_PRIMARY_METRIC
+
+ @property
+ def primary_metric(self) -> Union[str, InstanceSegmentationPrimaryMetrics]:
+ return self._primary_metric
+
+ @primary_metric.setter
+ def primary_metric(self, value: Union[str, InstanceSegmentationPrimaryMetrics]) -> None:
+ if is_data_binding_expression(str(value), ["parent"]):
+ self._primary_metric = value
+ return
+ self._primary_metric = (
+ ImageInstanceSegmentationJob._DEFAULT_PRIMARY_METRIC
+ if value is None
+ else InstanceSegmentationPrimaryMetrics[camel_to_snake(value).upper()]
+ )
+
+ def _to_rest_object(self) -> JobBase:
+ image_instance_segmentation_task = RestImageInstanceSegmentation(
+ target_column_name=self.target_column_name,
+ training_data=self.training_data,
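+ # training_data/validation_data are still entity-level Inputs at this point;
+ # _resolve_data_inputs() below rewrites them into service-side data references
+ # before the payload is submitted.
+ 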
validation_data=self.validation_data, + validation_data_size=self.validation_data_size, + limit_settings=self._limits._to_rest_object() if self._limits else None, + sweep_settings=self._sweep._to_rest_object() if self._sweep else None, + model_settings=self._training_parameters._to_rest_object() if self._training_parameters else None, + search_space=( + [entry._to_rest_object() for entry in self._search_space if entry is not None] + if self._search_space is not None + else None + ), + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + # resolve data inputs in rest obj + self._resolve_data_inputs(image_instance_segmentation_task) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=image_instance_segmentation_task, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "ImageInstanceSegmentationJob": + properties: RestAutoMLJob = obj.properties + task_details: RestImageInstanceSegmentation = properties.task_details + + job_args_dict = { + "id": obj.id, + "name": obj.name, + "description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "experiment_name": properties.experiment_name, + "services": properties.services, + "status": properties.status, + "creation_context": obj.system_data, + "display_name": properties.display_name, + "compute": properties.compute_id, + "outputs": from_rest_data_outputs(properties.outputs), + "resources": properties.resources, + "identity": ( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + "queue_settings": properties.queue_settings, + } + + image_instance_segmentation_job = cls( + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + validation_data_size=task_details.validation_data_size, + limits=( + ImageLimitSettings._from_rest_object(task_details.limit_settings) + if task_details.limit_settings + else None + ), + sweep=( + ImageSweepSettings._from_rest_object(task_details.sweep_settings) + if task_details.sweep_settings + else None + ), + training_parameters=( + ImageModelSettingsObjectDetection._from_rest_object(task_details.model_settings) + if task_details.model_settings + else None + ), + search_space=cls._get_search_space_from_str(task_details.search_space), + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + **job_args_dict, + ) + + image_instance_segmentation_job._restore_data_inputs() + + return image_instance_segmentation_job + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "ImageInstanceSegmentationJob": + from azure.ai.ml._schema.automl.image_vertical.image_object_detection import ImageInstanceSegmentationSchema + from azure.ai.ml._schema.pipeline.automl_node import ImageInstanceSegmentationNodeSchema + + inside_pipeline = 
kwargs.pop("inside_pipeline", False) + if inside_pipeline: + if context.get("inside_pipeline", None) is None: + context["inside_pipeline"] = True + loaded_data = load_from_dict( + ImageInstanceSegmentationNodeSchema, + data, + context, + additional_message, + **kwargs, + ) + else: + loaded_data = load_from_dict( + ImageInstanceSegmentationSchema, + data, + context, + additional_message, + **kwargs, + ) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "ImageInstanceSegmentationJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + data_settings = { + "training_data": loaded_data.pop("training_data"), + "target_column_name": loaded_data.pop("target_column_name"), + "validation_data": loaded_data.pop("validation_data", None), + "validation_data_size": loaded_data.pop("validation_data_size", None), + } + job = ImageInstanceSegmentationJob(**loaded_data) + job.set_data(**data_settings) + return job + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.image_vertical.image_object_detection import ImageInstanceSegmentationSchema + from azure.ai.ml._schema.pipeline.automl_node import ImageInstanceSegmentationNodeSchema + + schema_dict: dict = {} + if inside_pipeline: + schema_dict = ImageInstanceSegmentationNodeSchema( + context={BASE_PATH_CONTEXT_KEY: "./", "inside_pipeline": True} + ).dump(self) + else: + schema_dict = ImageInstanceSegmentationSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + + return schema_dict + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageInstanceSegmentationJob): + return NotImplemented + + if not super().__eq__(other): + return False + + return self.primary_metric == other.primary_metric + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_limit_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_limit_settings.py new file mode 100644 index 00000000..12ec8b57 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_limit_settings.py @@ -0,0 +1,117 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from typing import Optional + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ImageLimitSettings as RestImageLimitSettings +from azure.ai.ml._utils.utils import from_iso_duration_format_mins, to_iso_duration_format_mins +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class ImageLimitSettings(RestTranslatableMixin): + r"""Limit settings for AutoML Image Verticals. + + ImageLimitSettings is a class that contains the following parameters: max_concurrent_trials, max_trials, and \ + timeout_minutes. + + This is an optional configuration method to configure limits parameters such as timeouts etc. + + .. note:: + + The number of concurrent runs is gated on the resources available in the specified compute target. + Ensure that the compute target has the available resources for the desired concurrency. + + :keyword max_concurrent_trials: Maximum number of concurrent AutoML iterations, defaults to None. 
+ :paramtype max_concurrent_trials: typing.Optional[int]
+ :keyword max_trials: Represents the maximum number of trials (children jobs).
+ :paramtype max_trials: typing.Optional[int]
+ :keyword timeout_minutes: AutoML job timeout. Defaults to None.
+ :paramtype timeout_minutes: typing.Optional[int]
+ :raises ValueError: If max_concurrent_trials is not None and is not a positive integer.
+ :raises ValueError: If max_trials is not None and is not a positive integer.
+ :raises ValueError: If timeout_minutes is not None and is not a positive integer.
+ :return: ImageLimitSettings object.
+ :rtype: ImageLimitSettings
+
+ .. tip::
+ It's good practice to match max_concurrent_trials with the number of nodes in the cluster.
+ For example, if you have a cluster with 4 nodes, set max_concurrent_trials to 4.
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/ml_samples_automl_image.py
+ :start-after: [START automl.automl_image_job.image_limit_settings]
+ :end-before: [END automl.automl_image_job.image_limit_settings]
+ :language: python
+ :dedent: 8
+ :caption: Defining the limit settings for an automl image job.
+ """
+
+ def __init__(
+ self,
+ *,
+ max_concurrent_trials: Optional[int] = None,
+ max_trials: Optional[int] = None,
+ timeout_minutes: Optional[int] = None,
+ ) -> None:
+ self.max_concurrent_trials = max_concurrent_trials
+ self.max_trials = max_trials
+ self.timeout_minutes = timeout_minutes
+
+ def _to_rest_object(self) -> RestImageLimitSettings:
+ """Convert this ImageLimitSettings object to its REST representation.
+
+ :return: The REST-layer limit settings.
+ :rtype: RestImageLimitSettings
+ """
+ return RestImageLimitSettings(
+ max_concurrent_trials=self.max_concurrent_trials,
+ max_trials=self.max_trials,
+ # The REST layer stores the timeout as an ISO 8601 duration rather than minutes.
+ timeout=to_iso_duration_format_mins(self.timeout_minutes),
+ )
+
+ @classmethod
+ def _from_rest_object(cls, obj: RestImageLimitSettings) -> "ImageLimitSettings":
+ """Create an ImageLimitSettings instance from the REST object.
+
+ :param obj: Limit settings for the AutoML job in REST format.
+ :type obj: RestImageLimitSettings
+ :return: Limit settings for an AutoML Image Vertical.
+ :rtype: ImageLimitSettings
+ """
+ return cls(
+ max_concurrent_trials=obj.max_concurrent_trials,
+ max_trials=obj.max_trials,
+ timeout_minutes=from_iso_duration_format_mins(obj.timeout),
+ )
+
+ def __eq__(self, other: object) -> bool:
+ """Check equality between two ImageLimitSettings objects.
+
+ This method checks instance equality and returns True if both
+ instances have the same attributes with the same values.
+
+ :param other: Any object
+ :type other: object
+ :return: True or False
+ :rtype: bool
+ """
+ if not isinstance(other, ImageLimitSettings):
+ return NotImplemented
+
+ return (
+ self.max_concurrent_trials == other.max_concurrent_trials
+ and self.max_trials == other.max_trials
+ and self.timeout_minutes == other.timeout_minutes
+ )
+
+ def __ne__(self, other: object) -> bool:
+ """Check inequality between two ImageLimitSettings objects.
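+ Simply negates :meth:`__eq__`, so the two comparisons always stay consistent.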
+ + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_model_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_model_settings.py new file mode 100644 index 00000000..890f987a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_model_settings.py @@ -0,0 +1,876 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from typing import Any, Optional + +# pylint: disable=R0902,too-many-locals +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + ImageModelSettingsClassification as RestImageModelSettingsClassification, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + ImageModelSettingsObjectDetection as RestImageModelSettingsObjectDetection, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + LearningRateScheduler, + LogTrainingMetrics, + LogValidationLoss, + ModelSize, + StochasticOptimizer, + ValidationMetricType, +) +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class ImageModelDistributionSettings(RestTranslatableMixin): + """Model settings for all AutoML Image Verticals. + Please do not instantiate directly. Use the child classes instead. + + :param advanced_settings: Settings for advanced scenarios. + :type advanced_settings: str + :param ams_gradient: Enable AMSGrad when optimizer is 'adam' or 'adamw'. + :type ams_gradient: bool + :param beta1: Value of 'beta1' when optimizer is 'adam' or 'adamw'. Must be a float in the range + [0, 1]. + :type beta1: float + :param beta2: Value of 'beta2' when optimizer is 'adam' or 'adamw'. Must be a float in the range + [0, 1]. + :type beta2: float + :param checkpoint_frequency: Frequency to store model checkpoints. Must be a positive integer. + :type checkpoint_frequency: int + :param checkpoint_run_id: The id of a previous run that has a pretrained checkpoint for + incremental training. + :type checkpoint_run_id: str + :param distributed: Whether to use distributed training. + :type distributed: bool + :param early_stopping: Enable early stopping logic during training. + :type early_stopping: bool + :param early_stopping_delay: Minimum number of epochs or validation evaluations to wait before + primary metric improvement + is tracked for early stopping. Must be a positive integer. + :type early_stopping_delay: int + :param early_stopping_patience: Minimum number of epochs or validation evaluations with no + primary metric improvement before + the run is stopped. Must be a positive integer. + :type early_stopping_patience: int + :param enable_onnx_normalization: Enable normalization when exporting ONNX model. + :type enable_onnx_normalization: bool + :param evaluation_frequency: Frequency to evaluate validation dataset to get metric scores. Must + be a positive integer. + :type evaluation_frequency: int + :param gradient_accumulation_step: Gradient accumulation means running a configured number of + "GradAccumulationStep" steps without + updating the model weights while accumulating the gradients of those steps, and then using + the accumulated gradients to compute the weight updates. Must be a positive integer. 
+ :type gradient_accumulation_step: int
+ :param layers_to_freeze: Number of layers to freeze for the model. Must be a positive integer.
+ For instance, passing 2 as the value for 'seresnext' means freezing layer0 and layer1.
+ For a full list of supported models and details on layer freezing, see:
+ https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models.
+ :type layers_to_freeze: int
+ :param learning_rate: Initial learning rate. Must be a float in the range [0, 1].
+ :type learning_rate: float
+ :param learning_rate_scheduler: Type of learning rate scheduler. Must be 'warmup_cosine' or
+ 'step'. Possible values include: "None", "WarmupCosine", "Step".
+ :type learning_rate_scheduler: str or
+ ~azure.mgmt.machinelearningservices.models.LearningRateScheduler
+ :param model_name: Name of the model to use for training.
+ For more information on the available models, see the official documentation:
+ https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models.
+ :type model_name: str
+ :param momentum: Value of momentum when optimizer is 'sgd'. Must be a float in the range [0, 1].
+ :type momentum: float
+ :param nesterov: Enable Nesterov momentum when optimizer is 'sgd'.
+ :type nesterov: bool
+ :param number_of_epochs: Number of training epochs. Must be a positive integer.
+ :type number_of_epochs: int
+ :param number_of_workers: Number of data loader workers. Must be a non-negative integer.
+ :type number_of_workers: int
+ :param optimizer: Type of optimizer. Possible values include: "None", "Sgd", "Adam", "Adamw".
+ :type optimizer: str or ~azure.mgmt.machinelearningservices.models.StochasticOptimizer
+ :param random_seed: Random seed to be used when using deterministic training.
+ :type random_seed: int
+ :param step_lr_gamma: Value of gamma when learning rate scheduler is 'step'. Must be a float in
+ the range [0, 1].
+ :type step_lr_gamma: float
+ :param step_lr_step_size: Value of step size when learning rate scheduler is 'step'. Must be a
+ positive integer.
+ :type step_lr_step_size: int
+ :param training_batch_size: Training batch size. Must be a positive integer.
+ :type training_batch_size: int
+ :param validation_batch_size: Validation batch size. Must be a positive integer.
+ :type validation_batch_size: int
+ :param warmup_cosine_lr_cycles: Value of cosine cycle when learning rate scheduler is
+ 'warmup_cosine'. Must be a float in the range [0, 1].
+ :type warmup_cosine_lr_cycles: float
+ :param warmup_cosine_lr_warmup_epochs: Value of warmup epochs when learning rate scheduler is
+ 'warmup_cosine'. Must be a positive integer.
+ :type warmup_cosine_lr_warmup_epochs: int
+ :param weight_decay: Value of weight decay when optimizer is 'sgd', 'adam', or 'adamw'. Must be
+ a float in the range [0, 1].
+ :type weight_decay: float + """ + + def __init__( + self, + *, + advanced_settings: Optional[str] = None, + ams_gradient: Optional[bool] = None, + beta1: Optional[float] = None, + beta2: Optional[float] = None, + checkpoint_frequency: Optional[int] = None, + checkpoint_run_id: Optional[str] = None, + distributed: Optional[bool] = None, + early_stopping: Optional[bool] = None, + early_stopping_delay: Optional[int] = None, + early_stopping_patience: Optional[int] = None, + enable_onnx_normalization: Optional[bool] = None, + evaluation_frequency: Optional[int] = None, + gradient_accumulation_step: Optional[int] = None, + layers_to_freeze: Optional[int] = None, + learning_rate: Optional[float] = None, + learning_rate_scheduler: Optional[LearningRateScheduler] = None, + model_name: Optional[str] = None, + momentum: Optional[float] = None, + nesterov: Optional[bool] = None, + number_of_epochs: Optional[int] = None, + number_of_workers: Optional[int] = None, + optimizer: Optional[StochasticOptimizer] = None, + random_seed: Optional[int] = None, + step_lr_gamma: Optional[float] = None, + step_lr_step_size: Optional[int] = None, + training_batch_size: Optional[int] = None, + validation_batch_size: Optional[int] = None, + warmup_cosine_lr_cycles: Optional[float] = None, + warmup_cosine_lr_warmup_epochs: Optional[int] = None, + weight_decay: Optional[float] = None, + ): + self.advanced_settings = advanced_settings + self.ams_gradient = ams_gradient + self.beta1 = beta1 + self.beta2 = beta2 + self.checkpoint_frequency = checkpoint_frequency + self.checkpoint_run_id = checkpoint_run_id + self.distributed = distributed + self.early_stopping = early_stopping + self.early_stopping_delay = early_stopping_delay + self.early_stopping_patience = early_stopping_patience + self.enable_onnx_normalization = enable_onnx_normalization + self.evaluation_frequency = evaluation_frequency + self.gradient_accumulation_step = gradient_accumulation_step + self.layers_to_freeze = layers_to_freeze + self.learning_rate = learning_rate + self.learning_rate_scheduler = learning_rate_scheduler + self.model_name = model_name + self.momentum = momentum + self.nesterov = nesterov + self.number_of_epochs = number_of_epochs + self.number_of_workers = number_of_workers + self.optimizer = optimizer + self.random_seed = random_seed + self.step_lr_gamma = step_lr_gamma + self.step_lr_step_size = step_lr_step_size + self.training_batch_size = training_batch_size + self.validation_batch_size = validation_batch_size + self.warmup_cosine_lr_cycles = warmup_cosine_lr_cycles + self.warmup_cosine_lr_warmup_epochs = warmup_cosine_lr_warmup_epochs + self.weight_decay = weight_decay + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageModelDistributionSettings): + return NotImplemented + + return ( + self.advanced_settings == other.advanced_settings + and self.ams_gradient == other.ams_gradient + and self.beta1 == other.beta1 + and self.beta2 == other.beta2 + and self.checkpoint_frequency == other.checkpoint_frequency + and self.checkpoint_run_id == other.checkpoint_run_id + and self.distributed == other.distributed + and self.early_stopping == other.early_stopping + and self.early_stopping_delay == other.early_stopping_delay + and self.early_stopping_patience == other.early_stopping_patience + and self.enable_onnx_normalization == other.enable_onnx_normalization + and self.evaluation_frequency == other.evaluation_frequency + and self.gradient_accumulation_step == other.gradient_accumulation_step + and 
self.layers_to_freeze == other.layers_to_freeze + and self.learning_rate == other.learning_rate + and self.learning_rate_scheduler == other.learning_rate_scheduler + and self.model_name == other.model_name + and self.momentum == other.momentum + and self.nesterov == other.nesterov + and self.number_of_epochs == other.number_of_epochs + and self.number_of_workers == other.number_of_workers + and self.optimizer == other.optimizer + and self.random_seed == other.random_seed + and self.step_lr_gamma == other.step_lr_gamma + and self.step_lr_step_size == other.step_lr_step_size + and self.training_batch_size == other.training_batch_size + and self.validation_batch_size == other.validation_batch_size + and self.warmup_cosine_lr_cycles == other.warmup_cosine_lr_cycles + and self.warmup_cosine_lr_warmup_epochs == other.warmup_cosine_lr_warmup_epochs + and self.weight_decay == other.weight_decay + ) + + +class ImageModelSettingsClassification(ImageModelDistributionSettings): + """Model settings for AutoML Image Classification tasks. + + :param advanced_settings: Settings for advanced scenarios. + :type advanced_settings: str + :param ams_gradient: Enable AMSGrad when optimizer is 'adam' or 'adamw'. + :type ams_gradient: bool + :param beta1: Value of 'beta1' when optimizer is 'adam' or 'adamw'. Must be a float in the range + [0, 1]. + :type beta1: float + :param beta2: Value of 'beta2' when optimizer is 'adam' or 'adamw'. Must be a float in the range + [0, 1]. + :type beta2: float + :param checkpoint_frequency: Frequency to store model checkpoints. Must be a positive integer. + :type checkpoint_frequency: int + :param checkpoint_run_id: The id of a previous run that has a pretrained checkpoint for + incremental training. + :type checkpoint_run_id: str + :param distributed: Whether to use distributed training. + :type distributed: bool + :param early_stopping: Enable early stopping logic during training. + :type early_stopping: bool + :param early_stopping_delay: Minimum number of epochs or validation evaluations to wait before + primary metric improvement + is tracked for early stopping. Must be a positive integer. + :type early_stopping_delay: int + :param early_stopping_patience: Minimum number of epochs or validation evaluations with no + primary metric improvement before + the run is stopped. Must be a positive integer. + :type early_stopping_patience: int + :param enable_onnx_normalization: Enable normalization when exporting ONNX model. + :type enable_onnx_normalization: bool + :param evaluation_frequency: Frequency to evaluate validation dataset to get metric scores. Must + be a positive integer. + :type evaluation_frequency: int + :param gradient_accumulation_step: Gradient accumulation means running a configured number of + "GradAccumulationStep" steps without + updating the model weights while accumulating the gradients of those steps, and then using + the accumulated gradients to compute the weight updates. Must be a positive integer. + :type gradient_accumulation_step: int + :param layers_to_freeze: Number of layers to freeze for the model. Must be a positive integer. + For instance, passing 2 as value for 'seresnext' means + freezing layer0 and layer1. For a full list of models supported and details on layer freeze, + please + see: https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type layers_to_freeze: int + :param learning_rate: Initial learning rate. Must be a float in the range [0, 1]. 
+ :type learning_rate: float
+ :param learning_rate_scheduler: Type of learning rate scheduler. Must be 'warmup_cosine' or
+ 'step'. Possible values include: "None", "WarmupCosine", "Step".
+ :type learning_rate_scheduler: str or
+ ~azure.mgmt.machinelearningservices.models.LearningRateScheduler
+ :param model_name: Name of the model to use for training.
+ For more information on the available models, see the official documentation:
+ https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models.
+ :type model_name: str
+ :param momentum: Value of momentum when optimizer is 'sgd'. Must be a float in the range [0, 1].
+ :type momentum: float
+ :param nesterov: Enable Nesterov momentum when optimizer is 'sgd'.
+ :type nesterov: bool
+ :param number_of_epochs: Number of training epochs. Must be a positive integer.
+ :type number_of_epochs: int
+ :param number_of_workers: Number of data loader workers. Must be a non-negative integer.
+ :type number_of_workers: int
+ :param optimizer: Type of optimizer. Possible values include: "None", "Sgd", "Adam", "Adamw".
+ :type optimizer: str or ~azure.mgmt.machinelearningservices.models.StochasticOptimizer
+ :param random_seed: Random seed to be used when using deterministic training.
+ :type random_seed: int
+ :param step_lr_gamma: Value of gamma when learning rate scheduler is 'step'. Must be a float in
+ the range [0, 1].
+ :type step_lr_gamma: float
+ :param step_lr_step_size: Value of step size when learning rate scheduler is 'step'. Must be a
+ positive integer.
+ :type step_lr_step_size: int
+ :param training_batch_size: Training batch size. Must be a positive integer.
+ :type training_batch_size: int
+ :param validation_batch_size: Validation batch size. Must be a positive integer.
+ :type validation_batch_size: int
+ :param warmup_cosine_lr_cycles: Value of cosine cycle when learning rate scheduler is
+ 'warmup_cosine'. Must be a float in the range [0, 1].
+ :type warmup_cosine_lr_cycles: float
+ :param warmup_cosine_lr_warmup_epochs: Value of warmup epochs when learning rate scheduler is
+ 'warmup_cosine'. Must be a positive integer.
+ :type warmup_cosine_lr_warmup_epochs: int
+ :param weight_decay: Value of weight decay when optimizer is 'sgd', 'adam', or 'adamw'. Must be
+ a float in the range [0, 1].
+ :type weight_decay: float
+ :param training_crop_size: Image crop size that is input to the neural network for the training
+ dataset. Must be a positive integer.
+ :type training_crop_size: int
+ :param validation_crop_size: Image crop size that is input to the neural network for the
+ validation dataset. Must be a positive integer.
+ :type validation_crop_size: int
+ :param validation_resize_size: Image size to which to resize before cropping for the validation
+ dataset. Must be a positive integer.
+ :type validation_resize_size: int
+ :param weighted_loss: Weighted loss. The accepted values are 0 for no weighted loss,
+ 1 for weighted loss with sqrt(class_weights), and 2 for weighted loss with class_weights.
+ Must be 0, 1, or 2.
+ :type weighted_loss: int
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/ml_samples_automl_image.py
+ :start-after: [START automl.automl_image_job.image_classification_model_settings]
+ :end-before: [END automl.automl_image_job.image_classification_model_settings]
+ :language: python
+ :dedent: 8
+ :caption: Defining the automl image classification model settings.
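+
+ A minimal inline sketch (values are illustrative; the import path assumes the
+ class is re-exported from the public ``azure.ai.ml.automl`` namespace):
+
+ .. code-block:: python
+
+ from azure.ai.ml.automl import ImageModelSettingsClassification
+
+ # Every keyword is optional; unset values fall back to service-side defaults.
+ model_settings = ImageModelSettingsClassification(
+ model_name="vitb16r224",  # illustrative model alias; see the docs linked above
+ learning_rate=0.01,
+ number_of_epochs=15,
+ training_crop_size=224,
+ validation_crop_size=224,
+ weighted_loss=1,  # 1 => weight classes by sqrt(class_weights)
+ )
+
+ The object is typically passed as the training_parameters of an image job rather
+ than used on its own.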
+ """ + + def __init__( + self, + *, + advanced_settings: Optional[str] = None, + ams_gradient: Optional[bool] = None, + beta1: Optional[float] = None, + beta2: Optional[float] = None, + checkpoint_frequency: Optional[int] = None, + checkpoint_run_id: Optional[str] = None, + distributed: Optional[bool] = None, + early_stopping: Optional[bool] = None, + early_stopping_delay: Optional[int] = None, + early_stopping_patience: Optional[int] = None, + enable_onnx_normalization: Optional[bool] = None, + evaluation_frequency: Optional[int] = None, + gradient_accumulation_step: Optional[int] = None, + layers_to_freeze: Optional[int] = None, + learning_rate: Optional[float] = None, + learning_rate_scheduler: Optional[LearningRateScheduler] = None, + model_name: Optional[str] = None, + momentum: Optional[float] = None, + nesterov: Optional[bool] = None, + number_of_epochs: Optional[int] = None, + number_of_workers: Optional[int] = None, + optimizer: Optional[StochasticOptimizer] = None, + random_seed: Optional[int] = None, + step_lr_gamma: Optional[float] = None, + step_lr_step_size: Optional[int] = None, + training_batch_size: Optional[int] = None, + validation_batch_size: Optional[int] = None, + warmup_cosine_lr_cycles: Optional[float] = None, + warmup_cosine_lr_warmup_epochs: Optional[int] = None, + weight_decay: Optional[float] = None, + training_crop_size: Optional[int] = None, + validation_crop_size: Optional[int] = None, + validation_resize_size: Optional[int] = None, + weighted_loss: Optional[int] = None, + **kwargs: Any, + ): + super(ImageModelSettingsClassification, self).__init__( + advanced_settings=advanced_settings, + ams_gradient=ams_gradient, + beta1=beta1, + beta2=beta2, + checkpoint_frequency=checkpoint_frequency, + checkpoint_run_id=checkpoint_run_id, + distributed=distributed, + early_stopping=early_stopping, + early_stopping_delay=early_stopping_delay, + early_stopping_patience=early_stopping_patience, + enable_onnx_normalization=enable_onnx_normalization, + evaluation_frequency=evaluation_frequency, + gradient_accumulation_step=gradient_accumulation_step, + layers_to_freeze=layers_to_freeze, + learning_rate=learning_rate, + learning_rate_scheduler=learning_rate_scheduler, + model_name=model_name, + momentum=momentum, + nesterov=nesterov, + number_of_epochs=number_of_epochs, + number_of_workers=number_of_workers, + optimizer=optimizer, + random_seed=random_seed, + step_lr_gamma=step_lr_gamma, + step_lr_step_size=step_lr_step_size, + training_batch_size=training_batch_size, + validation_batch_size=validation_batch_size, + warmup_cosine_lr_cycles=warmup_cosine_lr_cycles, + warmup_cosine_lr_warmup_epochs=warmup_cosine_lr_warmup_epochs, + weight_decay=weight_decay, + **kwargs, + ) + self.training_crop_size = training_crop_size + self.validation_crop_size = validation_crop_size + self.validation_resize_size = validation_resize_size + self.weighted_loss = weighted_loss + + def _to_rest_object(self) -> RestImageModelSettingsClassification: + return RestImageModelSettingsClassification( + advanced_settings=self.advanced_settings, + ams_gradient=self.ams_gradient, + beta1=self.beta1, + beta2=self.beta2, + checkpoint_frequency=self.checkpoint_frequency, + checkpoint_run_id=self.checkpoint_run_id, + distributed=self.distributed, + early_stopping=self.early_stopping, + early_stopping_delay=self.early_stopping_delay, + early_stopping_patience=self.early_stopping_patience, + enable_onnx_normalization=self.enable_onnx_normalization, + evaluation_frequency=self.evaluation_frequency, + 
gradient_accumulation_step=self.gradient_accumulation_step, + layers_to_freeze=self.layers_to_freeze, + learning_rate=self.learning_rate, + learning_rate_scheduler=self.learning_rate_scheduler, + model_name=self.model_name, + momentum=self.momentum, + nesterov=self.nesterov, + number_of_epochs=self.number_of_epochs, + number_of_workers=self.number_of_workers, + optimizer=self.optimizer, + random_seed=self.random_seed, + step_lr_gamma=self.step_lr_gamma, + step_lr_step_size=self.step_lr_step_size, + training_batch_size=self.training_batch_size, + validation_batch_size=self.validation_batch_size, + warmup_cosine_lr_cycles=self.warmup_cosine_lr_cycles, + warmup_cosine_lr_warmup_epochs=self.warmup_cosine_lr_warmup_epochs, + weight_decay=self.weight_decay, + training_crop_size=self.training_crop_size, + validation_crop_size=self.validation_crop_size, + validation_resize_size=self.validation_resize_size, + weighted_loss=self.weighted_loss, + ) + + @classmethod + def _from_rest_object(cls, obj: RestImageModelSettingsClassification) -> "ImageModelSettingsClassification": + return cls( + advanced_settings=obj.advanced_settings, + ams_gradient=obj.ams_gradient, + beta1=obj.beta1, + beta2=obj.beta2, + checkpoint_frequency=obj.checkpoint_frequency, + checkpoint_run_id=obj.checkpoint_run_id, + distributed=obj.distributed, + early_stopping=obj.early_stopping, + early_stopping_delay=obj.early_stopping_delay, + early_stopping_patience=obj.early_stopping_patience, + enable_onnx_normalization=obj.enable_onnx_normalization, + evaluation_frequency=obj.evaluation_frequency, + gradient_accumulation_step=obj.gradient_accumulation_step, + layers_to_freeze=obj.layers_to_freeze, + learning_rate=obj.learning_rate, + learning_rate_scheduler=obj.learning_rate_scheduler, + model_name=obj.model_name, + momentum=obj.momentum, + nesterov=obj.nesterov, + number_of_epochs=obj.number_of_epochs, + number_of_workers=obj.number_of_workers, + optimizer=obj.optimizer, + random_seed=obj.random_seed, + step_lr_gamma=obj.step_lr_gamma, + step_lr_step_size=obj.step_lr_step_size, + training_batch_size=obj.training_batch_size, + validation_batch_size=obj.validation_batch_size, + warmup_cosine_lr_cycles=obj.warmup_cosine_lr_cycles, + warmup_cosine_lr_warmup_epochs=obj.warmup_cosine_lr_warmup_epochs, + weight_decay=obj.weight_decay, + training_crop_size=obj.training_crop_size, + validation_crop_size=obj.validation_crop_size, + validation_resize_size=obj.validation_resize_size, + weighted_loss=obj.weighted_loss, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageModelSettingsClassification): + return NotImplemented + + return ( + super().__eq__(other) + and self.training_crop_size == other.training_crop_size + and self.validation_crop_size == other.validation_crop_size + and self.validation_resize_size == other.validation_resize_size + and self.weighted_loss == other.weighted_loss + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) + + +class ImageModelSettingsObjectDetection(ImageModelDistributionSettings): + """Model settings for AutoML Image Object Detection Task. + + :param advanced_settings: Settings for advanced scenarios. + :type advanced_settings: str + :param ams_gradient: Enable AMSGrad when optimizer is 'adam' or 'adamw'. + :type ams_gradient: bool + :param beta1: Value of 'beta1' when optimizer is 'adam' or 'adamw'. Must be a float in the range + [0, 1]. + :type beta1: float + :param beta2: Value of 'beta2' when optimizer is 'adam' or 'adamw'. 
Must be a float in the range + [0, 1]. + :type beta2: float + :param checkpoint_frequency: Frequency to store model checkpoints. Must be a positive integer. + :type checkpoint_frequency: int + :param checkpoint_run_id: The id of a previous run that has a pretrained checkpoint for + incremental training. + :type checkpoint_run_id: str + :param distributed: Whether to use distributed training. + :type distributed: bool + :param early_stopping: Enable early stopping logic during training. + :type early_stopping: bool + :param early_stopping_delay: Minimum number of epochs or validation evaluations to wait before + primary metric improvement + is tracked for early stopping. Must be a positive integer. + :type early_stopping_delay: int + :param early_stopping_patience: Minimum number of epochs or validation evaluations with no + primary metric improvement before + the run is stopped. Must be a positive integer. + :type early_stopping_patience: int + :param enable_onnx_normalization: Enable normalization when exporting ONNX model. + :type enable_onnx_normalization: bool + :param evaluation_frequency: Frequency to evaluate validation dataset to get metric scores. Must + be a positive integer. + :type evaluation_frequency: int + :param gradient_accumulation_step: Gradient accumulation means running a configured number of + "GradAccumulationStep" steps without + updating the model weights while accumulating the gradients of those steps, and then using + the accumulated gradients to compute the weight updates. Must be a positive integer. + :type gradient_accumulation_step: int + :param layers_to_freeze: Number of layers to freeze for the model. Must be a positive integer. + For instance, passing 2 as value for 'seresnext' means + freezing layer0 and layer1. For a full list of models supported and details on layer freeze, + please + see: https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type layers_to_freeze: int + :param learning_rate: Initial learning rate. Must be a float in the range [0, 1]. + :type learning_rate: float + :param learning_rate_scheduler: Type of learning rate scheduler. Must be 'warmup_cosine' or + 'step'. Possible values include: "None", "WarmupCosine", "Step". + :type learning_rate_scheduler: str or + ~azure.mgmt.machinelearningservices.models.LearningRateScheduler + :param model_name: Name of the model to use for training. + For more information on the available models please visit the official documentation: + https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type model_name: str + :param momentum: Value of momentum when optimizer is 'sgd'. Must be a float in the range [0, 1]. + :type momentum: float + :param nesterov: Enable nesterov when optimizer is 'sgd'. + :type nesterov: bool + :param number_of_epochs: Number of training epochs. Must be a positive integer. + :type number_of_epochs: int + :param number_of_workers: Number of data loader workers. Must be a non-negative integer. + :type number_of_workers: int + :param optimizer: Type of optimizer. Possible values include: "None", "Sgd", "Adam", "Adamw". + :type optimizer: str or ~azure.mgmt.machinelearningservices.models.StochasticOptimizer + :param random_seed: Random seed to be used when using deterministic training. + :type random_seed: int + :param step_lr_gamma: Value of gamma when learning rate scheduler is 'step'. Must be a float in + the range [0, 1]. 
+ :type step_lr_gamma: float
+ :param step_lr_step_size: Value of step size when learning rate scheduler is 'step'. Must be a
+ positive integer.
+ :type step_lr_step_size: int
+ :param training_batch_size: Training batch size. Must be a positive integer.
+ :type training_batch_size: int
+ :param validation_batch_size: Validation batch size. Must be a positive integer.
+ :type validation_batch_size: int
+ :param warmup_cosine_lr_cycles: Value of cosine cycle when learning rate scheduler is
+ 'warmup_cosine'. Must be a float in the range [0, 1].
+ :type warmup_cosine_lr_cycles: float
+ :param warmup_cosine_lr_warmup_epochs: Value of warmup epochs when learning rate scheduler is
+ 'warmup_cosine'. Must be a positive integer.
+ :type warmup_cosine_lr_warmup_epochs: int
+ :param weight_decay: Value of weight decay when optimizer is 'sgd', 'adam', or 'adamw'. Must be
+ a float in the range [0, 1].
+ :type weight_decay: float
+ :param box_detections_per_image: Maximum number of detections per image, for all classes. Must
+ be a positive integer.
+ Note: This setting is not supported for the 'yolov5' algorithm.
+ :type box_detections_per_image: int
+ :param box_score_threshold: During inference, only return proposals with a classification score
+ greater than box_score_threshold. Must be a float in the range [0, 1].
+ :type box_score_threshold: float
+ :param image_size: Image size for training and validation. Must be a positive integer.
+ Note: The training run may run into CUDA OOM if the size is too big.
+ Note: This setting is only supported for the 'yolov5' algorithm.
+ :type image_size: int
+ :param max_size: Maximum size of the image to be rescaled before feeding it to the backbone.
+ Must be a positive integer. Note: The training run may run into CUDA OOM if the size is too big.
+ Note: This setting is not supported for the 'yolov5' algorithm.
+ :type max_size: int
+ :param min_size: Minimum size of the image to be rescaled before feeding it to the backbone.
+ Must be a positive integer. Note: The training run may run into CUDA OOM if the size is too big.
+ Note: This setting is not supported for the 'yolov5' algorithm.
+ :type min_size: int
+ :param model_size: Model size. Must be 'small', 'medium', or 'large'.
+ Note: The training run may run into CUDA OOM if the model size is too big.
+ Note: This setting is only supported for the 'yolov5' algorithm. Possible values include:
+ "None", "Small", "Medium", "Large", "ExtraLarge".
+ :type model_size: str or ~azure.mgmt.machinelearningservices.models.ModelSize
+ :param multi_scale: Enable multi-scale image by varying image size by +/- 50%.
+ Note: The training run may run into CUDA OOM if there is insufficient GPU memory.
+ Note: This setting is only supported for the 'yolov5' algorithm.
+ :type multi_scale: bool
+ :param nms_iou_threshold: IOU threshold used during inference in NMS post processing. Must be a
+ float in the range [0, 1].
+ :type nms_iou_threshold: float
+ :param tile_grid_size: The grid size to use for tiling each image, as a string containing two
+ integers in mxn format. Note: tile_grid_size must not be None to enable the small-object
+ detection logic.
+ Note: This setting is not supported for the 'yolov5' algorithm.
+ :type tile_grid_size: str
+ :param tile_overlap_ratio: Overlap ratio between adjacent tiles in each dimension. Must be a
+ float in the range [0, 1).
+ Note: This setting is not supported for the 'yolov5' algorithm.
+ :type tile_overlap_ratio: float
+ :param tile_predictions_nms_threshold: The IOU threshold to use to perform NMS while merging
+ predictions from tiles and image. Used in validation/inference. Must be a float in the
+ range [0, 1].
+ Note: This setting is not supported for the 'yolov5' algorithm.
+ :type tile_predictions_nms_threshold: float
+ :param validation_iou_threshold: IOU threshold to use when computing validation metrics. Must be
+ a float in the range [0, 1].
+ :type validation_iou_threshold: float
+ :param validation_metric_type: Metric computation method to use for validation metrics. Possible
+ values include: "None", "Coco", "Voc", "CocoVoc".
+ :type validation_metric_type: str or
+ ~azure.mgmt.machinelearningservices.models.ValidationMetricType
+ :param log_training_metrics: Indicates whether or not to log training metrics.
+ :type log_training_metrics: str or
+ ~azure.mgmt.machinelearningservices.models.LogTrainingMetrics
+ :param log_validation_loss: Indicates whether or not to log validation loss.
+ :type log_validation_loss: str or
+ ~azure.mgmt.machinelearningservices.models.LogValidationLoss
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/ml_samples_automl_image.py
+ :start-after: [START automl.automl_image_job.image_object_detection_model_settings]
+ :end-before: [END automl.automl_image_job.image_object_detection_model_settings]
+ :language: python
+ :dedent: 8
+ :caption: Defining the automl image object detection or instance segmentation model settings.
+ """
+
+ def __init__(
+ self,
+ *,
+ advanced_settings: Optional[str] = None,
+ ams_gradient: Optional[bool] = None,
+ beta1: Optional[float] = None,
+ beta2: Optional[float] = None,
+ checkpoint_frequency: Optional[int] = None,
+ checkpoint_run_id: Optional[str] = None,
+ distributed: Optional[bool] = None,
+ early_stopping: Optional[bool] = None,
+ early_stopping_delay: Optional[int] = None,
+ early_stopping_patience: Optional[int] = None,
+ enable_onnx_normalization: Optional[bool] = None,
+ evaluation_frequency: Optional[int] = None,
+ gradient_accumulation_step: Optional[int] = None,
+ layers_to_freeze: Optional[int] = None,
+ learning_rate: Optional[float] = None,
+ learning_rate_scheduler: Optional[LearningRateScheduler] = None,
+ model_name: Optional[str] = None,
+ momentum: Optional[float] = None,
+ nesterov: Optional[bool] = None,
+ number_of_epochs: Optional[int] = None,
+ number_of_workers: Optional[int] = None,
+ optimizer: Optional[StochasticOptimizer] = None,
+ random_seed: Optional[int] = None,
+ step_lr_gamma: Optional[float] = None,
+ step_lr_step_size: Optional[int] = None,
+ training_batch_size: Optional[int] = None,
+ validation_batch_size: Optional[int] = None,
+ warmup_cosine_lr_cycles: Optional[float] = None,
+ warmup_cosine_lr_warmup_epochs: Optional[int] = None,
+ weight_decay: Optional[float] = None,
+ box_detections_per_image: Optional[int] = None,
+ box_score_threshold: Optional[float] = None,
+ image_size: Optional[int] = None,
+ max_size: Optional[int] = None,
+ min_size: Optional[int] = None,
+ model_size: Optional[ModelSize] = None,
+ multi_scale: Optional[bool] = None,
+ nms_iou_threshold: Optional[float] = None,
+ tile_grid_size: Optional[str] = None,
+ tile_overlap_ratio: Optional[float] = None,
+ tile_predictions_nms_threshold: Optional[float] = None,
+ validation_iou_threshold: Optional[float] = None,
+ validation_metric_type: Optional[ValidationMetricType] = None,
+ log_training_metrics: Optional[LogTrainingMetrics] = None,
+ log_validation_loss: Optional[LogValidationLoss] = None,
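+ # Parameters from box_detections_per_image onward are detection/segmentation
+ # specific; the hyperparameters above them are shared and are forwarded to the
+ # base class below.
+ 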
**kwargs: Any, + ): + super(ImageModelSettingsObjectDetection, self).__init__( + advanced_settings=advanced_settings, + ams_gradient=ams_gradient, + beta1=beta1, + beta2=beta2, + checkpoint_frequency=checkpoint_frequency, + checkpoint_run_id=checkpoint_run_id, + distributed=distributed, + early_stopping=early_stopping, + early_stopping_delay=early_stopping_delay, + early_stopping_patience=early_stopping_patience, + enable_onnx_normalization=enable_onnx_normalization, + evaluation_frequency=evaluation_frequency, + gradient_accumulation_step=gradient_accumulation_step, + layers_to_freeze=layers_to_freeze, + learning_rate=learning_rate, + learning_rate_scheduler=learning_rate_scheduler, + model_name=model_name, + momentum=momentum, + nesterov=nesterov, + number_of_epochs=number_of_epochs, + number_of_workers=number_of_workers, + optimizer=optimizer, + random_seed=random_seed, + step_lr_gamma=step_lr_gamma, + step_lr_step_size=step_lr_step_size, + training_batch_size=training_batch_size, + validation_batch_size=validation_batch_size, + warmup_cosine_lr_cycles=warmup_cosine_lr_cycles, + warmup_cosine_lr_warmup_epochs=warmup_cosine_lr_warmup_epochs, + weight_decay=weight_decay, + **kwargs, + ) + self.box_detections_per_image = box_detections_per_image + self.box_score_threshold = box_score_threshold + self.image_size = image_size + self.max_size = max_size + self.min_size = min_size + self.model_size = model_size + self.multi_scale = multi_scale + self.nms_iou_threshold = nms_iou_threshold + self.tile_grid_size = tile_grid_size + self.tile_overlap_ratio = tile_overlap_ratio + self.tile_predictions_nms_threshold = tile_predictions_nms_threshold + self.validation_iou_threshold = validation_iou_threshold + self.validation_metric_type = validation_metric_type + self.log_training_metrics = log_training_metrics + self.log_validation_loss = log_validation_loss + + def _to_rest_object(self) -> RestImageModelSettingsObjectDetection: + return RestImageModelSettingsObjectDetection( + advanced_settings=self.advanced_settings, + ams_gradient=self.ams_gradient, + beta1=self.beta1, + beta2=self.beta2, + checkpoint_frequency=self.checkpoint_frequency, + checkpoint_run_id=self.checkpoint_run_id, + distributed=self.distributed, + early_stopping=self.early_stopping, + early_stopping_delay=self.early_stopping_delay, + early_stopping_patience=self.early_stopping_patience, + enable_onnx_normalization=self.enable_onnx_normalization, + evaluation_frequency=self.evaluation_frequency, + gradient_accumulation_step=self.gradient_accumulation_step, + layers_to_freeze=self.layers_to_freeze, + learning_rate=self.learning_rate, + learning_rate_scheduler=self.learning_rate_scheduler, + model_name=self.model_name, + momentum=self.momentum, + nesterov=self.nesterov, + number_of_epochs=self.number_of_epochs, + number_of_workers=self.number_of_workers, + optimizer=self.optimizer, + random_seed=self.random_seed, + step_lr_gamma=self.step_lr_gamma, + step_lr_step_size=self.step_lr_step_size, + training_batch_size=self.training_batch_size, + validation_batch_size=self.validation_batch_size, + warmup_cosine_lr_cycles=self.warmup_cosine_lr_cycles, + warmup_cosine_lr_warmup_epochs=self.warmup_cosine_lr_warmup_epochs, + weight_decay=self.weight_decay, + box_detections_per_image=self.box_detections_per_image, + box_score_threshold=self.box_score_threshold, + image_size=self.image_size, + max_size=self.max_size, + min_size=self.min_size, + model_size=self.model_size, + multi_scale=self.multi_scale, + 
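# The remaining fields (NMS, tiling, and validation-metric options) exist only
+ # on the object-detection REST model; the classification REST model has no
+ # counterparts for them.
+ 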
nms_iou_threshold=self.nms_iou_threshold, + tile_grid_size=self.tile_grid_size, + tile_overlap_ratio=self.tile_overlap_ratio, + tile_predictions_nms_threshold=self.tile_predictions_nms_threshold, + validation_iou_threshold=self.validation_iou_threshold, + validation_metric_type=self.validation_metric_type, + log_training_metrics=self.log_training_metrics, + log_validation_loss=self.log_validation_loss, + ) + + @classmethod + def _from_rest_object(cls, obj: RestImageModelSettingsObjectDetection) -> "ImageModelSettingsObjectDetection": + return cls( + advanced_settings=obj.advanced_settings, + ams_gradient=obj.ams_gradient, + beta1=obj.beta1, + beta2=obj.beta2, + checkpoint_frequency=obj.checkpoint_frequency, + checkpoint_run_id=obj.checkpoint_run_id, + distributed=obj.distributed, + early_stopping=obj.early_stopping, + early_stopping_delay=obj.early_stopping_delay, + early_stopping_patience=obj.early_stopping_patience, + enable_onnx_normalization=obj.enable_onnx_normalization, + evaluation_frequency=obj.evaluation_frequency, + gradient_accumulation_step=obj.gradient_accumulation_step, + layers_to_freeze=obj.layers_to_freeze, + learning_rate=obj.learning_rate, + learning_rate_scheduler=obj.learning_rate_scheduler, + model_name=obj.model_name, + momentum=obj.momentum, + nesterov=obj.nesterov, + number_of_epochs=obj.number_of_epochs, + number_of_workers=obj.number_of_workers, + optimizer=obj.optimizer, + random_seed=obj.random_seed, + step_lr_gamma=obj.step_lr_gamma, + step_lr_step_size=obj.step_lr_step_size, + training_batch_size=obj.training_batch_size, + validation_batch_size=obj.validation_batch_size, + warmup_cosine_lr_cycles=obj.warmup_cosine_lr_cycles, + warmup_cosine_lr_warmup_epochs=obj.warmup_cosine_lr_warmup_epochs, + weight_decay=obj.weight_decay, + box_detections_per_image=obj.box_detections_per_image, + box_score_threshold=obj.box_score_threshold, + image_size=obj.image_size, + max_size=obj.max_size, + min_size=obj.min_size, + model_size=obj.model_size, + multi_scale=obj.multi_scale, + nms_iou_threshold=obj.nms_iou_threshold, + tile_grid_size=obj.tile_grid_size, + tile_overlap_ratio=obj.tile_overlap_ratio, + tile_predictions_nms_threshold=obj.tile_predictions_nms_threshold, + validation_iou_threshold=obj.validation_iou_threshold, + validation_metric_type=obj.validation_metric_type, + log_training_metrics=obj.log_training_metrics, + log_validation_loss=obj.log_validation_loss, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageModelSettingsObjectDetection): + return NotImplemented + + return ( + super().__eq__(other) + and self.box_detections_per_image == other.box_detections_per_image + and self.box_score_threshold == other.box_score_threshold + and self.image_size == other.image_size + and self.max_size == other.max_size + and self.min_size == other.min_size + and self.model_size == other.model_size + and self.multi_scale == other.multi_scale + and self.nms_iou_threshold == other.nms_iou_threshold + and self.tile_grid_size == other.tile_grid_size + and self.tile_overlap_ratio == other.tile_overlap_ratio + and self.tile_predictions_nms_threshold == other.tile_predictions_nms_threshold + and self.validation_iou_threshold == other.validation_iou_threshold + and self.validation_metric_type == other.validation_metric_type + and self.log_training_metrics == other.log_training_metrics + and self.log_validation_loss == other.log_validation_loss + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git 
a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_object_detection_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_object_detection_job.py new file mode 100644 index 00000000..f8d070d2 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_object_detection_job.py @@ -0,0 +1,240 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import ImageObjectDetection as RestImageObjectDetection +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase, ObjectDetectionPrimaryMetrics, TaskType +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.image.automl_image_object_detection_base import AutoMLImageObjectDetectionBase +from azure.ai.ml.entities._job.automl.image.image_limit_settings import ImageLimitSettings +from azure.ai.ml.entities._job.automl.image.image_model_settings import ImageModelSettingsObjectDetection +from azure.ai.ml.entities._job.automl.image.image_sweep_settings import ImageSweepSettings +from azure.ai.ml.entities._util import load_from_dict + + +class ImageObjectDetectionJob(AutoMLImageObjectDetectionBase): + """Configuration for AutoML Image Object Detection job. + + :keyword primary_metric: The primary metric to use for optimization. + :paramtype primary_metric: Optional[Union[str, ~azure.ai.ml.ObjectDetectionPrimaryMetrics]] + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_automl_image.py + :start-after: [START automl.automl_image_job.image_object_detection_job] + :end-before: [END automl.automl_image_job.image_object_detection_job] + :language: python + :dedent: 8 + :caption: creating an automl image object detection job + """ + + _DEFAULT_PRIMARY_METRIC = ObjectDetectionPrimaryMetrics.MEAN_AVERAGE_PRECISION + + def __init__( + self, + *, + primary_metric: Optional[Union[str, ObjectDetectionPrimaryMetrics]] = None, + **kwargs: Any, + ) -> None: + + # Extract any super class init settings + limits = kwargs.pop("limits", None) + sweep = kwargs.pop("sweep", None) + training_parameters = kwargs.pop("training_parameters", None) + search_space = kwargs.pop("search_space", None) + + super().__init__( + task_type=TaskType.IMAGE_OBJECT_DETECTION, + limits=limits, + sweep=sweep, + training_parameters=training_parameters, + search_space=search_space, + **kwargs, + ) + + self.primary_metric = primary_metric or ImageObjectDetectionJob._DEFAULT_PRIMARY_METRIC + + @property + def primary_metric(self) -> Union[str, ObjectDetectionPrimaryMetrics]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ObjectDetectionPrimaryMetrics]) -> None: + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + self._primary_metric = ( + ImageObjectDetectionJob._DEFAULT_PRIMARY_METRIC + if value is None + else ObjectDetectionPrimaryMetrics[camel_to_snake(value).upper()] + ) + + def _to_rest_object(self) -> JobBase: + image_object_detection_task = RestImageObjectDetection( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + validation_data_size=self.validation_data_size, + limit_settings=self._limits._to_rest_object() if self._limits else None, + sweep_settings=self._sweep._to_rest_object() if self._sweep else None, + model_settings=self._training_parameters._to_rest_object() if self._training_parameters else None, + search_space=( + [entry._to_rest_object() for entry in self._search_space if entry is not None] + if self._search_space is not None + else None + ), + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + # resolve data inputs in rest object + self._resolve_data_inputs(image_object_detection_task) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=image_object_detection_task, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "ImageObjectDetectionJob": + properties: RestAutoMLJob = obj.properties + task_details: RestImageObjectDetection = properties.task_details + + job_args_dict = { + "id": obj.id, + "name": obj.name, + "description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "experiment_name": properties.experiment_name, + "services": properties.services, + "status": properties.status, + "creation_context": obj.system_data, + 
"display_name": properties.display_name, + "compute": properties.compute_id, + "outputs": from_rest_data_outputs(properties.outputs), + "resources": properties.resources, + "identity": ( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + "queue_settings": properties.queue_settings, + } + + image_object_detection_job = cls( + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + validation_data_size=task_details.validation_data_size, + limits=( + ImageLimitSettings._from_rest_object(task_details.limit_settings) + if task_details.limit_settings + else None + ), + sweep=( + ImageSweepSettings._from_rest_object(task_details.sweep_settings) + if task_details.sweep_settings + else None + ), + training_parameters=( + ImageModelSettingsObjectDetection._from_rest_object(task_details.model_settings) + if task_details.model_settings + else None + ), + search_space=cls._get_search_space_from_str(task_details.search_space), + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + **job_args_dict, + ) + + image_object_detection_job._restore_data_inputs() + + return image_object_detection_job + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "ImageObjectDetectionJob": + from azure.ai.ml._schema.automl.image_vertical.image_object_detection import ImageObjectDetectionSchema + from azure.ai.ml._schema.pipeline.automl_node import ImageObjectDetectionNodeSchema + + if kwargs.pop("inside_pipeline", False): + if context.get("inside_pipeline", None) is None: + context["inside_pipeline"] = True + loaded_data = load_from_dict( + ImageObjectDetectionNodeSchema, + data, + context, + additional_message, + **kwargs, + ) + else: + loaded_data = load_from_dict(ImageObjectDetectionSchema, data, context, additional_message, **kwargs) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "ImageObjectDetectionJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + data_settings = { + "training_data": loaded_data.pop("training_data"), + "target_column_name": loaded_data.pop("target_column_name"), + "validation_data": loaded_data.pop("validation_data", None), + "validation_data_size": loaded_data.pop("validation_data_size", None), + } + job = ImageObjectDetectionJob(**loaded_data) + job.set_data(**data_settings) + return job + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.image_vertical.image_object_detection import ImageObjectDetectionSchema + from azure.ai.ml._schema.pipeline.automl_node import ImageObjectDetectionNodeSchema + + schema_dict: dict = {} + if inside_pipeline: + schema_dict = ImageObjectDetectionNodeSchema( + context={BASE_PATH_CONTEXT_KEY: "./", "inside_pipeline": True} + ).dump(self) + else: + schema_dict = ImageObjectDetectionSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + + return schema_dict + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageObjectDetectionJob): + return NotImplemented + + if not super().__eq__(other): + return False + + return self.primary_metric == other.primary_metric + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git 
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_object_detection_search_space.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_object_detection_search_space.py new file mode 100644 index 00000000..a9004d1e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_object_detection_search_space.py @@ -0,0 +1,899 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=R0902,too-many-locals + +from typing import Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ImageModelDistributionSettingsObjectDetection +from azure.ai.ml.entities._job.automl.search_space import SearchSpace +from azure.ai.ml.entities._job.automl.search_space_utils import _convert_from_rest_object, _convert_to_rest_object +from azure.ai.ml.entities._mixins import RestTranslatableMixin +from azure.ai.ml.sweep import ( + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, +) + + +class ImageObjectDetectionSearchSpace(RestTranslatableMixin): + """Search space for AutoML Image Object Detection and Image Instance Segmentation tasks. + + :param ams_gradient: Enable AMSGrad when optimizer is 'adam' or 'adamw'. + :type ams_gradient: bool or ~azure.ai.ml.entities.SweepDistribution + :param beta1: Value of 'beta1' when optimizer is 'adam' or 'adamw'. Must be a float in the + range [0, 1]. + :type beta1: float or ~azure.ai.ml.entities.SweepDistribution + :param beta2: Value of 'beta2' when optimizer is 'adam' or 'adamw'. Must be a float in the + range [0, 1]. + :type beta2: float or ~azure.ai.ml.entities.SweepDistribution + :param distributed: Whether to use distributed training. + :type distributed: bool or ~azure.ai.ml.entities.SweepDistribution + :param early_stopping: Enable early stopping logic during training. + :type early_stopping: bool or ~azure.ai.ml.entities.SweepDistribution + :param early_stopping_delay: Minimum number of epochs or validation evaluations to wait + before primary metric improvement + is tracked for early stopping. Must be a positive integer. + :type early_stopping_delay: int or ~azure.ai.ml.entities.SweepDistribution + :param early_stopping_patience: Minimum number of epochs or validation evaluations with no + primary metric improvement before the run is stopped. Must be a positive integer. + :type early_stopping_patience: int or ~azure.ai.ml.entities.SweepDistribution + :param enable_onnx_normalization: Enable normalization when exporting ONNX model. + :type enable_onnx_normalization: bool or ~azure.ai.ml.entities.SweepDistribution + :param evaluation_frequency: Frequency to evaluate validation dataset to get metric scores. + Must be a positive integer. + :type evaluation_frequency: int or ~azure.ai.ml.entities.SweepDistribution + :param gradient_accumulation_step: Gradient accumulation means running a configured number of + "GradAccumulationStep" steps without updating the model weights while accumulating the gradients of those steps, + and then using the accumulated gradients to compute the weight updates. Must be a positive integer. + :type gradient_accumulation_step: int or ~azure.ai.ml.entities.SweepDistribution + :param layers_to_freeze: Number of layers to freeze for the model. Must be a positive + integer. 
For instance, passing 2 as value for 'seresnext' means freezing layer0 and layer1. + For a full list of models supported and details on layer freeze, please + see: https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters#model-agnostic-hyperparameters. # pylint: disable=line-too-long + :type layers_to_freeze: int or ~azure.ai.ml.entities.SweepDistribution + :param learning_rate: Initial learning rate. Must be a float in the range [0, 1]. + :type learning_rate: float or ~azure.ai.ml.entities.SweepDistribution + :param learning_rate_scheduler: Type of learning rate scheduler. Must be 'warmup_cosine' or + 'step'. + :type learning_rate_scheduler: str or ~azure.ai.ml.entities.SweepDistribution + :param model_name: Name of the model to use for training. + For more information on the available models please visit the official documentation: + https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-image-models. + :type model_name: str or ~azure.ai.ml.entities.SweepDistribution + :param momentum: Value of momentum when optimizer is 'sgd'. Must be a float in the range [0, + 1]. + :type momentum: float or ~azure.ai.ml.entities.SweepDistribution + :param nesterov: Enable nesterov when optimizer is 'sgd'. + :type nesterov: bool or ~azure.ai.ml.entities.SweepDistribution + :param number_of_epochs: Number of training epochs. Must be a positive integer. + :type number_of_epochs: int or ~azure.ai.ml.entities.SweepDistribution + :param number_of_workers: Number of data loader workers. Must be a non-negative integer. + :type number_of_workers: int or ~azure.ai.ml.entities.SweepDistribution + :param optimizer: Type of optimizer. Must be either 'sgd', 'adam', or 'adamw'. + :type optimizer: str or ~azure.ai.ml.entities.SweepDistribution + :param random_seed: Random seed to be used when using deterministic training. + :type random_seed: int or ~azure.ai.ml.entities.SweepDistribution + :param step_lr_gamma: Value of gamma when learning rate scheduler is 'step'. Must be a float + in the range [0, 1]. + :type step_lr_gamma: float or ~azure.ai.ml.entities.SweepDistribution + :param step_lr_step_size: Value of step size when learning rate scheduler is 'step'. Must be + a positive integer. + :type step_lr_step_size: int or ~azure.ai.ml.entities.SweepDistribution + :param training_batch_size: Training batch size. Must be a positive integer. + :type training_batch_size: int or ~azure.ai.ml.entities.SweepDistribution + :param validation_batch_size: Validation batch size. Must be a positive integer. + :type validation_batch_size: int or ~azure.ai.ml.entities.SweepDistribution + :param warmup_cosine_lr_cycles: Value of cosine cycle when learning rate scheduler is + 'warmup_cosine'. Must be a float in the range [0, 1]. + :type warmup_cosine_lr_cycles: float or ~azure.ai.ml.entities.SweepDistribution + :param warmup_cosine_lr_warmup_epochs: Value of warmup epochs when learning rate scheduler is + 'warmup_cosine'. Must be a positive integer. + :type warmup_cosine_lr_warmup_epochs: int or ~azure.ai.ml.entities.SweepDistribution + :param weight_decay: Value of weight decay when optimizer is 'sgd', 'adam', or 'adamw'. Must + be a float in the range [0, 1]. + :type weight_decay: float or ~azure.ai.ml.entities.SweepDistribution + :param box_detections_per_image: Maximum number of detections per image, for all classes. + Must be a positive integer. Note: This setting is not supported for the 'yolov5' algorithm. 
+ :type box_detections_per_image: int or ~azure.ai.ml.entities.SweepDistribution + :param box_score_threshold: During inference, only return proposals with a classification + score greater than BoxScoreThreshold. Must be a float in the range [0, 1]. + :type box_score_threshold: float or ~azure.ai.ml.entities.SweepDistribution + :param image_size: Image size for train and validation. Must be a positive integer. + Note: The training run may get into CUDA OOM if the size is too big. + Note: This setting is only supported for the 'yolov5' algorithm. + :type image_size: int or ~azure.ai.ml.entities.SweepDistribution + :param max_size: Maximum size of the image to be rescaled before feeding it to the backbone. + Must be a positive integer. Note: The training run may get into CUDA OOM if the size is too big. + Note: This setting is not supported for the 'yolov5' algorithm. + :type max_size: int or ~azure.ai.ml.entities.SweepDistribution + :param min_size: Minimum size of the image to be rescaled before feeding it to the backbone. + Must be a positive integer. Note: The training run may get into CUDA OOM if the size is too big. + Note: This setting is not supported for the 'yolov5' algorithm. + :type min_size: int or ~azure.ai.ml.entities.SweepDistribution + :param model_size: Model size. Must be 'small', 'medium', 'large', or 'extra_large'. + Note: The training run may get into CUDA OOM if the model size is too big. + Note: This setting is only supported for the 'yolov5' algorithm. + :type model_size: str or ~azure.ai.ml.entities.SweepDistribution + :param multi_scale: Enable multi-scale image by varying image size by +/- 50%. + Note: The training run may get into CUDA OOM if there is insufficient GPU memory. + Note: This setting is only supported for the 'yolov5' algorithm. + :type multi_scale: bool or ~azure.ai.ml.entities.SweepDistribution + :param nms_iou_threshold: IOU threshold used during inference in NMS post processing. Must be + a float in the range [0, 1]. + :type nms_iou_threshold: float or ~azure.ai.ml.entities.SweepDistribution + :param tile_grid_size: The grid size to use for tiling each image. Note: TileGridSize must + not be None to enable small object detection logic. A string containing two integers in mxn format. + :type tile_grid_size: str or ~azure.ai.ml.entities.SweepDistribution + :param tile_overlap_ratio: Overlap ratio between adjacent tiles in each dimension. Must be + a float in the range [0, 1). + :type tile_overlap_ratio: float or ~azure.ai.ml.entities.SweepDistribution + :param tile_predictions_nms_threshold: The IOU threshold to use to perform NMS while merging + predictions from tiles and image. Used in validation/inference. Must be a float in the range [0, 1]. + NMS: Non-maximum suppression. + :type tile_predictions_nms_threshold: float or ~azure.ai.ml.entities.SweepDistribution + :param validation_iou_threshold: IOU threshold to use when computing validation metric. Must + be a float in the range [0, 1]. + :type validation_iou_threshold: float or ~azure.ai.ml.entities.SweepDistribution + :param validation_metric_type: Metric computation method to use for validation metrics. Must + be 'none', 'coco', 'voc', or 'coco_voc'. + :type validation_metric_type: str or ~azure.ai.ml.entities.SweepDistribution + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_automl_image.py + :start-after: [START automl.automl_image_job.image_object_detection_search_space] + :end-before: [END automl.automl_image_job.image_object_detection_search_space] + :language: python + :dedent: 8 + :caption: Defining an automl image object detection or instance segmentation search space + """ + + def __init__( + self, + *, + ams_gradient: Optional[ + Union[ + bool, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + beta1: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + beta2: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + distributed: Optional[ + Union[ + bool, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + early_stopping: Optional[ + Union[ + bool, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + early_stopping_delay: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + early_stopping_patience: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + enable_onnx_normalization: Optional[ + Union[ + bool, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + evaluation_frequency: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + gradient_accumulation_step: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + layers_to_freeze: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + learning_rate: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + learning_rate_scheduler: Optional[ + Union[ + str, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + model_name: Optional[ + Union[ + str, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + momentum: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + nesterov: Optional[ + Union[ + bool, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + number_of_epochs: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + number_of_workers: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + optimizer: Optional[ + Union[ + str, Choice, LogNormal, LogUniform, 
Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + random_seed: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + step_lr_gamma: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + step_lr_step_size: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + training_batch_size: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + validation_batch_size: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + warmup_cosine_lr_cycles: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + warmup_cosine_lr_warmup_epochs: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + weight_decay: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + box_detections_per_image: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + box_score_threshold: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + image_size: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + max_size: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + min_size: Optional[ + Union[ + int, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + model_size: Optional[ + Union[ + str, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + multi_scale: Optional[ + Union[ + bool, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + nms_iou_threshold: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + tile_grid_size: Optional[ + Union[ + str, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + tile_overlap_ratio: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + tile_predictions_nms_threshold: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + validation_iou_threshold: Optional[ + Union[ + float, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ] + ] = None, + 
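+ # Every hyperparameter in this signature accepts either a fixed literal or a sweep distribution; for example, validation_metric_type below may be "coco" or Choice(["coco", "voc"]).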
validation_metric_type: Optional[ + Union[ + str, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + ] + ] = None, + ) -> None: + self.ams_gradient = ams_gradient + self.beta1 = beta1 + self.beta2 = beta2 + self.distributed = distributed + self.early_stopping = early_stopping + self.early_stopping_delay = early_stopping_delay + self.early_stopping_patience = early_stopping_patience + self.enable_onnx_normalization = enable_onnx_normalization + self.evaluation_frequency = evaluation_frequency + self.gradient_accumulation_step = gradient_accumulation_step + self.layers_to_freeze = layers_to_freeze + self.learning_rate = learning_rate + self.learning_rate_scheduler = learning_rate_scheduler + self.model_name = model_name + self.momentum = momentum + self.nesterov = nesterov + self.number_of_epochs = number_of_epochs + self.number_of_workers = number_of_workers + self.optimizer = optimizer + self.random_seed = random_seed + self.step_lr_gamma = step_lr_gamma + self.step_lr_step_size = step_lr_step_size + self.training_batch_size = training_batch_size + self.validation_batch_size = validation_batch_size + self.warmup_cosine_lr_cycles = warmup_cosine_lr_cycles + self.warmup_cosine_lr_warmup_epochs = warmup_cosine_lr_warmup_epochs + self.weight_decay = weight_decay + self.box_detections_per_image = box_detections_per_image + self.box_score_threshold = box_score_threshold + self.image_size = image_size + self.max_size = max_size + self.min_size = min_size + self.model_size = model_size + self.multi_scale = multi_scale + self.nms_iou_threshold = nms_iou_threshold + self.tile_grid_size = tile_grid_size + self.tile_overlap_ratio = tile_overlap_ratio + self.tile_predictions_nms_threshold = tile_predictions_nms_threshold + self.validation_iou_threshold = validation_iou_threshold + self.validation_metric_type = validation_metric_type + + def _to_rest_object(self) -> ImageModelDistributionSettingsObjectDetection: + return ImageModelDistributionSettingsObjectDetection( + ams_gradient=_convert_to_rest_object(self.ams_gradient) if self.ams_gradient is not None else None, + beta1=_convert_to_rest_object(self.beta1) if self.beta1 is not None else None, + beta2=_convert_to_rest_object(self.beta2) if self.beta2 is not None else None, + distributed=_convert_to_rest_object(self.distributed) if self.distributed is not None else None, + early_stopping=_convert_to_rest_object(self.early_stopping) if self.early_stopping is not None else None, + early_stopping_delay=( + _convert_to_rest_object(self.early_stopping_delay) if self.early_stopping_delay is not None else None + ), + early_stopping_patience=( + _convert_to_rest_object(self.early_stopping_patience) + if self.early_stopping_patience is not None + else None + ), + enable_onnx_normalization=( + _convert_to_rest_object(self.enable_onnx_normalization) + if self.enable_onnx_normalization is not None + else None + ), + evaluation_frequency=( + _convert_to_rest_object(self.evaluation_frequency) if self.evaluation_frequency is not None else None + ), + gradient_accumulation_step=( + _convert_to_rest_object(self.gradient_accumulation_step) + if self.gradient_accumulation_step is not None + else None + ), + layers_to_freeze=( + _convert_to_rest_object(self.layers_to_freeze) if self.layers_to_freeze is not None else None + ), + learning_rate=_convert_to_rest_object(self.learning_rate) if self.learning_rate is not None else None, + learning_rate_scheduler=( + _convert_to_rest_object(self.learning_rate_scheduler) + if 
self.learning_rate_scheduler is not None + else None + ), + model_name=_convert_to_rest_object(self.model_name) if self.model_name is not None else None, + momentum=_convert_to_rest_object(self.momentum) if self.momentum is not None else None, + nesterov=_convert_to_rest_object(self.nesterov) if self.nesterov is not None else None, + number_of_epochs=( + _convert_to_rest_object(self.number_of_epochs) if self.number_of_epochs is not None else None + ), + number_of_workers=( + _convert_to_rest_object(self.number_of_workers) if self.number_of_workers is not None else None + ), + optimizer=_convert_to_rest_object(self.optimizer) if self.optimizer is not None else None, + random_seed=_convert_to_rest_object(self.random_seed) if self.random_seed is not None else None, + step_lr_gamma=_convert_to_rest_object(self.step_lr_gamma) if self.step_lr_gamma is not None else None, + step_lr_step_size=( + _convert_to_rest_object(self.step_lr_step_size) if self.step_lr_step_size is not None else None + ), + training_batch_size=( + _convert_to_rest_object(self.training_batch_size) if self.training_batch_size is not None else None + ), + validation_batch_size=( + _convert_to_rest_object(self.validation_batch_size) if self.validation_batch_size is not None else None + ), + warmup_cosine_lr_cycles=( + _convert_to_rest_object(self.warmup_cosine_lr_cycles) + if self.warmup_cosine_lr_cycles is not None + else None + ), + warmup_cosine_lr_warmup_epochs=( + _convert_to_rest_object(self.warmup_cosine_lr_warmup_epochs) + if self.warmup_cosine_lr_warmup_epochs is not None + else None + ), + weight_decay=_convert_to_rest_object(self.weight_decay) if self.weight_decay is not None else None, + box_detections_per_image=( + _convert_to_rest_object(self.box_detections_per_image) + if self.box_detections_per_image is not None + else None + ), + box_score_threshold=( + _convert_to_rest_object(self.box_score_threshold) if self.box_score_threshold is not None else None + ), + image_size=_convert_to_rest_object(self.image_size) if self.image_size is not None else None, + max_size=_convert_to_rest_object(self.max_size) if self.max_size is not None else None, + min_size=_convert_to_rest_object(self.min_size) if self.min_size is not None else None, + model_size=_convert_to_rest_object(self.model_size) if self.model_size is not None else None, + multi_scale=_convert_to_rest_object(self.multi_scale) if self.multi_scale is not None else None, + nms_iou_threshold=( + _convert_to_rest_object(self.nms_iou_threshold) if self.nms_iou_threshold is not None else None + ), + tile_grid_size=_convert_to_rest_object(self.tile_grid_size) if self.tile_grid_size is not None else None, + tile_overlap_ratio=( + _convert_to_rest_object(self.tile_overlap_ratio) if self.tile_overlap_ratio is not None else None + ), + tile_predictions_nms_threshold=( + _convert_to_rest_object(self.tile_predictions_nms_threshold) + if self.tile_predictions_nms_threshold is not None + else None + ), + validation_iou_threshold=( + _convert_to_rest_object(self.validation_iou_threshold) + if self.validation_iou_threshold is not None + else None + ), + validation_metric_type=( + _convert_to_rest_object(self.validation_metric_type) + if self.validation_metric_type is not None + else None + ), + ) + + @classmethod + def _from_rest_object(cls, obj: ImageModelDistributionSettingsObjectDetection) -> "ImageObjectDetectionSearchSpace": + return cls( + ams_gradient=_convert_from_rest_object(obj.ams_gradient) if obj.ams_gradient is not None else None, + 
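+ # Inverse of _to_rest_object above: each REST distribution expression present on the wire object is converted back into an SDK sweep type, and absent fields stay None.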
beta1=_convert_from_rest_object(obj.beta1) if obj.beta1 is not None else None, + beta2=_convert_from_rest_object(obj.beta2) if obj.beta2 is not None else None, + distributed=_convert_from_rest_object(obj.distributed) if obj.distributed is not None else None, + early_stopping=_convert_from_rest_object(obj.early_stopping) if obj.early_stopping is not None else None, + early_stopping_delay=( + _convert_from_rest_object(obj.early_stopping_delay) if obj.early_stopping_delay is not None else None + ), + early_stopping_patience=( + _convert_from_rest_object(obj.early_stopping_patience) + if obj.early_stopping_patience is not None + else None + ), + enable_onnx_normalization=( + _convert_from_rest_object(obj.enable_onnx_normalization) + if obj.enable_onnx_normalization is not None + else None + ), + evaluation_frequency=( + _convert_from_rest_object(obj.evaluation_frequency) if obj.evaluation_frequency is not None else None + ), + gradient_accumulation_step=( + _convert_from_rest_object(obj.gradient_accumulation_step) + if obj.gradient_accumulation_step is not None + else None + ), + layers_to_freeze=( + _convert_from_rest_object(obj.layers_to_freeze) if obj.layers_to_freeze is not None else None + ), + learning_rate=_convert_from_rest_object(obj.learning_rate) if obj.learning_rate is not None else None, + learning_rate_scheduler=( + _convert_from_rest_object(obj.learning_rate_scheduler) + if obj.learning_rate_scheduler is not None + else None + ), + model_name=_convert_from_rest_object(obj.model_name) if obj.model_name is not None else None, + momentum=_convert_from_rest_object(obj.momentum) if obj.momentum is not None else None, + nesterov=_convert_from_rest_object(obj.nesterov) if obj.nesterov is not None else None, + number_of_epochs=( + _convert_from_rest_object(obj.number_of_epochs) if obj.number_of_epochs is not None else None + ), + number_of_workers=( + _convert_from_rest_object(obj.number_of_workers) if obj.number_of_workers is not None else None + ), + optimizer=_convert_from_rest_object(obj.optimizer) if obj.optimizer is not None else None, + random_seed=_convert_from_rest_object(obj.random_seed) if obj.random_seed is not None else None, + step_lr_gamma=_convert_from_rest_object(obj.step_lr_gamma) if obj.step_lr_gamma is not None else None, + step_lr_step_size=( + _convert_from_rest_object(obj.step_lr_step_size) if obj.step_lr_step_size is not None else None + ), + training_batch_size=( + _convert_from_rest_object(obj.training_batch_size) if obj.training_batch_size is not None else None + ), + validation_batch_size=( + _convert_from_rest_object(obj.validation_batch_size) if obj.validation_batch_size is not None else None + ), + warmup_cosine_lr_cycles=( + _convert_from_rest_object(obj.warmup_cosine_lr_cycles) + if obj.warmup_cosine_lr_cycles is not None + else None + ), + warmup_cosine_lr_warmup_epochs=( + _convert_from_rest_object(obj.warmup_cosine_lr_warmup_epochs) + if obj.warmup_cosine_lr_warmup_epochs is not None + else None + ), + weight_decay=_convert_from_rest_object(obj.weight_decay) if obj.weight_decay is not None else None, + box_detections_per_image=( + _convert_from_rest_object(obj.box_detections_per_image) + if obj.box_detections_per_image is not None + else None + ), + box_score_threshold=( + _convert_from_rest_object(obj.box_score_threshold) if obj.box_score_threshold is not None else None + ), + image_size=_convert_from_rest_object(obj.image_size) if obj.image_size is not None else None, + max_size=_convert_from_rest_object(obj.max_size) if obj.max_size is not None 
else None, + min_size=_convert_from_rest_object(obj.min_size) if obj.min_size is not None else None, + model_size=_convert_from_rest_object(obj.model_size) if obj.model_size is not None else None, + multi_scale=_convert_from_rest_object(obj.multi_scale) if obj.multi_scale is not None else None, + nms_iou_threshold=( + _convert_from_rest_object(obj.nms_iou_threshold) if obj.nms_iou_threshold is not None else None + ), + tile_grid_size=_convert_from_rest_object(obj.tile_grid_size) if obj.tile_grid_size is not None else None, + tile_overlap_ratio=( + _convert_from_rest_object(obj.tile_overlap_ratio) if obj.tile_overlap_ratio is not None else None + ), + tile_predictions_nms_threshold=( + _convert_from_rest_object(obj.tile_predictions_nms_threshold) + if obj.tile_predictions_nms_threshold is not None + else None + ), + validation_iou_threshold=( + _convert_from_rest_object(obj.validation_iou_threshold) + if obj.validation_iou_threshold is not None + else None + ), + validation_metric_type=( + _convert_from_rest_object(obj.validation_metric_type) + if obj.validation_metric_type is not None + else None + ), + ) + + @classmethod + def _from_search_space_object(cls, obj: SearchSpace) -> "ImageObjectDetectionSearchSpace": + return cls( + ams_gradient=obj.ams_gradient if hasattr(obj, "ams_gradient") else None, + beta1=obj.beta1 if hasattr(obj, "beta1") else None, + beta2=obj.beta2 if hasattr(obj, "beta2") else None, + distributed=obj.distributed if hasattr(obj, "distributed") else None, + early_stopping=obj.early_stopping if hasattr(obj, "early_stopping") else None, + early_stopping_delay=obj.early_stopping_delay if hasattr(obj, "early_stopping_delay") else None, + early_stopping_patience=obj.early_stopping_patience if hasattr(obj, "early_stopping_patience") else None, + enable_onnx_normalization=( + obj.enable_onnx_normalization if hasattr(obj, "enable_onnx_normalization") else None + ), + evaluation_frequency=obj.evaluation_frequency if hasattr(obj, "evaluation_frequency") else None, + gradient_accumulation_step=( + obj.gradient_accumulation_step if hasattr(obj, "gradient_accumulation_step") else None + ), + layers_to_freeze=obj.layers_to_freeze if hasattr(obj, "layers_to_freeze") else None, + learning_rate=obj.learning_rate if hasattr(obj, "learning_rate") else None, + learning_rate_scheduler=obj.learning_rate_scheduler if hasattr(obj, "learning_rate_scheduler") else None, + model_name=obj.model_name if hasattr(obj, "model_name") else None, + momentum=obj.momentum if hasattr(obj, "momentum") else None, + nesterov=obj.nesterov if hasattr(obj, "nesterov") else None, + number_of_epochs=obj.number_of_epochs if hasattr(obj, "number_of_epochs") else None, + number_of_workers=obj.number_of_workers if hasattr(obj, "number_of_workers") else None, + optimizer=obj.optimizer if hasattr(obj, "optimizer") else None, + random_seed=obj.random_seed if hasattr(obj, "random_seed") else None, + step_lr_gamma=obj.step_lr_gamma if hasattr(obj, "step_lr_gamma") else None, + step_lr_step_size=obj.step_lr_step_size if hasattr(obj, "step_lr_step_size") else None, + training_batch_size=obj.training_batch_size if hasattr(obj, "training_batch_size") else None, + validation_batch_size=obj.validation_batch_size if hasattr(obj, "validation_batch_size") else None, + warmup_cosine_lr_cycles=obj.warmup_cosine_lr_cycles if hasattr(obj, "warmup_cosine_lr_cycles") else None, + warmup_cosine_lr_warmup_epochs=( + obj.warmup_cosine_lr_warmup_epochs if hasattr(obj, "warmup_cosine_lr_warmup_epochs") else None + ), + 
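+ # The generic SearchSpace entity only carries attributes the user actually set, hence the hasattr guard on every field in this constructor call.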
weight_decay=obj.weight_decay if hasattr(obj, "weight_decay") else None, + box_detections_per_image=obj.box_detections_per_image if hasattr(obj, "box_detections_per_image") else None, + box_score_threshold=obj.box_score_threshold if hasattr(obj, "box_score_threshold") else None, + image_size=obj.image_size if hasattr(obj, "image_size") else None, + max_size=obj.max_size if hasattr(obj, "max_size") else None, + min_size=obj.min_size if hasattr(obj, "min_size") else None, + model_size=obj.model_size if hasattr(obj, "model_size") else None, + multi_scale=obj.multi_scale if hasattr(obj, "multi_scale") else None, + nms_iou_threshold=obj.nms_iou_threshold if hasattr(obj, "nms_iou_threshold") else None, + tile_grid_size=obj.tile_grid_size if hasattr(obj, "tile_grid_size") else None, + tile_overlap_ratio=obj.tile_overlap_ratio if hasattr(obj, "tile_overlap_ratio") else None, + tile_predictions_nms_threshold=( + obj.tile_predictions_nms_threshold if hasattr(obj, "tile_predictions_nms_threshold") else None + ), + validation_iou_threshold=obj.validation_iou_threshold if hasattr(obj, "validation_iou_threshold") else None, + validation_metric_type=obj.validation_metric_type if hasattr(obj, "validation_metric_type") else None, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageObjectDetectionSearchSpace): + return NotImplemented + + return ( + self.ams_gradient == other.ams_gradient + and self.beta1 == other.beta1 + and self.beta2 == other.beta2 + and self.distributed == other.distributed + and self.early_stopping == other.early_stopping + and self.early_stopping_delay == other.early_stopping_delay + and self.early_stopping_patience == other.early_stopping_patience + and self.enable_onnx_normalization == other.enable_onnx_normalization + and self.evaluation_frequency == other.evaluation_frequency + and self.gradient_accumulation_step == other.gradient_accumulation_step + and self.layers_to_freeze == other.layers_to_freeze + and self.learning_rate == other.learning_rate + and self.learning_rate_scheduler == other.learning_rate_scheduler + and self.model_name == other.model_name + and self.momentum == other.momentum + and self.nesterov == other.nesterov + and self.number_of_epochs == other.number_of_epochs + and self.number_of_workers == other.number_of_workers + and self.optimizer == other.optimizer + and self.random_seed == other.random_seed + and self.step_lr_gamma == other.step_lr_gamma + and self.step_lr_step_size == other.step_lr_step_size + and self.training_batch_size == other.training_batch_size + and self.validation_batch_size == other.validation_batch_size + and self.warmup_cosine_lr_cycles == other.warmup_cosine_lr_cycles + and self.warmup_cosine_lr_warmup_epochs == other.warmup_cosine_lr_warmup_epochs + and self.weight_decay == other.weight_decay + and self.box_detections_per_image == other.box_detections_per_image + and self.box_score_threshold == other.box_score_threshold + and self.image_size == other.image_size + and self.max_size == other.max_size + and self.min_size == other.min_size + and self.model_size == other.model_size + and self.multi_scale == other.multi_scale + and self.nms_iou_threshold == other.nms_iou_threshold + and self.tile_grid_size == other.tile_grid_size + and self.tile_overlap_ratio == other.tile_overlap_ratio + and self.tile_predictions_nms_threshold == other.tile_predictions_nms_threshold + and self.validation_iou_threshold == other.validation_iou_threshold + and self.validation_metric_type == other.validation_metric_type + ) + + def 
__ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_sweep_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_sweep_settings.py new file mode 100644 index 00000000..b5e9ffaf --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/image/image_sweep_settings.py @@ -0,0 +1,86 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ImageSweepSettings as RestImageSweepSettings +from azure.ai.ml._restclient.v2023_04_01_preview.models import SamplingAlgorithmType +from azure.ai.ml.entities._job.sweep.early_termination_policy import ( + BanditPolicy, + EarlyTerminationPolicy, + MedianStoppingPolicy, + TruncationSelectionPolicy, +) +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class ImageSweepSettings(RestTranslatableMixin): + """Sweep settings for all AutoML Image Verticals. + + :keyword sampling_algorithm: Required. Type of the hyperparameter sampling algorithm. + Possible values include: "Grid", "Random", "Bayesian". + :paramtype sampling_algorithm: Union[str, ~azure.mgmt.machinelearningservices.models.SamplingAlgorithmType] + :keyword early_termination: Type of early termination policy. + :paramtype early_termination: Union[ + + ~azure.mgmt.machinelearningservices.models.BanditPolicy, + ~azure.mgmt.machinelearningservices.models.MedianStoppingPolicy, + ~azure.mgmt.machinelearningservices.models.TruncationSelectionPolicy + + ] + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_automl_image.py + :start-after: [START automl.automl_image_job.image_sweep_settings] + :end-before: [END automl.automl_image_job.image_sweep_settings] + :language: python + :dedent: 8 + :caption: Defining the sweep settings for an automl image job. 
+ """ + + def __init__( + self, + *, + sampling_algorithm: Union[ + str, SamplingAlgorithmType.GRID, SamplingAlgorithmType.BAYESIAN, SamplingAlgorithmType.RANDOM + ], + early_termination: Optional[ + Union[EarlyTerminationPolicy, BanditPolicy, MedianStoppingPolicy, TruncationSelectionPolicy] + ] = None, + ): + self.sampling_algorithm = sampling_algorithm + self.early_termination = early_termination + + def _to_rest_object(self) -> RestImageSweepSettings: + return RestImageSweepSettings( + sampling_algorithm=self.sampling_algorithm, + early_termination=self.early_termination._to_rest_object() if self.early_termination else None, + ) + + @classmethod + def _from_rest_object(cls, obj: RestImageSweepSettings) -> "ImageSweepSettings": + return cls( + sampling_algorithm=obj.sampling_algorithm, + early_termination=( + EarlyTerminationPolicy._from_rest_object(obj.early_termination) if obj.early_termination else None + ), + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageSweepSettings): + return NotImplemented + + return self.sampling_algorithm == other.sampling_algorithm and self.early_termination == other.early_termination + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/__init__.py new file mode 100644 index 00000000..9be7b483 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/__init__.py @@ -0,0 +1,25 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from .automl_nlp_job import AutoMLNLPJob +from .nlp_featurization_settings import NlpFeaturizationSettings +from .nlp_fixed_parameters import NlpFixedParameters +from .nlp_limit_settings import NlpLimitSettings +from .nlp_search_space import NlpSearchSpace +from .nlp_sweep_settings import NlpSweepSettings +from .text_classification_job import TextClassificationJob +from .text_classification_multilabel_job import TextClassificationMultilabelJob +from .text_ner_job import TextNerJob + +__all__ = [ + "AutoMLNLPJob", + "NlpFeaturizationSettings", + "NlpFixedParameters", + "NlpLimitSettings", + "NlpSearchSpace", + "NlpSweepSettings", + "TextClassificationJob", + "TextClassificationMultilabelJob", + "TextNerJob", +] diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/automl_nlp_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/automl_nlp_job.py new file mode 100644 index 00000000..f0b3baa8 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/automl_nlp_job.py @@ -0,0 +1,467 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- +from abc import ABC +from typing import Any, Dict, List, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + LogVerbosity, + NlpLearningRateScheduler, + SamplingAlgorithmType, +) +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job.automl.automl_vertical import AutoMLVertical +from azure.ai.ml.entities._job.automl.nlp.nlp_featurization_settings import NlpFeaturizationSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_fixed_parameters import NlpFixedParameters +from azure.ai.ml.entities._job.automl.nlp.nlp_limit_settings import NlpLimitSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_search_space import NlpSearchSpace +from azure.ai.ml.entities._job.automl.nlp.nlp_sweep_settings import NlpSweepSettings +from azure.ai.ml.entities._job.automl.search_space import SearchSpace +from azure.ai.ml.entities._job.automl.utils import cast_to_specific_search_space +from azure.ai.ml.entities._job.sweep.early_termination_policy import EarlyTerminationPolicy +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +# pylint: disable=too-many-instance-attributes,protected-access +class AutoMLNLPJob(AutoMLVertical, ABC): + """Base class for AutoML NLP jobs. + + You should not instantiate this class directly. Instead, create one of the + task-specific NLP job classes, such as TextClassificationJob. + + :param task_type: NLP task type, must be one of 'TextClassification', + 'TextClassificationMultilabel', or 'TextNER' + :type task_type: str + :param primary_metric: Primary metric to display from NLP job + :type primary_metric: str + :param training_data: Training data + :type training_data: Input + :param validation_data: Validation data + :type validation_data: Input + :param target_column_name: Column name of the target column, defaults to None + :type target_column_name: Optional[str] + :param log_verbosity: The degree of verbosity used in logging, defaults to None, + must be one of 'NotSet', 'Debug', 'Info', 'Warning', 'Error', 'Critical', or None + :type log_verbosity: Optional[str] + :param featurization: Featurization settings used for NLP job, defaults to None + :type featurization: Optional[~azure.ai.ml.automl.NlpFeaturizationSettings] + :param limits: Limit settings for NLP jobs, defaults to None + :type limits: Optional[~azure.ai.ml.automl.NlpLimitSettings] + :param sweep: Sweep settings used for NLP job, defaults to None + :type sweep: Optional[~azure.ai.ml.automl.NlpSweepSettings] + :param training_parameters: Fixed parameters for the training of all candidates
+ , defaults to None + :type training_parameters: Optional[~azure.ai.ml.automl.NlpFixedParameters] + :param search_space: Search space(s) to sweep over for NLP sweep jobs, defaults to None + :type search_space: Optional[List[~azure.ai.ml.automl.NlpSearchSpace]] + """ + + def __init__( + self, + *, + task_type: str, + primary_metric: str, + training_data: Optional[Input], + validation_data: Optional[Input], + target_column_name: Optional[str] = None, + log_verbosity: Optional[str] = None, + featurization: Optional[NlpFeaturizationSettings] = None, + limits: Optional[NlpLimitSettings] = None, + sweep: Optional[NlpSweepSettings] = None, + training_parameters: Optional[NlpFixedParameters] = None, + search_space: Optional[List[NlpSearchSpace]] = None, + **kwargs: Any, + ): + self._training_parameters: Optional[NlpFixedParameters] = None + + super().__init__( + task_type, training_data=training_data, validation_data=validation_data, **kwargs # type: ignore + ) + self.log_verbosity = log_verbosity + self._primary_metric: str = "" + self.primary_metric = primary_metric + + self.target_column_name = target_column_name + + self._featurization = featurization + self._limits = limits or NlpLimitSettings() + self._sweep = sweep + self.training_parameters = training_parameters # via setter method. + self._search_space = search_space + + @property + def training_parameters(self) -> Optional[NlpFixedParameters]: + """Parameters that are used for all submitted jobs. + + :return: fixed training parameters for NLP jobs + :rtype: ~azure.ai.ml.automl.NlpFixedParameters + """ + return self._training_parameters + + @training_parameters.setter + def training_parameters(self, value: Union[Dict, NlpFixedParameters]) -> None: + if value is None: + self._training_parameters = None + elif isinstance(value, NlpFixedParameters): + self._training_parameters = value + # Convert parameters from snake case to enum. + self.set_training_parameters(learning_rate_scheduler=value.learning_rate_scheduler) + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for nlp training parameters." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_training_parameters(**value) + + @property + def search_space(self) -> Optional[List[NlpSearchSpace]]: + """Search space(s) to sweep over for NLP sweep jobs + + :return: list of search spaces to sweep over for NLP jobs + :rtype: List[~azure.ai.ml.automl.NlpSearchSpace] + """ + return self._search_space + + @search_space.setter + def search_space(self, value: Union[List[dict], List[SearchSpace]]) -> None: + if not isinstance(value, list): + msg = "Expected a list for search space." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + all_dict_type = all(isinstance(item, dict) for item in value) + all_search_space_type = all(isinstance(item, SearchSpace) for item in value) + + if not (all_search_space_type or all_dict_type): + msg = "Expected all items in the list to be either dictionaries or SearchSpace objects." 
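+ # Mixed lists are rejected outright rather than coerced; the cast below assumes a homogeneous list.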
+ raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + self._search_space = [ + cast_to_specific_search_space(item, NlpSearchSpace, self.task_type) for item in value # type: ignore + ] + + @property + def primary_metric(self) -> str: + """Primary metric to display from NLP job + + :return: primary metric to display + :rtype: str + """ + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: str) -> None: + self._primary_metric = value + + @property + def log_verbosity(self) -> LogVerbosity: + """Log verbosity configuration + + :return: the degree of verbosity used in logging + :rtype: ~azure.mgmt.machinelearningservices.models.LogVerbosity + """ + return self._log_verbosity + + @log_verbosity.setter + def log_verbosity(self, value: Union[str, LogVerbosity]) -> None: + self._log_verbosity = None if value is None else LogVerbosity[camel_to_snake(value).upper()] + + @property + def limits(self) -> NlpLimitSettings: + """Limit settings for NLP jobs + + :return: limit configuration for NLP job + :rtype: ~azure.ai.ml.automl.NlpLimitSettings + """ + return self._limits + + @limits.setter + def limits(self, value: Union[Dict, NlpLimitSettings]) -> None: + if isinstance(value, NlpLimitSettings): + self._limits = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for limit settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_limits(**value) + + @property + def sweep(self) -> Optional[NlpSweepSettings]: + """Sweep settings used for NLP job + + :return: sweep settings + :rtype: ~azure.ai.ml.automl.NlpSweepSettings + """ + return self._sweep + + @sweep.setter + def sweep(self, value: Union[Dict, NlpSweepSettings]) -> None: + if isinstance(value, NlpSweepSettings): + self._sweep = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for sweep settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_sweep(**value) + + @property + def featurization(self) -> Optional[NlpFeaturizationSettings]: + """Featurization settings used for NLP job + + :return: featurization settings + :rtype: ~azure.ai.ml.automl.NlpFeaturizationSettings + """ + return self._featurization + + @featurization.setter + def featurization(self, value: Union[Dict, NlpFeaturizationSettings]) -> None: + if isinstance(value, NlpFeaturizationSettings): + self._featurization = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for featurization settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_featurization(**value) + + def set_data(self, *, training_data: Input, target_column_name: str, validation_data: Input) -> None: + """Define data configuration for NLP job + + :keyword training_data: Training data + :type training_data: ~azure.ai.ml.Input + :keyword target_column_name: Column name of the target column. 
+ :type target_column_name: str + :keyword validation_data: Validation data + :type validation_data: ~azure.ai.ml.Input + """ + # Properties for NlpVerticalDataSettings + self.target_column_name = target_column_name + self.training_data = training_data + self.validation_data = validation_data + + def set_limits( + self, + *, + max_trials: int = 1, + max_concurrent_trials: int = 1, + max_nodes: int = 1, + timeout_minutes: Optional[int] = None, + trial_timeout_minutes: Optional[int] = None, + ) -> None: + """Define limit configuration for AutoML NLP job + + :keyword max_trials: Maximum number of AutoML iterations, defaults to 1 + :type max_trials: int, optional + :keyword max_concurrent_trials: Maximum number of concurrent AutoML iterations, defaults to 1 + :type max_concurrent_trials: int, optional + :keyword max_nodes: Maximum number of nodes used for sweep, defaults to 1 + :type max_nodes: int, optional + :keyword timeout_minutes: Timeout for the AutoML job, defaults to None + :type timeout_minutes: Optional[int] + :keyword trial_timeout_minutes: Timeout for each AutoML trial, defaults to None + :type trial_timeout_minutes: Optional[int] + """ + self._limits = NlpLimitSettings( + max_trials=max_trials, + max_concurrent_trials=max_concurrent_trials, + max_nodes=max_nodes, + timeout_minutes=timeout_minutes, + trial_timeout_minutes=trial_timeout_minutes, + ) + + def set_sweep( + self, + *, + sampling_algorithm: Union[str, SamplingAlgorithmType], + early_termination: Optional[EarlyTerminationPolicy] = None, + ) -> None: + """Define sweep configuration for AutoML NLP job + + :keyword sampling_algorithm: Required. Specifies type of hyperparameter sampling algorithm. + Possible values include: "Grid", "Random", and "Bayesian". + :type sampling_algorithm: Union[str, ~azure.ai.ml.automl.SamplingAlgorithmType] + :keyword early_termination: Optional. early termination policy to end poorly performing training candidates, + defaults to None. + :type early_termination: Optional[~azure.mgmt.machinelearningservices.models.EarlyTerminationPolicy] + """ + if self._sweep: + self._sweep.sampling_algorithm = sampling_algorithm + else: + self._sweep = NlpSweepSettings(sampling_algorithm=sampling_algorithm) + + self._sweep.early_termination = early_termination or self._sweep.early_termination + + def set_training_parameters( + self, + *, + gradient_accumulation_steps: Optional[int] = None, + learning_rate: Optional[float] = None, + learning_rate_scheduler: Optional[Union[str, NlpLearningRateScheduler]] = None, + model_name: Optional[str] = None, + number_of_epochs: Optional[int] = None, + training_batch_size: Optional[int] = None, + validation_batch_size: Optional[int] = None, + warmup_ratio: Optional[float] = None, + weight_decay: Optional[float] = None, + ) -> None: + """Fix certain training parameters throughout the training procedure for all candidates. + + :keyword gradient_accumulation_steps: number of steps over which to accumulate gradients before a backward + pass. This must be a positive integer., defaults to None + :type gradient_accumulation_steps: Optional[int] + :keyword learning_rate: initial learning rate. Must be a float in (0, 1)., defaults to None + :type learning_rate: Optional[float] + :keyword learning_rate_scheduler: the type of learning rate scheduler. 
Must choose from 'linear', 'cosine', + 'cosine_with_restarts', 'polynomial', 'constant', and 'constant_with_warmup'., defaults to None + :type learning_rate_scheduler: Optional[Union[str, ~azure.ai.ml.automl.NlpLearningRateScheduler]] + :keyword model_name: the model name to use during training. Must choose from 'bert-base-cased', + 'bert-base-uncased', 'bert-base-multilingual-cased', 'bert-base-german-cased', 'bert-large-cased', + 'bert-large-uncased', 'distilbert-base-cased', 'distilbert-base-uncased', 'roberta-base', 'roberta-large', + 'distilroberta-base', 'xlm-roberta-base', 'xlm-roberta-large', 'xlnet-base-cased', and 'xlnet-large-cased'., + defaults to None + :type model_name: Optional[str] + :keyword number_of_epochs: the number of epochs to train with. Must be a positive integer., defaults to None + :type number_of_epochs: Optional[int] + :keyword training_batch_size: the batch size during training. Must be a positive integer., defaults to None + :type training_batch_size: Optional[int] + :keyword validation_batch_size: the batch size during validation. Must be a positive integer., defaults to None + :type validation_batch_size: Optional[int] + :keyword warmup_ratio: ratio of total training steps used for a linear warmup from 0 to learning_rate. + Must be a float in [0, 1]., defaults to None + :type warmup_ratio: Optional[float] + :keyword weight_decay: value of weight decay when optimizer is sgd, adam, or adamw. This must be a float in + the range [0, 1]., defaults to None + :type weight_decay: Optional[float] + """ + self._training_parameters = self._training_parameters or NlpFixedParameters() + + self._training_parameters.gradient_accumulation_steps = ( + gradient_accumulation_steps + if gradient_accumulation_steps is not None + else self._training_parameters.gradient_accumulation_steps + ) + + self._training_parameters.learning_rate = ( + learning_rate if learning_rate is not None else self._training_parameters.learning_rate + ) + + self._training_parameters.learning_rate_scheduler = ( + NlpLearningRateScheduler[camel_to_snake(learning_rate_scheduler).upper()] + if learning_rate_scheduler is not None + else self._training_parameters.learning_rate_scheduler + ) + + self._training_parameters.model_name = ( + model_name if model_name is not None else self._training_parameters.model_name + ) + + self._training_parameters.number_of_epochs = ( + number_of_epochs if number_of_epochs is not None else self._training_parameters.number_of_epochs + ) + + self._training_parameters.training_batch_size = ( + training_batch_size if training_batch_size is not None else self._training_parameters.training_batch_size + ) + + self._training_parameters.validation_batch_size = ( + validation_batch_size + if validation_batch_size is not None + else self._training_parameters.validation_batch_size + ) + + self._training_parameters.warmup_ratio = ( + warmup_ratio if warmup_ratio is not None else self._training_parameters.warmup_ratio + ) + + self._training_parameters.weight_decay = ( + weight_decay if weight_decay is not None else self._training_parameters.weight_decay + ) + + def set_featurization(self, *, dataset_language: Optional[str] = None) -> None: + """Define featurization configuration for AutoML NLP job.
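+
+        .. note::
+            A minimal sketch of a call (the language code shown is illustrative, not an
+            exhaustive list of supported values):
+
+                job.set_featurization(dataset_language="eng")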
+ + :keyword dataset_language: Language of the dataset, defaults to None + :type dataset_language: Optional[str] + """ + self._featurization = NlpFeaturizationSettings( + dataset_language=dataset_language, + ) + + def extend_search_space(self, value: Union[SearchSpace, List[SearchSpace]]) -> None: + """Add (a) search space(s) for an AutoML NLP job. + + :param value: either a SearchSpace object or a list of SearchSpace objects with nlp-specific parameters. + :type value: Union[~azure.ai.ml.automl.SearchSpace, List[~azure.ai.ml.automl.SearchSpace]] + """ + self._search_space = self._search_space or [] + if isinstance(value, list): + self._search_space.extend( + [cast_to_specific_search_space(item, NlpSearchSpace, self.task_type) for item in value] # type: ignore + ) + else: + self._search_space.append( + cast_to_specific_search_space(value, NlpSearchSpace, self.task_type) # type: ignore + ) + + @classmethod + def _get_search_space_from_str(cls, search_space_str: Optional[str]) -> Optional[List]: + if search_space_str is not None: + return [NlpSearchSpace._from_rest_object(entry) for entry in search_space_str if entry is not None] + return None + + def _restore_data_inputs(self) -> None: + """Restore MLTableJobInputs to Inputs within data_settings. + + self.training_data and self.validation_data should reflect what the user passed in (Input). Once we get the + response back from the service (as MLTableJobInput), we should set the affected fields back to Input. + """ + super()._restore_data_inputs() + self.training_data = self.training_data if self.training_data else None # type: ignore + self.validation_data = self.validation_data if self.validation_data else None # type: ignore + + def __eq__(self, other: object) -> bool: + if not isinstance(other, AutoMLNLPJob): + return NotImplemented + + return ( + self.primary_metric == other.primary_metric + and self.log_verbosity == other.log_verbosity + and self.training_data == other.training_data + and self.validation_data == other.validation_data + and self._featurization == other._featurization + and self._limits == other._limits + and self._sweep == other._sweep + and self._training_parameters == other._training_parameters + and self._search_space == other._search_space + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_featurization_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_featurization_settings.py new file mode 100644 index 00000000..5649dea2 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_featurization_settings.py @@ -0,0 +1,47 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + NlpVerticalFeaturizationSettings as RestNlpVerticalFeaturizationSettings, +) +from azure.ai.ml.entities._job.automl.featurization_settings import FeaturizationSettings, FeaturizationSettingsType + + +class NlpFeaturizationSettings(FeaturizationSettings): + """Featurization settings for all AutoML NLP Verticals. + + :ivar type: Specifies the type of FeaturizationSettings. Set automatically to "NLP" for this class. + :vartype type: str + + .. admonition:: Example: + + ..
literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.nlp_featurization_settings] + :end-before: [END automl.nlp_featurization_settings] + :language: python + :dedent: 8 + :caption: creating an nlp featurization settings + """ + + type = FeaturizationSettingsType.NLP + + def _to_rest_object(self) -> RestNlpVerticalFeaturizationSettings: + return RestNlpVerticalFeaturizationSettings( + dataset_language=self.dataset_language, + ) + + @classmethod + def _from_rest_object(cls, obj: RestNlpVerticalFeaturizationSettings) -> "NlpFeaturizationSettings": + return cls( + dataset_language=obj.dataset_language, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, NlpFeaturizationSettings): + return NotImplemented + + return super().__eq__(other) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_fixed_parameters.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_fixed_parameters.py new file mode 100644 index 00000000..13c594b6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_fixed_parameters.py @@ -0,0 +1,117 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +from typing import Optional + +from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpFixedParameters as RestNlpFixedParameters +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class NlpFixedParameters(RestTranslatableMixin): + """Configuration of fixed parameters for all candidates of an AutoML NLP Job. + + :param gradient_accumulation_steps: number of steps over which to accumulate gradients before a backward + pass. This must be a positive integer, defaults to None + :type gradient_accumulation_steps: Optional[int] + :param learning_rate: initial learning rate. Must be a float in (0, 1), defaults to None + :type learning_rate: Optional[float] + :param learning_rate_scheduler: the type of learning rate scheduler. Must choose from 'linear', 'cosine', + 'cosine_with_restarts', 'polynomial', 'constant', and 'constant_with_warmup', defaults to None + :type learning_rate_scheduler: Optional[str] + :param model_name: the model name to use during training. Must choose from 'bert-base-cased', + 'bert-base-uncased', 'bert-base-multilingual-cased', 'bert-base-german-cased', 'bert-large-cased', + 'bert-large-uncased', 'distilbert-base-cased', 'distilbert-base-uncased', 'roberta-base', 'roberta-large', + 'distilroberta-base', 'xlm-roberta-base', 'xlm-roberta-large', 'xlnet-base-cased', and 'xlnet-large-cased', + defaults to None + :type model_name: Optional[str] + :param number_of_epochs: the number of epochs to train with. Must be a positive integer, defaults to None + :type number_of_epochs: Optional[int] + :param training_batch_size: the batch size during training. Must be a positive integer, defaults to None + :type training_batch_size: Optional[int] + :param validation_batch_size: the batch size during validation. Must be a positive integer, defaults to None + :type validation_batch_size: Optional[int] + :param warmup_ratio: ratio of total training steps used for a linear warmup from 0 to learning_rate.
+ Must be a float in [0, 1], defaults to None + :type warmup_ratio: Optional[float] + :param weight_decay: value of weight decay when optimizer is sgd, adam, or adamw. This must be a float in + the range [0, 1], defaults to None + :type weight_decay: Optional[float] + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.nlp_fixed_parameters] + :end-before: [END automl.nlp_fixed_parameters] + :language: python + :dedent: 8 + :caption: creating an nlp fixed parameters + """ + + def __init__( + self, + *, + gradient_accumulation_steps: Optional[int] = None, + learning_rate: Optional[float] = None, + learning_rate_scheduler: Optional[str] = None, + model_name: Optional[str] = None, + number_of_epochs: Optional[int] = None, + training_batch_size: Optional[int] = None, + validation_batch_size: Optional[int] = None, + warmup_ratio: Optional[float] = None, + weight_decay: Optional[float] = None, + ): + self.gradient_accumulation_steps = gradient_accumulation_steps + self.learning_rate = learning_rate + self.learning_rate_scheduler = learning_rate_scheduler + self.model_name = model_name + self.number_of_epochs = number_of_epochs + self.training_batch_size = training_batch_size + self.validation_batch_size = validation_batch_size + self.warmup_ratio = warmup_ratio + self.weight_decay = weight_decay + + def _to_rest_object(self) -> RestNlpFixedParameters: + return RestNlpFixedParameters( + gradient_accumulation_steps=self.gradient_accumulation_steps, + learning_rate=self.learning_rate, + learning_rate_scheduler=self.learning_rate_scheduler, + model_name=self.model_name, + number_of_epochs=self.number_of_epochs, + training_batch_size=self.training_batch_size, + validation_batch_size=self.validation_batch_size, + warmup_ratio=self.warmup_ratio, + weight_decay=self.weight_decay, + ) + + @classmethod + def _from_rest_object(cls, obj: RestNlpFixedParameters) -> "NlpFixedParameters": + return cls( + gradient_accumulation_steps=obj.gradient_accumulation_steps, + learning_rate=obj.learning_rate, + learning_rate_scheduler=obj.learning_rate_scheduler, + model_name=obj.model_name, + number_of_epochs=obj.number_of_epochs, + training_batch_size=obj.training_batch_size, + validation_batch_size=obj.validation_batch_size, + warmup_ratio=obj.warmup_ratio, + weight_decay=obj.weight_decay, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, NlpFixedParameters): + return NotImplemented + + return ( + self.gradient_accumulation_steps == other.gradient_accumulation_steps + and self.learning_rate == other.learning_rate + and self.learning_rate_scheduler == other.learning_rate_scheduler + and self.model_name == other.model_name + and self.number_of_epochs == other.number_of_epochs + and self.training_batch_size == other.training_batch_size + and self.validation_batch_size == other.validation_batch_size + and self.warmup_ratio == other.warmup_ratio + and self.weight_decay == other.weight_decay + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_limit_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_limit_settings.py new file mode 100644 index 00000000..1e99f4f0 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_limit_settings.py @@ -0,0 +1,79 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft
Corporation. All rights reserved. +# --------------------------------------------------------- + +from typing import Optional + +from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpVerticalLimitSettings as RestNlpLimitSettings +from azure.ai.ml._utils.utils import from_iso_duration_format_mins, to_iso_duration_format_mins +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class NlpLimitSettings(RestTranslatableMixin): + """Limit settings for all AutoML NLP Verticals. + + :param max_concurrent_trials: Maximum number of concurrent AutoML iterations. + :type max_concurrent_trials: int + :param max_trials: Maximum number of AutoML iterations. + :type max_trials: int + :param max_nodes: Maximum number of nodes used for sweep. + :type max_nodes: int + :param timeout_minutes: AutoML job timeout. + :type timeout_minutes: int + :param trial_timeout_minutes: Timeout for each AutoML trial. + :type trial_timeout_minutes: int + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.nlp_limit_settings] + :end-before: [END automl.nlp_limit_settings] + :language: python + :dedent: 8 + :caption: creating an nlp limit settings + """ + + def __init__( + self, + *, + max_concurrent_trials: Optional[int] = None, + max_trials: int = 1, + max_nodes: int = 1, + timeout_minutes: Optional[int] = None, + trial_timeout_minutes: Optional[int] = None, + ): + self.max_concurrent_trials = max_concurrent_trials + self.max_trials = max_trials + self.max_nodes = max_nodes + self.timeout_minutes = timeout_minutes + self.trial_timeout_minutes = trial_timeout_minutes + + def _to_rest_object(self) -> RestNlpLimitSettings: + return RestNlpLimitSettings( + max_concurrent_trials=self.max_concurrent_trials, + max_trials=self.max_trials, + max_nodes=self.max_nodes, + timeout=to_iso_duration_format_mins(self.timeout_minutes), + trial_timeout=to_iso_duration_format_mins(self.trial_timeout_minutes), + ) + + @classmethod + def _from_rest_object(cls, obj: RestNlpLimitSettings) -> "NlpLimitSettings": + return cls( + max_concurrent_trials=obj.max_concurrent_trials, + max_trials=obj.max_trials, + max_nodes=obj.max_nodes, + timeout_minutes=from_iso_duration_format_mins(obj.timeout), + trial_timeout_minutes=from_iso_duration_format_mins(obj.trial_timeout), + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, NlpLimitSettings): + return NotImplemented + + return ( + self.max_concurrent_trials == other.max_concurrent_trials + and self.max_trials == other.max_trials + and self.max_nodes == other.max_nodes + and self.timeout_minutes == other.timeout_minutes + and self.trial_timeout_minutes == other.trial_timeout_minutes + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_search_space.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_search_space.py new file mode 100644 index 00000000..e4ad435f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_search_space.py @@ -0,0 +1,185 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved.
+# --------------------------------------------------------- + +from typing import Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpLearningRateScheduler, NlpParameterSubspace +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants import NlpModels +from azure.ai.ml.entities._job.automl.search_space import SearchSpace +from azure.ai.ml.entities._job.automl.search_space_utils import _convert_from_rest_object, _convert_to_rest_object +from azure.ai.ml.entities._job.sweep.search_space import Choice, SweepDistribution +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class NlpSearchSpace(RestTranslatableMixin): + """Search space for AutoML NLP tasks. + + :param gradient_accumulation_steps: number of steps over which to accumulate gradients before a backward + pass. This must be a positive integer., defaults to None + :type gradient_accumulation_steps: Optional[Union[int, SweepDistribution]] + :param learning_rate: initial learning rate. Must be a float in (0, 1), defaults to None + :type learning_rate: Optional[Union[float, SweepDistribution]] + :param learning_rate_scheduler: the type of learning rate scheduler. Must choose from 'linear', 'cosine', + 'cosine_with_restarts', 'polynomial', 'constant', and 'constant_with_warmup', defaults to None + :type learning_rate_scheduler: Optional[Union[str, SweepDistribution]] + :param model_name: the model name to use during training. Must choose from 'bert-base-cased', + 'bert-base-uncased', 'bert-base-multilingual-cased', 'bert-base-german-cased', 'bert-large-cased', + 'bert-large-uncased', 'distilbert-base-cased', 'distilbert-base-uncased', 'roberta-base', 'roberta-large', + 'distilroberta-base', 'xlm-roberta-base', 'xlm-roberta-large', 'xlnet-base-cased', and 'xlnet-large-cased', + defaults to None + :type model_name: Optional[Union[str, SweepDistribution]] + :param number_of_epochs: the number of epochs to train with. Must be a positive integer, defaults to None + :type number_of_epochs: Optional[Union[int, SweepDistribution]] + :param training_batch_size: the batch size during training. Must be a positive integer, defaults to None + :type training_batch_size: Optional[Union[int, SweepDistribution]] + :param validation_batch_size: the batch size during validation. Must be a positive integer, defaults to None + :type validation_batch_size: Optional[Union[int, SweepDistribution]] + :param warmup_ratio: ratio of total training steps used for a linear warmup from 0 to learning_rate. + Must be a float in [0, 1], defaults to None + :type warmup_ratio: Optional[Union[float, SweepDistribution]] + :param weight_decay: value of weight decay when optimizer is sgd, adam, or adamw. This must be a float in + the range [0, 1], defaults to None + :type weight_decay: Optional[Union[float, SweepDistribution]] + + + .. admonition:: Example: + + ..
literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.nlp_search_space] + :end-before: [END automl.nlp_search_space] + :language: python + :dedent: 8 + :caption: creating an nlp search space + """ + + def __init__( + self, + *, + gradient_accumulation_steps: Optional[Union[int, SweepDistribution]] = None, + learning_rate: Optional[Union[float, SweepDistribution]] = None, + learning_rate_scheduler: Optional[Union[str, SweepDistribution]] = None, + model_name: Optional[Union[str, SweepDistribution]] = None, + number_of_epochs: Optional[Union[int, SweepDistribution]] = None, + training_batch_size: Optional[Union[int, SweepDistribution]] = None, + validation_batch_size: Optional[Union[int, SweepDistribution]] = None, + warmup_ratio: Optional[Union[float, SweepDistribution]] = None, + weight_decay: Optional[Union[float, SweepDistribution]] = None + ): + # Since we want customers to be able to specify enums as well rather than just strings, we need to access + # the enum values here before we serialize them ('NlpModels.BERT_BASE_CASED' vs. 'bert-base-cased'). + if isinstance(learning_rate_scheduler, NlpLearningRateScheduler): + learning_rate_scheduler = camel_to_snake(learning_rate_scheduler.value) + elif isinstance(learning_rate_scheduler, Choice): + if learning_rate_scheduler.values is not None: + learning_rate_scheduler.values = [ + camel_to_snake(item.value) if isinstance(item, NlpLearningRateScheduler) else item + for item in learning_rate_scheduler.values + ] + + if isinstance(model_name, NlpModels): + model_name = model_name.value + elif isinstance(model_name, Choice): + if model_name.values is not None: + model_name.values = [item.value if isinstance(item, NlpModels) else item for item in model_name.values] + + self.gradient_accumulation_steps = gradient_accumulation_steps + self.learning_rate = learning_rate + self.learning_rate_scheduler = learning_rate_scheduler + self.model_name = model_name + self.number_of_epochs = number_of_epochs + self.training_batch_size = training_batch_size + self.validation_batch_size = validation_batch_size + self.warmup_ratio = warmup_ratio + self.weight_decay = weight_decay + + def _to_rest_object(self) -> NlpParameterSubspace: + return NlpParameterSubspace( + gradient_accumulation_steps=( + _convert_to_rest_object(self.gradient_accumulation_steps) + if self.gradient_accumulation_steps is not None + else None + ), + learning_rate=_convert_to_rest_object(self.learning_rate) if self.learning_rate is not None else None, + learning_rate_scheduler=( + _convert_to_rest_object(self.learning_rate_scheduler) + if self.learning_rate_scheduler is not None + else None + ), + model_name=_convert_to_rest_object(self.model_name) if self.model_name is not None else None, + number_of_epochs=( + _convert_to_rest_object(self.number_of_epochs) if self.number_of_epochs is not None else None + ), + training_batch_size=( + _convert_to_rest_object(self.training_batch_size) if self.training_batch_size is not None else None + ), + validation_batch_size=( + _convert_to_rest_object(self.validation_batch_size) if self.validation_batch_size is not None else None + ), + warmup_ratio=_convert_to_rest_object(self.warmup_ratio) if self.warmup_ratio is not None else None, + weight_decay=_convert_to_rest_object(self.weight_decay) if self.weight_decay is not None else None, + ) + + @classmethod + def _from_rest_object(cls, obj: NlpParameterSubspace) -> "NlpSearchSpace": + return cls( + gradient_accumulation_steps=( + 
_convert_from_rest_object(obj.gradient_accumulation_steps) + if obj.gradient_accumulation_steps is not None + else None + ), + learning_rate=_convert_from_rest_object(obj.learning_rate) if obj.learning_rate is not None else None, + learning_rate_scheduler=( + _convert_from_rest_object(obj.learning_rate_scheduler) + if obj.learning_rate_scheduler is not None + else None + ), + model_name=_convert_from_rest_object(obj.model_name) if obj.model_name is not None else None, + number_of_epochs=( + _convert_from_rest_object(obj.number_of_epochs) if obj.number_of_epochs is not None else None + ), + training_batch_size=( + _convert_from_rest_object(obj.training_batch_size) if obj.training_batch_size is not None else None + ), + validation_batch_size=( + _convert_from_rest_object(obj.validation_batch_size) if obj.validation_batch_size is not None else None + ), + warmup_ratio=_convert_from_rest_object(obj.warmup_ratio) if obj.warmup_ratio is not None else None, + weight_decay=_convert_from_rest_object(obj.weight_decay) if obj.weight_decay is not None else None, + ) + + @classmethod + def _from_search_space_object(cls, obj: SearchSpace) -> "NlpSearchSpace": + return cls( + gradient_accumulation_steps=( + obj.gradient_accumulation_steps if hasattr(obj, "gradient_accumulation_steps") else None + ), + learning_rate=obj.learning_rate if hasattr(obj, "learning_rate") else None, + learning_rate_scheduler=obj.learning_rate_scheduler if hasattr(obj, "learning_rate_scheduler") else None, + model_name=obj.model_name if hasattr(obj, "model_name") else None, + number_of_epochs=obj.number_of_epochs if hasattr(obj, "number_of_epochs") else None, + training_batch_size=obj.training_batch_size if hasattr(obj, "training_batch_size") else None, + validation_batch_size=obj.validation_batch_size if hasattr(obj, "validation_batch_size") else None, + warmup_ratio=obj.warmup_ratio if hasattr(obj, "warmup_ratio") else None, + weight_decay=obj.weight_decay if hasattr(obj, "weight_decay") else None, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, NlpSearchSpace): + return NotImplemented + + return ( + self.gradient_accumulation_steps == other.gradient_accumulation_steps + and self.learning_rate == other.learning_rate + and self.learning_rate_scheduler == other.learning_rate_scheduler + and self.model_name == other.model_name + and self.number_of_epochs == other.number_of_epochs + and self.training_batch_size == other.training_batch_size + and self.validation_batch_size == other.validation_batch_size + and self.warmup_ratio == other.warmup_ratio + and self.weight_decay == other.weight_decay + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_sweep_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_sweep_settings.py new file mode 100644 index 00000000..e446a30c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_sweep_settings.py @@ -0,0 +1,65 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +from typing import Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpSweepSettings as RestNlpSweepSettings +from azure.ai.ml._restclient.v2023_04_01_preview.models import SamplingAlgorithmType +from azure.ai.ml.entities._job.sweep.early_termination_policy import EarlyTerminationPolicy +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +# pylint: disable=protected-access +class NlpSweepSettings(RestTranslatableMixin): + """Sweep settings for all AutoML NLP tasks. + + :param sampling_algorithm: Required. Specifies type of hyperparameter sampling algorithm. + Possible values include: "Grid", "Random", and "Bayesian". + :type sampling_algorithm: Union[str, ~azure.ai.ml.automl.SamplingAlgorithmType] + :param early_termination: Early termination policy to end poorly performing training candidates, + defaults to None. + :type early_termination: Optional[~azure.mgmt.machinelearningservices.models.EarlyTerminationPolicy] + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.nlp_sweep_settings] + :end-before: [END automl.nlp_sweep_settings] + :language: python + :dedent: 8 + :caption: creating an nlp sweep settings + """ + + def __init__( + self, + *, + sampling_algorithm: Union[str, SamplingAlgorithmType], + early_termination: Optional[EarlyTerminationPolicy] = None, + ): + self.sampling_algorithm = sampling_algorithm + self.early_termination = early_termination + + def _to_rest_object(self) -> RestNlpSweepSettings: + return RestNlpSweepSettings( + sampling_algorithm=self.sampling_algorithm, + early_termination=self.early_termination._to_rest_object() if self.early_termination else None, + ) + + @classmethod + def _from_rest_object(cls, obj: RestNlpSweepSettings) -> "NlpSweepSettings": + return cls( + sampling_algorithm=obj.sampling_algorithm, + early_termination=( + EarlyTerminationPolicy._from_rest_object(obj.early_termination) if obj.early_termination else None + ), + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, NlpSweepSettings): + return NotImplemented + + return self.sampling_algorithm == other.sampling_algorithm and self.early_termination == other.early_termination + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_job.py new file mode 100644 index 00000000..290f4f70 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_job.py @@ -0,0 +1,248 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import TYPE_CHECKING, Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase, TaskType +from azure.ai.ml._restclient.v2023_04_01_preview.models._azure_machine_learning_workspaces_enums import ( + ClassificationPrimaryMetrics, +) +from azure.ai.ml._restclient.v2024_01_01_preview.models import TextClassification as RestTextClassification +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.nlp.automl_nlp_job import AutoMLNLPJob +from azure.ai.ml.entities._job.automl.nlp.nlp_featurization_settings import NlpFeaturizationSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_fixed_parameters import NlpFixedParameters +from azure.ai.ml.entities._job.automl.nlp.nlp_limit_settings import NlpLimitSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_sweep_settings import NlpSweepSettings +from azure.ai.ml.entities._system_data import SystemData +from azure.ai.ml.entities._util import load_from_dict + +# avoid circular import error +if TYPE_CHECKING: + from azure.ai.ml.entities._component.component import Component + + +class TextClassificationJob(AutoMLNLPJob): + """Configuration for AutoML Text Classification Job. + + :param target_column_name: The name of the target column, defaults to None + :type target_column_name: Optional[str] + :param training_data: Training data to be used for training, defaults to None + :type training_data: Optional[~azure.ai.ml.Input] + :param validation_data: Validation data to be used for evaluating the trained model, defaults to None + :type validation_data: Optional[~azure.ai.ml.Input] + :param primary_metric: The primary metric to be displayed, defaults to None + :type primary_metric: Optional[~azure.ai.ml.automl.ClassificationPrimaryMetrics] + :param log_verbosity: Log verbosity level, defaults to None + :type log_verbosity: Optional[str] + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.automl_nlp_job.text_classification_job] + :end-before: [END automl.automl_nlp_job.text_classification_job] + :language: python + :dedent: 8 + :caption: creating an automl text classification job + """ + + _DEFAULT_PRIMARY_METRIC = ClassificationPrimaryMetrics.ACCURACY + + def __init__( + self, + *, + target_column_name: Optional[str] = None, + training_data: Optional[Input] = None, + validation_data: Optional[Input] = None, + primary_metric: Optional[ClassificationPrimaryMetrics] = None, + log_verbosity: Optional[str] = None, + **kwargs: Any + ): + super().__init__( + task_type=TaskType.TEXT_CLASSIFICATION, + primary_metric=primary_metric or TextClassificationJob._DEFAULT_PRIMARY_METRIC, + target_column_name=target_column_name, + training_data=training_data, + validation_data=validation_data, + log_verbosity=log_verbosity, + **kwargs, + ) + + @property + def primary_metric(self) -> Union[str, ClassificationPrimaryMetrics]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ClassificationPrimaryMetrics]) -> None: + """Setter for primary metric. + + :param value: The primary metric to use for model selection, as a string or enum value. + :type value: Union[str, ClassificationPrimaryMetrics] + """ + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + + self._primary_metric = ( + TextClassificationJob._DEFAULT_PRIMARY_METRIC + if value is None + else ClassificationPrimaryMetrics[camel_to_snake(value).upper()] + ) + + def _to_rest_object(self) -> JobBase: + text_classification = RestTextClassification( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + limit_settings=self._limits._to_rest_object() if self._limits else None, + sweep_settings=self._sweep._to_rest_object() if self._sweep else None, + fixed_parameters=self._training_parameters._to_rest_object() if self._training_parameters else None, + search_space=( + [entry._to_rest_object() for entry in self._search_space if entry is not None] + if self._search_space is not None + else None + ), + featurization_settings=self._featurization._to_rest_object() if self._featurization else None, + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + # resolve data inputs in rest object + self._resolve_data_inputs(text_classification) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=text_classification, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "TextClassificationJob": + properties: RestAutoMLJob = obj.properties + task_details: RestTextClassification = properties.task_details + assert isinstance(task_details, RestTextClassification) + limits = ( + NlpLimitSettings._from_rest_object(task_details.limit_settings) if task_details.limit_settings else None + ) + featurization = ( + NlpFeaturizationSettings._from_rest_object(task_details.featurization_settings)
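+            # the featurization block may be absent on the REST payload, hence the None fallback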
if task_details.featurization_settings + else None + ) + sweep = NlpSweepSettings._from_rest_object(task_details.sweep_settings) if task_details.sweep_settings else None + training_parameters = ( + NlpFixedParameters._from_rest_object(task_details.fixed_parameters) + if task_details.fixed_parameters + else None + ) + + text_classification_job = cls( + # ----- job specific params + id=obj.id, + name=obj.name, + description=properties.description, + tags=properties.tags, + properties=properties.properties, + experiment_name=properties.experiment_name, + services=properties.services, + status=properties.status, + creation_context=SystemData._from_rest_object(obj.system_data) if obj.system_data else None, + display_name=properties.display_name, + compute=properties.compute_id, + outputs=from_rest_data_outputs(properties.outputs), + resources=properties.resources, + # ----- task specific params + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + limits=limits, + sweep=sweep, + training_parameters=training_parameters, + search_space=cls._get_search_space_from_str(task_details.search_space), + featurization=featurization, + identity=( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + queue_settings=properties.queue_settings, + ) + + text_classification_job._restore_data_inputs() + + return text_classification_job + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> "Component": + raise NotImplementedError() + + @classmethod + def _load_from_dict( + cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any + ) -> "TextClassificationJob": + from azure.ai.ml._schema.automl.nlp_vertical.text_classification import TextClassificationSchema + + if kwargs.pop("inside_pipeline", False): + from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextClassificationNode + + loaded_data = load_from_dict( + AutoMLTextClassificationNode, + data, + context, + additional_message, + **kwargs, + ) + else: + loaded_data = load_from_dict(TextClassificationSchema, data, context, additional_message, **kwargs) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "TextClassificationJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + return TextClassificationJob(**loaded_data) + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.nlp_vertical.text_classification import TextClassificationSchema + from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextClassificationNode + + if inside_pipeline: + res_autoML: dict = AutoMLTextClassificationNode(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res_autoML + + res: dict = TextClassificationSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + def __eq__(self, other: object) -> bool: + if not isinstance(other, TextClassificationJob): + return NotImplemented + + if not super(TextClassificationJob, self).__eq__(other): + return False + + return self.primary_metric == other.primary_metric + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_multilabel_job.py 
b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_multilabel_job.py new file mode 100644 index 00000000..ac19b451 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_multilabel_job.py @@ -0,0 +1,252 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import TYPE_CHECKING, Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationMultilabelPrimaryMetrics, JobBase, TaskType +from azure.ai.ml._restclient.v2024_01_01_preview.models import ( + TextClassificationMultilabel as RestTextClassificationMultilabel, +) +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.nlp.automl_nlp_job import AutoMLNLPJob +from azure.ai.ml.entities._job.automl.nlp.nlp_featurization_settings import NlpFeaturizationSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_fixed_parameters import NlpFixedParameters +from azure.ai.ml.entities._job.automl.nlp.nlp_limit_settings import NlpLimitSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_sweep_settings import NlpSweepSettings +from azure.ai.ml.entities._system_data import SystemData +from azure.ai.ml.entities._util import load_from_dict + +# avoid circular import error +if TYPE_CHECKING: + from azure.ai.ml.entities._component.component import Component + + +class TextClassificationMultilabelJob(AutoMLNLPJob): + """Configuration for AutoML Text Classification Multilabel Job. + + :param target_column_name: The name of the target column, defaults to None + :type target_column_name: Optional[str] + :param training_data: Training data to be used for training, defaults to None + :type training_data: Optional[~azure.ai.ml.Input] + :param validation_data: Validation data to be used for evaluating the trained model, defaults to None + :type validation_data: Optional[~azure.ai.ml.Input] + :param primary_metric: The primary metric to be displayed, defaults to None + :type primary_metric: Optional[str] + :param log_verbosity: Log verbosity level, defaults to None + :type log_verbosity: Optional[str] + + .. admonition:: Example: + + ..
literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.text_classification_multilabel_job] + :end-before: [END automl.text_classification_multilabel_job] + :language: python + :dedent: 8 + :caption: creating an automl text classification multilabel job + """ + + _DEFAULT_PRIMARY_METRIC = ClassificationMultilabelPrimaryMetrics.ACCURACY + + def __init__( + self, + *, + target_column_name: Optional[str] = None, + training_data: Optional[Input] = None, + validation_data: Optional[Input] = None, + primary_metric: Optional[str] = None, + log_verbosity: Optional[str] = None, + **kwargs: Any + ): + super().__init__( + task_type=TaskType.TEXT_CLASSIFICATION_MULTILABEL, + primary_metric=primary_metric or TextClassificationMultilabelJob._DEFAULT_PRIMARY_METRIC, + target_column_name=target_column_name, + training_data=training_data, + validation_data=validation_data, + log_verbosity=log_verbosity, + **kwargs, + ) + + @property + def primary_metric(self) -> Union[str, ClassificationMultilabelPrimaryMetrics]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ClassificationMultilabelPrimaryMetrics]) -> None: + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + + self._primary_metric = ( + TextClassificationMultilabelJob._DEFAULT_PRIMARY_METRIC + if value is None + else ClassificationMultilabelPrimaryMetrics[camel_to_snake(value).upper()] + ) + + def _to_rest_object(self) -> JobBase: + text_classification_multilabel = RestTextClassificationMultilabel( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + limit_settings=self._limits._to_rest_object() if self._limits else None, + sweep_settings=self._sweep._to_rest_object() if self._sweep else None, + fixed_parameters=self._training_parameters._to_rest_object() if self._training_parameters else None, + search_space=( + [entry._to_rest_object() for entry in self._search_space if entry is not None] + if self._search_space is not None + else None + ), + featurization_settings=self._featurization._to_rest_object() if self._featurization else None, + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + # resolve data inputs in rest object + self._resolve_data_inputs(text_classification_multilabel) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=text_classification_multilabel, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "TextClassificationMultilabelJob": + properties: RestAutoMLJob = obj.properties + task_details: RestTextClassificationMultilabel = properties.task_details + assert isinstance(task_details, RestTextClassificationMultilabel) + limits = ( + NlpLimitSettings._from_rest_object(task_details.limit_settings) if task_details.limit_settings else None + ) + featurization = ( + NlpFeaturizationSettings._from_rest_object(task_details.featurization_settings) + if 
task_details.featurization_settings + else None + ) + sweep = NlpSweepSettings._from_rest_object(task_details.sweep_settings) if task_details.sweep_settings else None + training_parameters = ( + NlpFixedParameters._from_rest_object(task_details.fixed_parameters) + if task_details.fixed_parameters + else None + ) + + text_classification_multilabel_job = cls( + # ----- job specific params + id=obj.id, + name=obj.name, + description=properties.description, + tags=properties.tags, + properties=properties.properties, + experiment_name=properties.experiment_name, + services=properties.services, + status=properties.status, + creation_context=SystemData._from_rest_object(obj.system_data) if obj.system_data else None, + display_name=properties.display_name, + compute=properties.compute_id, + outputs=from_rest_data_outputs(properties.outputs), + resources=properties.resources, + # ----- task specific params + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + limits=limits, + sweep=sweep, + training_parameters=training_parameters, + search_space=cls._get_search_space_from_str(task_details.search_space), + featurization=featurization, + identity=( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + queue_settings=properties.queue_settings, + ) + + text_classification_multilabel_job._restore_data_inputs() + + return text_classification_multilabel_job + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> "Component": + raise NotImplementedError() + + @classmethod + def _load_from_dict( + cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any + ) -> "TextClassificationMultilabelJob": + from azure.ai.ml._schema.automl.nlp_vertical.text_classification_multilabel import ( + TextClassificationMultilabelSchema, + ) + + if kwargs.pop("inside_pipeline", False): + from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextClassificationMultilabelNode + + loaded_data = load_from_dict( + AutoMLTextClassificationMultilabelNode, + data, + context, + additional_message, + **kwargs, + ) + else: + loaded_data = load_from_dict( + TextClassificationMultilabelSchema, + data, + context, + additional_message, + **kwargs, + ) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "TextClassificationMultilabelJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + return TextClassificationMultilabelJob(**loaded_data) + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.nlp_vertical.text_classification_multilabel import ( + TextClassificationMultilabelSchema, + ) + from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextClassificationMultilabelNode + + if inside_pipeline: + res_autoML: dict = AutoMLTextClassificationMultilabelNode(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res_autoML + + res: dict = TextClassificationMultilabelSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + def __eq__(self, other: object) -> bool: + if not isinstance(other, TextClassificationMultilabelJob): + return NotImplemented + + if not super(TextClassificationMultilabelJob, self).__eq__(other): + return False + + return self.primary_metric == 
other.primary_metric + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_ner_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_ner_job.py new file mode 100644 index 00000000..a87965f1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_ner_job.py @@ -0,0 +1,231 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import TYPE_CHECKING, Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase, TaskType +from azure.ai.ml._restclient.v2023_04_01_preview.models._azure_machine_learning_workspaces_enums import ( + ClassificationPrimaryMetrics, +) +from azure.ai.ml._restclient.v2024_01_01_preview.models import TextNer as RestTextNER +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.nlp.automl_nlp_job import AutoMLNLPJob +from azure.ai.ml.entities._job.automl.nlp.nlp_featurization_settings import NlpFeaturizationSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_fixed_parameters import NlpFixedParameters +from azure.ai.ml.entities._job.automl.nlp.nlp_limit_settings import NlpLimitSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_sweep_settings import NlpSweepSettings +from azure.ai.ml.entities._system_data import SystemData +from azure.ai.ml.entities._util import load_from_dict + +# avoid circular import error +if TYPE_CHECKING: + from azure.ai.ml.entities._component.component import Component + + +class TextNerJob(AutoMLNLPJob): + """Configuration for AutoML Text NER Job. + + :param training_data: Training data to be used for training, defaults to None + :type training_data: Optional[~azure.ai.ml.Input] + :param validation_data: Validation data to be used for evaluating the trained model, + defaults to None + :type validation_data: Optional[~azure.ai.ml.Input] + :param primary_metric: The primary metric to be displayed, defaults to None + :type primary_metric: Optional[str] + :param log_verbosity: Log verbosity level, defaults to None + :type log_verbosity: Optional[str] + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.text_ner_job] + :end-before: [END automl.text_ner_job] + :language: python + :dedent: 8 + :caption: creating an automl text ner job + """ + + _DEFAULT_PRIMARY_METRIC = ClassificationPrimaryMetrics.ACCURACY + + def __init__( + self, + *, + training_data: Optional[Input] = None, + validation_data: Optional[Input] = None, + primary_metric: Optional[str] = None, + log_verbosity: Optional[str] = None, + **kwargs: Any + ): + super(TextNerJob, self).__init__( + task_type=TaskType.TEXT_NER, + primary_metric=primary_metric or TextNerJob._DEFAULT_PRIMARY_METRIC, + training_data=training_data, + validation_data=validation_data, + log_verbosity=log_verbosity, + **kwargs, + ) + + @property + def primary_metric(self) -> Union[str, ClassificationPrimaryMetrics]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ClassificationPrimaryMetrics]) -> None: + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + + self._primary_metric = ( + TextNerJob._DEFAULT_PRIMARY_METRIC + if value is None + else ClassificationPrimaryMetrics[camel_to_snake(value).upper()] + ) + + def _to_rest_object(self) -> JobBase: + text_ner = RestTextNER( + training_data=self.training_data, + validation_data=self.validation_data, + limit_settings=self._limits._to_rest_object() if self._limits else None, + sweep_settings=self._sweep._to_rest_object() if self._sweep else None, + fixed_parameters=self._training_parameters._to_rest_object() if self._training_parameters else None, + search_space=( + [entry._to_rest_object() for entry in self._search_space if entry is not None] + if self._search_space is not None + else None + ), + featurization_settings=self._featurization._to_rest_object() if self._featurization else None, + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + # resolve data inputs in rest object + self._resolve_data_inputs(text_ner) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=text_ner, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "TextNerJob": + properties: RestAutoMLJob = obj.properties + task_details: RestTextNER = properties.task_details + assert isinstance(task_details, RestTextNER) + limits = ( + NlpLimitSettings._from_rest_object(task_details.limit_settings) if task_details.limit_settings else None + ) + featurization = ( + NlpFeaturizationSettings._from_rest_object(task_details.featurization_settings) + if task_details.featurization_settings + else None + ) + sweep = NlpSweepSettings._from_rest_object(task_details.sweep_settings) if task_details.sweep_settings else None + training_parameters = ( + NlpFixedParameters._from_rest_object(task_details.fixed_parameters) + if task_details.fixed_parameters + else None + ) + + text_ner_job = cls( + # ----- job specific params + id=obj.id, + name=obj.name, + description=properties.description, 
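+            # remaining job-level metadata is copied straight from the REST payload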
+ tags=properties.tags, + properties=properties.properties, + experiment_name=properties.experiment_name, + services=properties.services, + status=properties.status, + creation_context=SystemData._from_rest_object(obj.system_data) if obj.system_data else None, + display_name=properties.display_name, + compute=properties.compute_id, + outputs=from_rest_data_outputs(properties.outputs), + resources=properties.resources, + # ----- task specific params + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + limits=limits, + sweep=sweep, + training_parameters=training_parameters, + search_space=cls._get_search_space_from_str(task_details.search_space), + featurization=featurization, + identity=( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + queue_settings=properties.queue_settings, + ) + + text_ner_job._restore_data_inputs() + + return text_ner_job + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> "Component": + raise NotImplementedError() + + @classmethod + def _load_from_dict(cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any) -> "TextNerJob": + from azure.ai.ml._schema.automl.nlp_vertical.text_ner import TextNerSchema + + if kwargs.pop("inside_pipeline", False): + from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextNerNode + + loaded_data = load_from_dict(AutoMLTextNerNode, data, context, additional_message, **kwargs) + else: + loaded_data = load_from_dict(TextNerSchema, data, context, additional_message, **kwargs) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "TextNerJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + return TextNerJob(**loaded_data) + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.nlp_vertical.text_ner import TextNerSchema + from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextNerNode + + if inside_pipeline: + res_autoML: dict = AutoMLTextNerNode(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res_autoML + + res: dict = TextNerSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + def __eq__(self, other: object) -> bool: + if not isinstance(other, TextNerJob): + return NotImplemented + + if not super(TextNerJob, self).__eq__(other): + return False + + return self.primary_metric == other.primary_metric + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/search_space.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/search_space.py new file mode 100644 index 00000000..a958de56 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/search_space.py @@ -0,0 +1,14 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + + +from typing import Any + + +class SearchSpace: + """SearchSpace class for AutoML verticals.""" + + def __init__(self, **kwargs: Any) -> None: + for k, v in kwargs.items(): + self.__setattr__(k, v) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/search_space_utils.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/search_space_utils.py new file mode 100644 index 00000000..732030d4 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/search_space_utils.py @@ -0,0 +1,276 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +import re +from typing import Any, List, Union + +from marshmallow import fields + +from azure.ai.ml._schema._sweep.search_space import ( + ChoiceSchema, + NormalSchema, + QNormalSchema, + QUniformSchema, + RandintSchema, + UniformSchema, +) +from azure.ai.ml._schema.core.fields import DumpableIntegerField, DumpableStringField, NestedField, UnionField +from azure.ai.ml._utils.utils import float_to_str +from azure.ai.ml.constants._job.sweep import SearchSpace +from azure.ai.ml.entities._job.sweep.search_space import ( + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + SweepDistribution, + Uniform, +) +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +def _convert_to_rest_object(sweep_distribution: Union[bool, int, float, str, SweepDistribution]) -> str: + if isinstance(sweep_distribution, float): + # Float requires some special handling for small values that get auto-represented with scientific notation. + res: str = float_to_str(sweep_distribution) + return res + if not isinstance(sweep_distribution, SweepDistribution): + # Convert [bool, float, str] types to str + return str(sweep_distribution) + + rest_object = sweep_distribution._to_rest_object() + if not isinstance(rest_object, list): + msg = "Rest Object for sweep distribution should be a list." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + if len(rest_object) <= 1: + msg = "Rest object for sweep distribution should contain at least two elements." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + sweep_distribution_type = rest_object[0] + sweep_distribution_args = [] + + if not isinstance(rest_object[1], list): + msg = "The second element of Rest object for sweep distribution should be a list." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + if sweep_distribution_type == SearchSpace.CHOICE: + # Rest objects for choice distribution are of format ["choice", [[0, 1, 2]]] + if not isinstance(rest_object[1][0], list): + msg = "The second element of Rest object for choice distribution should be a list of list." 
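+            # A well-formed choice rest object nests its values, e.g. ["choice", [[0, 1, 2]]];
+            # a flat second element such as ["choice", [0, 1, 2]] is rejected here.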
+ raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + for value in rest_object[1][0]: + if isinstance(value, str): + sweep_distribution_args.append("'" + value + "'") + elif isinstance(value, float): + sweep_distribution_args.append(float_to_str(value)) + else: + sweep_distribution_args.append(str(value)) + else: + for value in rest_object[1]: + if isinstance(value, float): + sweep_distribution_args.append(float_to_str(value)) + else: + sweep_distribution_args.append(str(value)) + + sweep_distribution_str: str = sweep_distribution_type + "(" + sweep_distribution_str += ",".join(sweep_distribution_args) + sweep_distribution_str += ")" + return sweep_distribution_str + + +def _is_int(value: str) -> bool: + try: + int(value) + return True + except ValueError: + return False + + +def _is_float(value: str) -> bool: + try: + float(value) + return True + except ValueError: + return False + + +def _get_type_inferred_value(value: str) -> Union[bool, int, float, str]: + value = value.strip() + if _is_int(value): + # Int + return int(value) + if _is_float(value): + # Float + return float(value) + if value in ["True", "False"]: + # Convert "True", "False" to python boolean literals + return value == "True" + # string value. Remove quotes before returning. + return value.strip("'\"") + + +def _convert_from_rest_object( + sweep_distribution_str: str, +) -> Any: + # sweep_distribution_str can be a distribution like "choice('vitb16r224', 'vits16r224')" or + # a single value like "True", "1", "1.0567", "vitb16r224" + + sweep_distribution_str = sweep_distribution_str.strip() + # Filter by the delimiters and remove splits that are empty strings + sweep_distribution_separated = list(filter(None, re.split("[ ,()]+", sweep_distribution_str))) + + if len(sweep_distribution_separated) == 1: + # Single value. 
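+        # _get_type_inferred_value maps, for example, "1" -> 1, "1.0567" -> 1.0567,
+        # "True" -> True, and "'vitb16r224'" -> "vitb16r224" (surrounding quotes stripped).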
+ return _get_type_inferred_value(sweep_distribution_separated[0]) + + # Distribution string + sweep_distribution_type = sweep_distribution_separated[0].strip().lower() + sweep_distribution_args: List = [] + for value in sweep_distribution_separated[1:]: + sweep_distribution_args.append(_get_type_inferred_value(value)) + + if sweep_distribution_type == SearchSpace.CHOICE: + sweep_distribution_args = [sweep_distribution_args] # Choice values are list of lists + + sweep_distribution = SweepDistribution._from_rest_object([sweep_distribution_type, sweep_distribution_args]) + return sweep_distribution + + +def _convert_sweep_dist_dict_to_str_dict(sweep_distribution: dict) -> dict: + for k, sweep_dist_dict in sweep_distribution.items(): + if sweep_dist_dict is not None: + sweep_distribution[k] = _convert_sweep_dist_dict_item_to_str(sweep_dist_dict) + return sweep_distribution + + +class ChoicePlusSchema(ChoiceSchema): + """Choice schema that allows boolean values also""" + + values = fields.List( + UnionField( + [ + DumpableIntegerField(strict=True), + DumpableStringField(), + fields.Float(), + fields.Dict( + keys=fields.Str(), + values=UnionField( + [ + NestedField("ChoicePlusSchema"), + NestedField(NormalSchema()), + NestedField(QNormalSchema()), + NestedField(RandintSchema()), + NestedField(UniformSchema()), + NestedField(QUniformSchema()), + DumpableIntegerField(strict=True), + fields.Float(), + fields.Str(), + fields.Boolean(), + ] + ), + ), + fields.Boolean(), + ] + ) + ) + + +def _convert_sweep_dist_dict_item_to_str(sweep_distribution: Union[bool, int, float, str, dict]) -> str: + # Convert a Sweep Distribution dict to Sweep Distribution string + # Eg. {type: 'choice', values: ['vitb16r224','vits16r224']} => "Choice('vitb16r224','vits16r224')" + if isinstance(sweep_distribution, dict): + sweep_dist_type = sweep_distribution["type"] + if sweep_dist_type == SearchSpace.CHOICE: + sweep_dist_obj = ChoicePlusSchema().load(sweep_distribution) # pylint: disable=no-member + elif sweep_dist_type in SearchSpace.UNIFORM_LOGUNIFORM: + sweep_dist_obj = UniformSchema().load(sweep_distribution) # pylint: disable=no-member + elif sweep_dist_type in SearchSpace.NORMAL_LOGNORMAL: + sweep_dist_obj = NormalSchema().load(sweep_distribution) # pylint: disable=no-member + elif sweep_dist_type in SearchSpace.QUNIFORM_QLOGUNIFORM: + sweep_dist_obj = QUniformSchema().load(sweep_distribution) # pylint: disable=no-member + elif sweep_dist_type in SearchSpace.QNORMAL_QLOGNORMAL: + sweep_dist_obj = QNormalSchema().load(sweep_distribution) # pylint: disable=no-member + elif sweep_dist_type in SearchSpace.RANDINT: + sweep_dist_obj = RandintSchema().load(sweep_distribution) # pylint: disable=no-member + else: + msg = f"Unsupported sweep distribution type {sweep_dist_type}" + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + else: # Case for other primitive types + sweep_dist_obj = sweep_distribution + + sweep_dist_str = _convert_to_rest_object(sweep_dist_obj) + return sweep_dist_str + + +def _convert_sweep_dist_str_to_dict(sweep_dist_str_list: dict) -> dict: + for k, val in sweep_dist_str_list.items(): + if isinstance(val, str): + sweep_dist_str_list[k] = _convert_sweep_dist_str_item_to_dict(val) + return sweep_dist_str_list + + +def _convert_sweep_dist_str_item_to_dict( + sweep_distribution_str: str, +) -> Union[bool, int, float, str, dict]: + # sweep_distribution_str can be a distribution like "choice('vitb16r224', 
'vits16r224')" + # return type is {type: 'choice', values: ['vitb16r224', 'vits16r224']} + sweep_dist_obj = _convert_from_rest_object(sweep_distribution_str) + sweep_dist: Union[bool, int, float, str, dict] = "" + if isinstance(sweep_dist_obj, SweepDistribution): + if isinstance(sweep_dist_obj, Choice): + sweep_dist = ChoicePlusSchema().dump(sweep_dist_obj) # pylint: disable=no-member + elif isinstance(sweep_dist_obj, (QNormal, QLogNormal)): + sweep_dist = QNormalSchema().dump(sweep_dist_obj) # pylint: disable=no-member + elif isinstance(sweep_dist_obj, (QUniform, QLogUniform)): + sweep_dist = QUniformSchema().dump(sweep_dist_obj) # pylint: disable=no-member + elif isinstance(sweep_dist_obj, (Uniform, LogUniform)): + sweep_dist = UniformSchema().dump(sweep_dist_obj) # pylint: disable=no-member + elif isinstance(sweep_dist_obj, (Normal, LogNormal)): + sweep_dist = NormalSchema().dump(sweep_dist_obj) # pylint: disable=no-member + elif isinstance(sweep_dist_obj, Randint): + sweep_dist = RandintSchema().dump(sweep_dist_obj) # pylint: disable=no-member + else: + msg = "Invalid sweep distribution {}" + raise ValidationException( + message=msg.format(sweep_distribution_str), + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + else: # Case for other primitive types + sweep_dist = sweep_dist_obj + + return sweep_dist diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/stack_ensemble_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/stack_ensemble_settings.py new file mode 100644 index 00000000..c17fa7e3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/stack_ensemble_settings.py @@ -0,0 +1,70 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from typing import Any, Optional + +from azure.ai.ml._restclient.v2023_04_01_preview.models import StackEnsembleSettings as RestStackEnsembleSettings +from azure.ai.ml._restclient.v2023_04_01_preview.models import StackMetaLearnerType +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class StackEnsembleSettings(RestTranslatableMixin): + """Advance setting to customize StackEnsemble run.""" + + def __init__( + self, + *, + stack_meta_learner_k_wargs: Optional[Any] = None, + stack_meta_learner_train_percentage: float = 0.2, + stack_meta_learner_type: Optional[StackMetaLearnerType] = None, + **kwargs: Any + ): + """ + :param stack_meta_learner_k_wargs: Optional parameters to pass to the initializer of the + meta-learner. + :type stack_meta_learner_k_wargs: any + :param stack_meta_learner_train_percentage: Specifies the proportion of the training set + (when choosing train and validation type of training) to be reserved for training the + meta-learner. Default value is 0.2. + :type stack_meta_learner_train_percentage: float + :param stack_meta_learner_type: The meta-learner is a model trained on the output of the + individual heterogeneous models. Possible values include: "None", "LogisticRegression", + "LogisticRegressionCV", "LightGBMClassifier", "ElasticNet", "ElasticNetCV", + "LightGBMRegressor", "LinearRegression". 
+ :type stack_meta_learner_type: str or + ~azure.mgmt.machinelearningservices.models.StackMetaLearnerType + """ + super(StackEnsembleSettings, self).__init__(**kwargs) + self.stack_meta_learner_k_wargs = stack_meta_learner_k_wargs + self.stack_meta_learner_train_percentage = stack_meta_learner_train_percentage + self.stack_meta_learner_type = stack_meta_learner_type + + def _to_rest_object(self) -> RestStackEnsembleSettings: + return RestStackEnsembleSettings( + stack_meta_learner_k_wargs=self.stack_meta_learner_k_wargs, + stack_meta_learner_train_percentage=self.stack_meta_learner_train_percentage, + stack_meta_learner_type=self.stack_meta_learner_type, + ) + + @classmethod + def _from_rest_object(cls, obj: RestStackEnsembleSettings) -> "StackEnsembleSettings": + return cls( + stack_meta_learner_k_wargs=obj.stack_meta_learner_k_wargs, + stack_meta_learner_train_percentage=obj.stack_meta_learner_train_percentage, + stack_meta_learner_type=obj.stack_meta_learner_type, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, StackEnsembleSettings): + return NotImplemented + + return ( + super().__eq__(other) + and self.stack_meta_learner_k_wargs == other.stack_meta_learner_k_wargs + and self.stack_meta_learner_train_percentage == other.stack_meta_learner_train_percentage + and self.stack_meta_learner_type == other.stack_meta_learner_type + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/__init__.py new file mode 100644 index 00000000..c0373010 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/__init__.py @@ -0,0 +1,22 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from .automl_tabular import AutoMLTabular +from .classification_job import ClassificationJob +from .featurization_settings import ColumnTransformer, TabularFeaturizationSettings +from .forecasting_job import ForecastingJob +from .forecasting_settings import ForecastingSettings +from .limit_settings import TabularLimitSettings +from .regression_job import RegressionJob + +__all__ = [ + "AutoMLTabular", + "ClassificationJob", + "ColumnTransformer", + "ForecastingJob", + "ForecastingSettings", + "RegressionJob", + "TabularFeaturizationSettings", + "TabularLimitSettings", +] diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/automl_tabular.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/automl_tabular.py new file mode 100644 index 00000000..5f4ed22b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/automl_tabular.py @@ -0,0 +1,607 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# --------------------------------------------------------- + +# pylint: disable=too-many-instance-attributes + +from abc import ABC +from typing import Any, Dict, List, Optional, Union + +from azure.ai.ml._restclient.v2024_01_01_preview.models import ( + AutoNCrossValidations, + BlockedTransformers, + CustomNCrossValidations, + LogVerbosity, +) +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants import TabularTrainingMode +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job.automl.automl_vertical import AutoMLVertical +from azure.ai.ml.entities._job.automl.stack_ensemble_settings import StackEnsembleSettings +from azure.ai.ml.entities._job.automl.tabular.featurization_settings import ( + ColumnTransformer, + TabularFeaturizationSettings, +) +from azure.ai.ml.entities._job.automl.tabular.limit_settings import TabularLimitSettings +from azure.ai.ml.entities._job.automl.training_settings import TrainingSettings +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +class AutoMLTabular(AutoMLVertical, ABC): + """Initialize an AutoML job entity for tabular data. + + Constructor for AutoMLTabular. + + :keyword task_type: The type of task to run. Possible values include: "classification", "regression" + , "forecasting". + :paramtype task_type: str + :keyword featurization: featurization settings. Defaults to None. + :paramtype featurization: typing.Optional[TabularFeaturizationSettings] + :keyword limits: limits settings. Defaults to None. + :paramtype limits: typing.Optional[TabularLimitSettings] + :keyword training: training settings. Defaults to None. + :paramtype training: typing.Optional[TrainingSettings] + :keyword log_verbosity: Verbosity of logging. Possible values include: "debug", "info", "warning", "error", + "critical". Defaults to "info". + :paramtype log_verbosity: str + :keyword target_column_name: The name of the target column. Defaults to None. + :paramtype target_column_name: typing.Optional[str] + :keyword weight_column_name: The name of the weight column. Defaults to None. + :paramtype weight_column_name: typing.Optional[str] + :keyword validation_data_size: The size of the validation data. Defaults to None. + :paramtype validation_data_size: typing.Optional[float] + :keyword cv_split_column_names: The names of the columns to use for cross validation. Defaults to None. + :paramtype cv_split_column_names: typing.Optional[List[str]] + :keyword n_cross_validations: The number of cross validations to run. Defaults to None. + :paramtype n_cross_validations: typing.Optional[int] + :keyword test_data_size: The size of the test data. Defaults to None. + :paramtype test_data_size: typing.Optional[float] + :keyword training_data: The training data. Defaults to None. + :paramtype training_data: typing.Optional[azure.ai.ml.entities._inputs_outputs.Input] + :keyword validation_data: The validation data. Defaults to None. + :paramtype validation_data: typing.Optional[azure.ai.ml.entities._inputs_outputs.Input] + :keyword test_data: The test data. Defaults to None. + :paramtype test_data: typing.Optional[azure.ai.ml.entities._inputs_outputs.Input] + """ + + def __init__( + self, + *, + task_type: str, + featurization: Optional[TabularFeaturizationSettings] = None, + limits: Optional[TabularLimitSettings] = None, + training: Optional[Any] = None, + **kwargs: Any, + ) -> None: + """Initialize an AutoML job entity for tabular data. 
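+
+        This class is abstract; instantiate one of its concrete task types
+        (for example ClassificationJob, RegressionJob, or ForecastingJob) instead.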
+ + Constructor for AutoMLTabular. + + :keyword task_type: The type of task to run. Possible values include: "classification", "regression" + , "forecasting". + :paramtype task_type: str + :keyword featurization: featurization settings. Defaults to None. + :paramtype featurization: typing.Optional[TabularFeaturizationSettings] + :keyword limits: limits settings. Defaults to None. + :paramtype limits: typing.Optional[TabularLimitSettings] + :keyword training: training settings. Defaults to None. + :paramtype training: typing.Optional[TrainingSettings] + :keyword log_verbosity: Verbosity of logging. Possible values include: "debug", "info", "warning", "error", + "critical". Defaults to "info". + :paramtype log_verbosity: str + :keyword target_column_name: The name of the target column. Defaults to None. + :paramtype target_column_name: typing.Optional[str] + :keyword weight_column_name: The name of the weight column. Defaults to None. + :paramtype weight_column_name: typing.Optional[str] + :keyword validation_data_size: The size of the validation data. Defaults to None. + :paramtype validation_data_size: typing.Optional[float] + :keyword cv_split_column_names: The names of the columns to use for cross validation. Defaults to None. + :paramtype cv_split_column_names: typing.Optional[List[str]] + :keyword n_cross_validations: The number of cross validations to run. Defaults to None. + :paramtype n_cross_validations: typing.Optional[int] + :keyword test_data_size: The size of the test data. Defaults to None. + :paramtype test_data_size: typing.Optional[float] + :keyword training_data: The training data. Defaults to None. + :paramtype training_data: typing.Optional[azure.ai.ml.entities._inputs_outputs.Input] + :keyword validation_data: The validation data. Defaults to None. + :paramtype validation_data: typing.Optional[azure.ai.ml.entities._inputs_outputs.Input] + :keyword test_data: The test data. Defaults to None. + :paramtype test_data: typing.Optional[azure.ai.ml.entities._inputs_outputs.Input] + :raises: :class:`azure.ai.ml.exceptions.ValidationException` + """ + self.log_verbosity = kwargs.pop("log_verbosity", LogVerbosity.INFO) + + self.target_column_name = kwargs.pop("target_column_name", None) + self.weight_column_name = kwargs.pop("weight_column_name", None) + self.validation_data_size = kwargs.pop("validation_data_size", None) + self.cv_split_column_names = kwargs.pop("cv_split_column_names", None) + self.n_cross_validations = kwargs.pop("n_cross_validations", None) + self.test_data_size = kwargs.pop("test_data_size", None) + + super().__init__( + task_type=task_type, + training_data=kwargs.pop("training_data", None), + validation_data=kwargs.pop("validation_data", None), + test_data=kwargs.pop("test_data", None), + **kwargs, + ) + + self._featurization = featurization + self._limits = limits + self._training = training + + @property + def log_verbosity(self) -> LogVerbosity: + """Get the log verbosity for the AutoML job. + + :return: log verbosity for the AutoML job + :rtype: LogVerbosity + """ + return self._log_verbosity + + @log_verbosity.setter + def log_verbosity(self, value: Union[str, LogVerbosity]) -> None: + """Set the log verbosity for the AutoML job. + + :param value: str or LogVerbosity + :type value: typing.Union[str, LogVerbosity] + """ + self._log_verbosity = None if value is None else LogVerbosity[camel_to_snake(value).upper()] + + @property + def limits(self) -> Optional[TabularLimitSettings]: + """Get the tabular limits for the AutoML job. 
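+
+        A minimal illustrative sketch (``job`` is assumed to be an existing tabular
+        AutoML job, such as a ClassificationJob); assigning a dict is forwarded to
+        :meth:`set_limits`:
+
+        .. code-block:: python
+
+            job.limits = {"timeout_minutes": 60, "max_trials": 100, "max_concurrent_trials": 4}
+            # Equivalent to calling set_limits with the same keyword arguments:
+            job.set_limits(timeout_minutes=60, max_trials=100, max_concurrent_trials=4)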
+ + :return: Tabular limits for the AutoML job + :rtype: TabularLimitSettings + """ + return self._limits + + @limits.setter + def limits(self, value: Union[Dict, TabularLimitSettings]) -> None: + """Set the limits for the AutoML job. + + :param value: typing.Dict or TabularLimitSettings + :type value: typing.Union[typing.Dict, TabularLimitSettings] + :raises ValidationException: Expected a dictionary for limit settings. + """ + if isinstance(value, TabularLimitSettings): + self._limits = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for limit settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_limits(**value) + + @property + def training(self) -> Any: + """Get the training settings for the AutoML job. + + :return: Training settings for the AutoML job. + :rtype: TrainingSettings + """ + return self._training + + @training.setter + def training(self, value: Union[Dict, TrainingSettings]) -> None: + """Set the training settings for the AutoML job. + + :param value: typing.Dict or TrainingSettings + :type value: typing.Union[typing.Dict, TrainingSettings] + :raises ValidationException: Expected a dictionary for training settings. + """ + if isinstance(value, TrainingSettings): + self._training = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for training settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_training(**value) + + @property + def featurization(self) -> Optional[TabularFeaturizationSettings]: + """Get the tabular featurization settings for the AutoML job. + + :return: Tabular featurization settings for the AutoML job + :rtype: TabularFeaturizationSettings + """ + return self._featurization + + @featurization.setter + def featurization(self, value: Union[Dict, TabularFeaturizationSettings]) -> None: + """Set the featurization settings for the AutoML job. + + :param value: typing.Dict or TabularFeaturizationSettings + :type value: typing.Union[typing.Dict, TabularFeaturizationSettings] + :raises ValidationException: Expected a dictionary for featurization settings + """ + if isinstance(value, TabularFeaturizationSettings): + self._featurization = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for featurization settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_featurization(**value) + + def set_limits( + self, + *, + enable_early_termination: Optional[bool] = None, + exit_score: Optional[float] = None, + max_concurrent_trials: Optional[int] = None, + max_cores_per_trial: Optional[int] = None, + max_nodes: Optional[int] = None, + max_trials: Optional[int] = None, + timeout_minutes: Optional[int] = None, + trial_timeout_minutes: Optional[int] = None, + ) -> None: + """Set limits for the job. + + :keyword enable_early_termination: Whether to enable early termination if the score is not improving in the + short term, defaults to None. + + Early stopping logic: + + * No early stopping for first 20 iterations (landmarks). + * Early stopping window starts on the 21st iteration and looks for early_stopping_n_iters iterations + (currently set to 10). This means that the first iteration where stopping can occur is the 31st. 
* AutoML still schedules 2 ensemble iterations AFTER early stopping, which might result in higher scores.
+            * Early stopping is triggered if the absolute value of best score calculated is the same for past
+              early_stopping_n_iters iterations, that is, if there is no improvement in score for
+              early_stopping_n_iters iterations.
+
+        :paramtype enable_early_termination: typing.Optional[bool]
+        :keyword exit_score: Target score for experiment. The experiment terminates after this score is reached.
+            If not specified (no criteria), the experiment runs until no further progress is made
+            on the primary metric. For more information on exit criteria, see this `article
+            <https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#exit-criteria>`_,
+            defaults to None
+        :paramtype exit_score: typing.Optional[float]
+        :keyword max_concurrent_trials: This is the maximum number of iterations that would be executed in parallel.
+            The default value is 1.
+
+            * AmlCompute clusters support one iteration running per node. For multiple AutoML experiment parent runs
+              executed in parallel on a single AmlCompute cluster, the sum of the ``max_concurrent_trials`` values
+              for all experiments should be less than or equal to the maximum number of nodes. Otherwise, runs
+              will be queued until nodes are available.
+
+            * DSVM supports multiple iterations per node. ``max_concurrent_trials`` should
+              be less than or equal to the number of cores on the DSVM. For multiple experiments
+              run in parallel on a single DSVM, the sum of the ``max_concurrent_trials`` values for all
+              experiments should be less than or equal to the maximum number of nodes.
+
+            * Databricks - ``max_concurrent_trials`` should be less than or equal to the number of
+              worker nodes on Databricks.
+
+            ``max_concurrent_trials`` does not apply to local runs. Formerly, this parameter
+            was named ``concurrent_iterations``.
+        :paramtype max_concurrent_trials: typing.Optional[int]
+        :keyword max_cores_per_trial: The maximum number of threads to use for a given training iteration.
+            Acceptable values:
+
+            * Greater than 1 and less than or equal to the maximum number of cores on the compute target.
+
+            * Equal to -1, which means to use all the possible cores per iteration per child-run.
+
+            * Equal to 1, the default.
+
+        :paramtype max_cores_per_trial: typing.Optional[int]
+        :keyword max_nodes: [Experimental] The maximum number of nodes to use for distributed training.
+
+            * For forecasting, each model is trained using max(2, int(max_nodes / max_concurrent_trials)) nodes.
+
+            * For classification/regression, each model is trained using max_nodes nodes.
+
+            Note- This parameter is in public preview and might change in the future.
+        :paramtype max_nodes: typing.Optional[int]
+        :keyword max_trials: The total number of different algorithm and parameter combinations to test during an
+            automated ML experiment. If not specified, the default is 1000 iterations.
+        :paramtype max_trials: typing.Optional[int]
+        :keyword timeout_minutes: Maximum amount of time in minutes that all iterations combined can take before the
+            experiment terminates. If not specified, the default experiment timeout is 6 days. To specify a timeout
+            less than or equal to 1 hour, make sure your dataset's size is not greater than
+            10,000,000 (rows times columns) or an error results, defaults to None
+        :paramtype timeout_minutes: typing.Optional[int]
+        :keyword trial_timeout_minutes: Maximum time in minutes that each iteration can run for before it terminates.
+            If not specified, a value of 1 month or 43200 minutes is used, defaults to None
+        :paramtype trial_timeout_minutes: typing.Optional[int]
+        """
+        self._limits = self._limits or TabularLimitSettings()
+        self._limits.enable_early_termination = (
+            enable_early_termination if enable_early_termination is not None else self._limits.enable_early_termination
+        )
+        self._limits.exit_score = exit_score if exit_score is not None else self._limits.exit_score
+        self._limits.max_concurrent_trials = (
+            max_concurrent_trials if max_concurrent_trials is not None else self._limits.max_concurrent_trials
+        )
+        self._limits.max_cores_per_trial = (
+            max_cores_per_trial if max_cores_per_trial is not None else self._limits.max_cores_per_trial
+        )
+        self._limits.max_nodes = max_nodes if max_nodes is not None else self._limits.max_nodes
+        self._limits.max_trials = max_trials if max_trials is not None else self._limits.max_trials
+        self._limits.timeout_minutes = timeout_minutes if timeout_minutes is not None else self._limits.timeout_minutes
+        self._limits.trial_timeout_minutes = (
+            trial_timeout_minutes if trial_timeout_minutes is not None else self._limits.trial_timeout_minutes
+        )
+
+    def set_training(
+        self,
+        *,
+        enable_onnx_compatible_models: Optional[bool] = None,
+        enable_dnn_training: Optional[bool] = None,
+        enable_model_explainability: Optional[bool] = None,
+        enable_stack_ensemble: Optional[bool] = None,
+        enable_vote_ensemble: Optional[bool] = None,
+        stack_ensemble_settings: Optional[StackEnsembleSettings] = None,
+        ensemble_model_download_timeout: Optional[int] = None,
+        allowed_training_algorithms: Optional[List[str]] = None,
+        blocked_training_algorithms: Optional[List[str]] = None,
+        training_mode: Optional[Union[str, TabularTrainingMode]] = None,
+    ) -> None:
+        """The method to configure training related settings.
+
+        :keyword enable_onnx_compatible_models: Whether to enable or disable enforcing the ONNX-compatible models.
+            The default is False. For more information about Open Neural Network Exchange (ONNX) and Azure Machine
+            Learning, see this `article <https://learn.microsoft.com/azure/machine-learning/concept-onnx>`__.
+        :paramtype enable_onnx_compatible_models: typing.Optional[bool]
+        :keyword enable_dnn_training: Whether to include DNN based models during model selection.
+            The default is True for DNN NLP tasks and False for all other AutoML tasks.
+        :paramtype enable_dnn_training: typing.Optional[bool]
+        :keyword enable_model_explainability: Whether to enable explaining the best AutoML model at the end of all
+            AutoML training iterations. For more information, see
+            `Interpretability: model explanations in automated machine learning
+            <https://learn.microsoft.com/azure/machine-learning/how-to-machine-learning-interpretability-automl>`__,
+            defaults to None
+        :paramtype enable_model_explainability: typing.Optional[bool]
+        :keyword enable_stack_ensemble: Whether to enable/disable StackEnsemble iteration.
+            If the `enable_onnx_compatible_models` flag is being set, then StackEnsemble iteration will be disabled.
+            Similarly, for Timeseries tasks, StackEnsemble iteration will be disabled by default, to avoid risks of
+            overfitting due to the small training set used in fitting the meta learner.
+            For more information about ensembles, see `Ensemble configuration
+            <https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#ensemble>`__,
+            defaults to None
+        :paramtype enable_stack_ensemble: typing.Optional[bool]
+        :keyword enable_vote_ensemble: Whether to enable/disable VotingEnsemble iteration.
+            For more information about ensembles, see `Ensemble configuration
+            <https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#ensemble>`__,
+            defaults to None
+        :paramtype enable_vote_ensemble: typing.Optional[bool]
+        :keyword stack_ensemble_settings: Settings for StackEnsemble iteration, defaults to None
+        :paramtype stack_ensemble_settings: typing.Optional[StackEnsembleSettings]
+        :keyword ensemble_model_download_timeout: During VotingEnsemble and StackEnsemble model generation,
+            multiple fitted models from the previous child runs are downloaded. Configure this parameter with a
+            value higher than 300 seconds if more time is needed, defaults to None
+        :paramtype ensemble_model_download_timeout: typing.Optional[int]
+        :keyword allowed_training_algorithms: A list of model names to search for an experiment. If not specified,
+            then all models supported for the task are used minus any specified in ``blocked_training_algorithms``
+            or deprecated TensorFlow models, defaults to None
+        :paramtype allowed_training_algorithms: typing.Optional[List[str]]
+        :keyword blocked_training_algorithms: A list of algorithms to ignore for an experiment, defaults to None
+        :paramtype blocked_training_algorithms: typing.Optional[List[str]]
+        :keyword training_mode: [Experimental] The training mode to use.
+            The possible values are-
+
+            * distributed- enables distributed training for supported algorithms.
+
+            * non_distributed- disables distributed training.
+
+            * auto- Currently, it is the same as non_distributed; this might change in the future.
+
+            Note: This parameter is in public preview and may change in the future.
+ :paramtype training_mode: typing.Optional[typing.Union[str, azure.ai.ml.constants.TabularTrainingMode]] + """ + # get training object by calling training getter of respective tabular task + self._training = self.training + if self._training is not None: + self._training.enable_onnx_compatible_models = ( + enable_onnx_compatible_models + if enable_onnx_compatible_models is not None + else self._training.enable_onnx_compatible_models + ) + self._training.enable_dnn_training = ( + enable_dnn_training if enable_dnn_training is not None else self._training.enable_dnn_training + ) + self._training.enable_model_explainability = ( + enable_model_explainability + if enable_model_explainability is not None + else self._training.enable_model_explainability + ) + self._training.enable_stack_ensemble = ( + enable_stack_ensemble if enable_stack_ensemble is not None else self._training.enable_stack_ensemble + ) + self._training.enable_vote_ensemble = ( + enable_vote_ensemble if enable_vote_ensemble is not None else self._training.enable_vote_ensemble + ) + self._training.stack_ensemble_settings = ( + stack_ensemble_settings + if stack_ensemble_settings is not None + else self._training.stack_ensemble_settings + ) + self._training.ensemble_model_download_timeout = ( + ensemble_model_download_timeout + if ensemble_model_download_timeout is not None + else self._training.ensemble_model_download_timeout + ) + + self._training.allowed_training_algorithms = allowed_training_algorithms + self._training.blocked_training_algorithms = blocked_training_algorithms + self._training.training_mode = training_mode if training_mode is not None else self._training.training_mode + + def set_featurization( + self, + *, + blocked_transformers: Optional[List[Union[BlockedTransformers, str]]] = None, + column_name_and_types: Optional[Dict[str, str]] = None, + dataset_language: Optional[str] = None, + transformer_params: Optional[Dict[str, List[ColumnTransformer]]] = None, + mode: Optional[str] = None, + enable_dnn_featurization: Optional[bool] = None, + ) -> None: + """Define feature engineering configuration. + + :keyword blocked_transformers: A list of transformer names to be blocked during featurization, defaults to None + :paramtype blocked_transformers: Optional[List[Union[BlockedTransformers, str]]] + :keyword column_name_and_types: A dictionary of column names and feature types used to update column purpose + , defaults to None + :paramtype column_name_and_types: Optional[Dict[str, str]] + :keyword dataset_language: Three character ISO 639-3 code for the language(s) contained in the dataset. + Languages other than English are only supported if you use GPU-enabled compute. The language_code + 'mul' should be used if the dataset contains multiple languages. 
To find ISO 639-3 codes for different
+            languages, please refer to https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes, defaults to None
+        :paramtype dataset_language: Optional[str]
+        :keyword transformer_params: A dictionary of transformers and their corresponding customization parameters,
+            defaults to None
+        :paramtype transformer_params: Optional[Dict[str, List[ColumnTransformer]]]
+        :keyword mode: The featurization mode, either "off" or "auto" (the default), defaults to None
+        :paramtype mode: Optional[str]
+        :keyword enable_dnn_featurization: Whether to include DNN based feature engineering methods, defaults to None
+        :paramtype enable_dnn_featurization: Optional[bool]
+        """
+        self._featurization = self._featurization or TabularFeaturizationSettings()
+        self._featurization.blocked_transformers = (
+            blocked_transformers if blocked_transformers is not None else self._featurization.blocked_transformers
+        )
+        self._featurization.column_name_and_types = (
+            column_name_and_types if column_name_and_types is not None else self._featurization.column_name_and_types
+        )
+        self._featurization.dataset_language = (
+            dataset_language if dataset_language is not None else self._featurization.dataset_language
+        )
+        self._featurization.transformer_params = (
+            transformer_params if transformer_params is not None else self._featurization.transformer_params
+        )
+        self._featurization.mode = mode or self._featurization.mode
+        self._featurization.enable_dnn_featurization = (
+            enable_dnn_featurization
+            if enable_dnn_featurization is not None
+            else self._featurization.enable_dnn_featurization
+        )
+
+    def set_data(
+        self,
+        *,
+        training_data: Input,
+        target_column_name: str,
+        weight_column_name: Optional[str] = None,
+        validation_data: Optional[Input] = None,
+        validation_data_size: Optional[float] = None,
+        n_cross_validations: Optional[Union[str, int]] = None,
+        cv_split_column_names: Optional[List[str]] = None,
+        test_data: Optional[Input] = None,
+        test_data_size: Optional[float] = None,
+    ) -> None:
+        """Define data configuration.
+
+        :keyword training_data: Training data.
+        :paramtype training_data: Input
+        :keyword target_column_name: Column name of the target column.
+        :paramtype target_column_name: str
+        :keyword weight_column_name: Weight column name, defaults to None
+        :paramtype weight_column_name: typing.Optional[str]
+        :keyword validation_data: Validation data, defaults to None
+        :paramtype validation_data: typing.Optional[Input]
+        :keyword validation_data_size: Validation data size, defaults to None
+        :paramtype validation_data_size: typing.Optional[float]
+        :keyword n_cross_validations: The number of cross validations to run, defaults to None
+        :paramtype n_cross_validations: typing.Optional[typing.Union[str, int]]
+        :keyword cv_split_column_names: The names of the columns to use for cross validation, defaults to None
+        :paramtype cv_split_column_names: typing.Optional[typing.List[str]]
+        :keyword test_data: Test data, defaults to None
+        :paramtype test_data: typing.Optional[Input]
+        :keyword test_data_size: Test data size, defaults to None
+        :paramtype test_data_size: typing.Optional[float]
+        """
+        self.target_column_name = target_column_name if target_column_name is not None else self.target_column_name
+        self.weight_column_name = weight_column_name if weight_column_name is not None else self.weight_column_name
+        self.training_data = training_data if training_data is not None else self.training_data
+        self.validation_data = validation_data if validation_data is not None else self.validation_data
+        self.validation_data_size = (
+            validation_data_size if validation_data_size is not None else self.validation_data_size
+        )
+        self.cv_split_column_names = (
+            cv_split_column_names if cv_split_column_names is not None else self.cv_split_column_names
+        )
+        self.n_cross_validations = n_cross_validations if n_cross_validations is not None else self.n_cross_validations
+        self.test_data = test_data if test_data is not None else self.test_data
+        self.test_data_size = test_data_size if test_data_size is not None else self.test_data_size
+
+    def _validation_data_to_rest(self, rest_obj: "AutoMLTabular") -> None:
+        """Validation data serialization.
+
+        :param rest_obj: Serialized object
+        :type rest_obj: AutoMLTabular
+        """
+        if rest_obj.n_cross_validations:
+            n_cross_val = rest_obj.n_cross_validations
+            # Convert n_cross_validations int value to CustomNCrossValidations
+            if isinstance(n_cross_val, int) and n_cross_val > 1:
+                rest_obj.n_cross_validations = CustomNCrossValidations(value=n_cross_val)
+            # Convert n_cross_validations str value to AutoNCrossValidations
+            elif isinstance(n_cross_val, str):
+                rest_obj.n_cross_validations = AutoNCrossValidations()
+
+    def _validation_data_from_rest(self) -> None:
+        """Validation data deserialization."""
+        if self.n_cross_validations:
+            n_cross_val = self.n_cross_validations
+            # Convert n_cross_validations CustomNCrossValidations back into int value
+            if isinstance(n_cross_val, CustomNCrossValidations):
+                self.n_cross_validations = n_cross_val.value
+            # Convert n_cross_validations AutoNCrossValidations to str value
+            elif isinstance(n_cross_val, AutoNCrossValidations):
+                self.n_cross_validations = AutoMLConstants.AUTO
+
+    def __eq__(self, other: object) -> bool:
+        """Return True if both instances have the same values.
+
+        This method checks instance equality and returns True if both of
+        the instances have the same attributes with the same values.
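+
+        Subclasses such as ClassificationJob extend this comparison with
+        task-specific fields like ``primary_metric``.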
+ + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + if not isinstance(other, AutoMLTabular): + return NotImplemented + + return ( + self.target_column_name == other.target_column_name + and self.weight_column_name == other.weight_column_name + and self.training_data == other.training_data + and self.validation_data == other.validation_data + and self.validation_data_size == other.validation_data_size + and self.cv_split_column_names == other.cv_split_column_names + and self.n_cross_validations == other.n_cross_validations + and self.test_data == other.test_data + and self.test_data_size == other.test_data_size + and self._featurization == other._featurization + and self._limits == other._limits + and self._training == other._training + ) + + def __ne__(self, other: object) -> bool: + """Check inequality between two AutoMLTabular objects. + + :param other: Any object + :type other: object + :return: True or False + :rtype: bool + """ + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/classification_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/classification_job.py new file mode 100644 index 00000000..6f5ab271 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/classification_job.py @@ -0,0 +1,352 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access +from typing import Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import Classification as RestClassification +from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationPrimaryMetrics, JobBase, TaskType +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.tabular.automl_tabular import AutoMLTabular +from azure.ai.ml.entities._job.automl.tabular.featurization_settings import TabularFeaturizationSettings +from azure.ai.ml.entities._job.automl.tabular.limit_settings import TabularLimitSettings +from azure.ai.ml.entities._job.automl.training_settings import ( # noqa: F401 # pylint: disable=unused-import + ClassificationTrainingSettings, + TrainingSettings, +) +from azure.ai.ml.entities._util import load_from_dict + + +class ClassificationJob(AutoMLTabular): + """Configuration for AutoML Classification Job. + + :keyword primary_metric: The primary metric to use for optimization, defaults to None + :paramtype primary_metric: typing.Optional[str] + :keyword positive_label: Positive label for binary metrics calculation, defaults to None + :paramtype positive_label: typing.Optional[str] + :keyword featurization: Featurization settings. Defaults to None. + :paramtype featurization: typing.Optional[TabularFeaturizationSettings] + :keyword limits: Limits settings. Defaults to None. 
+ :paramtype limits: typing.Optional[TabularLimitSettings] + :keyword training: Training settings. Defaults to None. + :paramtype training: typing.Optional[TrainingSettings] + :return: An instance of ClassificationJob object. + :rtype: ~azure.ai.ml.entities.automl.ClassificationJob + :raises ValueError: If primary_metric is not a valid primary metric + :raises ValueError: If positive_label is not a valid positive label + :raises ValueError: If featurization is not a valid featurization settings + :raises ValueError: If limits is not a valid limits settings + :raises ValueError: If training is not a valid training settings + """ + + _DEFAULT_PRIMARY_METRIC = ClassificationPrimaryMetrics.ACCURACY + + def __init__( + self, + *, + primary_metric: Optional[str] = None, + positive_label: Optional[str] = None, + **kwargs: Any, + ) -> None: + """Initialize a new AutoML Classification task. + + :keyword primary_metric: The primary metric to use for optimization, defaults to None + :paramtype primary_metric: typing.Optional[str] + :keyword positive_label: Positive label for binary metrics calculation, defaults to None + :paramtype positive_label: typing.Optional[str] + :keyword featurization: featurization settings. Defaults to None. + :paramtype featurization: typing.Optional[TabularFeaturizationSettings] + :keyword limits: limits settings. Defaults to None. + :paramtype limits: typing.Optional[TabularLimitSettings] + :keyword training: training settings. Defaults to None. + :paramtype training: typing.Optional[TrainingSettings] + :raises ValueError: If primary_metric is not a valid primary metric + :raises ValueError: If positive_label is not a valid positive label + :raises ValueError: If featurization is not a valid featurization settings + :raises ValueError: If limits is not a valid limits settings + :raises ValueError: If training is not a valid training settings + """ + # Extract any task specific settings + featurization = kwargs.pop("featurization", None) + limits = kwargs.pop("limits", None) + training = kwargs.pop("training", None) + + super().__init__( + task_type=TaskType.CLASSIFICATION, + featurization=featurization, + limits=limits, + training=training, + **kwargs, + ) + + self.primary_metric = primary_metric or ClassificationJob._DEFAULT_PRIMARY_METRIC + self.positive_label = positive_label + + @property + def primary_metric(self) -> Union[str, ClassificationPrimaryMetrics]: + """The primary metric to use for optimization. + + :return: The primary metric to use for optimization. + :rtype: typing.Union[str, ClassificationPrimaryMetrics] + """ + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ClassificationPrimaryMetrics]) -> None: + """The primary metric to use for optimization setter. + + :param value: Primary metric to use for optimization. + :type value: typing.Union[str, ClassificationPrimaryMetrics] + """ + # TODO: better way to do this + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + self._primary_metric = ( + ClassificationJob._DEFAULT_PRIMARY_METRIC + if value is None + else ClassificationPrimaryMetrics[camel_to_snake(value).upper()] + ) + + @property # type: ignore + def training(self) -> ClassificationTrainingSettings: + """Training Settings for AutoML Classification Job. + + :return: Training settings used for AutoML Classification Job. 
+ :rtype: ClassificationTrainingSettings + """ + return self._training or ClassificationTrainingSettings() + + @training.setter + def training(self, value: Union[Dict, ClassificationTrainingSettings]) -> None: # pylint: disable=unused-argument + ... + + def _to_rest_object(self) -> JobBase: + """Convert ClassificationJob object to a REST object. + + :return: REST object representation of this object. + :rtype: JobBase + """ + classification_task = RestClassification( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + validation_data_size=self.validation_data_size, + weight_column_name=self.weight_column_name, + cv_split_column_names=self.cv_split_column_names, + n_cross_validations=self.n_cross_validations, + test_data=self.test_data, + test_data_size=self.test_data_size, + featurization_settings=self._featurization._to_rest_object() if self._featurization else None, + limit_settings=self._limits._to_rest_object() if self._limits else None, + training_settings=self._training._to_rest_object() if self._training else None, + primary_metric=self.primary_metric, + positive_label=self.positive_label, + log_verbosity=self.log_verbosity, + ) + self._resolve_data_inputs(classification_task) + self._validation_data_to_rest(classification_task) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=classification_task, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "ClassificationJob": + """Convert a REST object to ClassificationJob object. + + :param obj: ClassificationJob in Rest format. + :type obj: JobBase + :return: ClassificationJob objects. 
+ :rtype: ClassificationJob + """ + + properties: RestAutoMLJob = obj.properties + task_details: RestClassification = properties.task_details + + job_args_dict = { + "id": obj.id, + "name": obj.name, + "description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "experiment_name": properties.experiment_name, + "services": properties.services, + "status": properties.status, + "creation_context": obj.system_data, + "display_name": properties.display_name, + "compute": properties.compute_id, + "outputs": from_rest_data_outputs(properties.outputs), + "resources": properties.resources, + "identity": ( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + "queue_settings": properties.queue_settings, + } + + classification_job = cls( + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + validation_data_size=task_details.validation_data_size, + weight_column_name=task_details.weight_column_name, + cv_split_column_names=task_details.cv_split_column_names, + n_cross_validations=task_details.n_cross_validations, + test_data=task_details.test_data, + test_data_size=task_details.test_data_size, + featurization=( + TabularFeaturizationSettings._from_rest_object(task_details.featurization_settings) + if task_details.featurization_settings + else None + ), + limits=( + TabularLimitSettings._from_rest_object(task_details.limit_settings) + if task_details.limit_settings + else None + ), + training=( + ClassificationTrainingSettings._from_rest_object(task_details.training_settings) + if task_details.training_settings + else None + ), + primary_metric=task_details.primary_metric, + positive_label=task_details.positive_label, + log_verbosity=task_details.log_verbosity, + **job_args_dict, + ) + + classification_job._restore_data_inputs() + classification_job._validation_data_from_rest() + + return classification_job + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "ClassificationJob": + """Load from a dictionary. + + :param data: dictionary representation of the object. + :type data: typing.Dict + :param context: dictionary containing the context. + :type context: typing.Dict + :param additional_message: additional message to be added to the error message. + :type additional_message: str + :return: ClassificationJob object. + :rtype: ClassificationJob + """ + from azure.ai.ml._schema.automl.table_vertical.classification import AutoMLClassificationSchema + from azure.ai.ml._schema.pipeline.automl_node import AutoMLClassificationNodeSchema + + if kwargs.pop("inside_pipeline", False): + loaded_data = load_from_dict( + AutoMLClassificationNodeSchema, + data, + context, + additional_message, + **kwargs, + ) + else: + loaded_data = load_from_dict(AutoMLClassificationSchema, data, context, additional_message, **kwargs) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "ClassificationJob": + """Create an instance from a schema dictionary. + + :param loaded_data: dictionary containing the data. + :type loaded_data: typing.Dict + :return: ClassificationJob object. 
+ :rtype: ClassificationJob
+ """
+ loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None)
+ data_settings = {
+ "training_data": loaded_data.pop("training_data"),
+ "target_column_name": loaded_data.pop("target_column_name"),
+ "weight_column_name": loaded_data.pop("weight_column_name", None),
+ "validation_data": loaded_data.pop("validation_data", None),
+ "validation_data_size": loaded_data.pop("validation_data_size", None),
+ "cv_split_column_names": loaded_data.pop("cv_split_column_names", None),
+ "n_cross_validations": loaded_data.pop("n_cross_validations", None),
+ "test_data": loaded_data.pop("test_data", None),
+ "test_data_size": loaded_data.pop("test_data_size", None),
+ }
+ job = ClassificationJob(**loaded_data)
+ job.set_data(**data_settings)
+ return job
+
+ def _to_dict(self, inside_pipeline: bool = False) -> Dict:
+ """Convert the object to a dictionary.
+
+ :param inside_pipeline: whether the job is inside a pipeline or not, defaults to False
+ :type inside_pipeline: bool
+ :return: dictionary representation of the object.
+ :rtype: typing.Dict
+ """
+ from azure.ai.ml._schema.automl.table_vertical.classification import AutoMLClassificationSchema
+ from azure.ai.ml._schema.pipeline.automl_node import AutoMLClassificationNodeSchema
+
+ schema_dict: dict = {}
+ if inside_pipeline:
+ schema_dict = AutoMLClassificationNodeSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self)
+ else:
+ schema_dict = AutoMLClassificationSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self)
+
+ return schema_dict
+
+ def __eq__(self, other: object) -> bool:
+ """Returns True if both instances have the same values.
+
+ This method checks instance equality and returns True if both
+ instances have the same attributes with the same values.
+
+ :param other: Any object
+ :type other: object
+ :return: True or False
+ :rtype: bool
+ """
+ if not isinstance(other, ClassificationJob):
+ return NotImplemented
+
+ if not super().__eq__(other):
+ return False
+
+ return self.primary_metric == other.primary_metric
+
+ def __ne__(self, other: object) -> bool:
+ """Check inequality between two ClassificationJob objects.
+
+ :param other: Any object
+ :type other: object
+ :return: True or False
+ :rtype: bool
+ """
+ return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/featurization_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/featurization_settings.py new file mode 100644 index 00000000..6ef2332e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/featurization_settings.py @@ -0,0 +1,170 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=protected-access
+
+import logging
+from typing import Dict, List, Optional, Union
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import BlockedTransformers
+from azure.ai.ml._restclient.v2023_04_01_preview.models import ColumnTransformer as RestColumnTransformer
+from azure.ai.ml._restclient.v2023_04_01_preview.models import (
+ TableVerticalFeaturizationSettings as RestTabularFeaturizationSettings,
+)
+from azure.ai.ml._utils.utils import camel_to_snake
+from azure.ai.ml.constants._job.automl import AutoMLTransformerParameterKeys
+from azure.ai.ml.entities._job.automl.featurization_settings import FeaturizationSettings, FeaturizationSettingsType
+from azure.ai.ml.entities._mixins import RestTranslatableMixin
+
+module_logger = logging.getLogger(__name__)
+
+
+class ColumnTransformer(RestTranslatableMixin):
+ """Column transformer settings.
+
+ :param fields: The fields on which to perform custom featurization
+ :type fields: List[str]
+ :param parameters: The parameters used for custom featurization
+ :type parameters: Dict[str, Union[str, float]]
+ """
+
+ def __init__(
+ self,
+ *,
+ fields: Optional[List[str]] = None,
+ parameters: Optional[Dict[str, Union[str, float]]] = None,
+ ):
+ self.fields = fields
+ self.parameters = parameters
+
+ def _to_rest_object(self) -> RestColumnTransformer:
+ return RestColumnTransformer(fields=self.fields, parameters=self.parameters)
+
+ @classmethod
+ def _from_rest_object(cls, obj: RestColumnTransformer) -> Optional["ColumnTransformer"]:
+ if obj:
+ fields = obj.fields
+ parameters = obj.parameters
+ return ColumnTransformer(fields=fields, parameters=parameters)
+ return None
+
+ def __eq__(self, other: object) -> bool:
+ if not isinstance(other, ColumnTransformer):
+ return NotImplemented
+ return self.fields == other.fields and self.parameters == other.parameters
+
+ def __ne__(self, other: object) -> bool:
+ return not self.__eq__(other)
+
+
+class TabularFeaturizationSettings(FeaturizationSettings):
+ """Featurization settings for an AutoML Job."""
+
+ def __init__(
+ self,
+ *,
+ blocked_transformers: Optional[List[Union[BlockedTransformers, str]]] = None,
+ column_name_and_types: Optional[Dict[str, str]] = None,
+ dataset_language: Optional[str] = None,
+ transformer_params: Optional[Dict[str, List[ColumnTransformer]]] = None,
+ mode: Optional[str] = None,
+ enable_dnn_featurization: Optional[bool] = None,
+ ):
+ """
+ :param blocked_transformers: A list of transformers to ignore when featurizing.
+ :type blocked_transformers: List[Union[BlockedTransformers, str]]
+ :param column_name_and_types: A dictionary of column names and feature types used to update column purpose.
+ :type column_name_and_types: Dict[str, str]
+ :param dataset_language: The language of the dataset.
+ :type dataset_language: str
+ :param transformer_params: A dictionary of transformers and their parameters.
+ :type transformer_params: Dict[str, List[ColumnTransformer]]
+ :param mode: The mode of the featurization.
+ :type mode: str
+ :param enable_dnn_featurization: Whether to enable DNN featurization.
+ :type enable_dnn_featurization: bool
+ :ivar type: Specifies the type of FeaturizationSettings. Set automatically to "Tabular" for this class.
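+
+ A minimal sketch of custom featurization (the column name and imputer
+ parameters below are hypothetical, not defaults):
+
+ .. code-block:: python
+
+     from azure.ai.ml.automl import ColumnTransformer, TabularFeaturizationSettings
+
+     featurization = TabularFeaturizationSettings(
+         mode="custom",
+         # "imputer" is normalized to the "Imputer" transformer key by the
+         # transformer_params setter below.
+         transformer_params={
+             "imputer": [
+                 ColumnTransformer(fields=["price"], parameters={"strategy": "median"})
+             ]
+         },
+     )
+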
+ :vartype type: str + """ + super().__init__(dataset_language=dataset_language) + self.blocked_transformers = blocked_transformers + self.column_name_and_types = column_name_and_types + self.transformer_params = transformer_params + self.mode = mode + self.enable_dnn_featurization = enable_dnn_featurization + self.type = FeaturizationSettingsType.TABULAR + + @property + def transformer_params(self) -> Optional[Dict[str, List[ColumnTransformer]]]: + """A dictionary of transformers and their parameters.""" + return self._transformer_params + + @transformer_params.setter + def transformer_params(self, value: Dict[str, List[ColumnTransformer]]) -> None: + self._transformer_params = ( + None + if not value + else {(AutoMLTransformerParameterKeys[camel_to_snake(k).upper()].value): v for k, v in value.items()} + ) + + @property + def blocked_transformers(self) -> Optional[List[Union[BlockedTransformers, str]]]: + """A list of transformers to ignore when featurizing.""" + return self._blocked_transformers + + @blocked_transformers.setter + def blocked_transformers(self, blocked_transformers_list: List[Union[BlockedTransformers, str]]) -> None: + self._blocked_transformers = ( + None + if blocked_transformers_list is None + else [BlockedTransformers[camel_to_snake(o)] for o in blocked_transformers_list] + ) + + def _to_rest_object(self) -> RestTabularFeaturizationSettings: + transformer_dict = {} + if self.transformer_params: + for key, settings in self.transformer_params.items(): + transformer_dict[key] = [o._to_rest_object() for o in settings] + return RestTabularFeaturizationSettings( + blocked_transformers=self.blocked_transformers, + column_name_and_types=self.column_name_and_types, + dataset_language=self.dataset_language, + mode=self.mode, + transformer_params=transformer_dict, + enable_dnn_featurization=self.enable_dnn_featurization, + ) + + @classmethod + def _from_rest_object(cls, obj: RestTabularFeaturizationSettings) -> "TabularFeaturizationSettings": + rest_transformers_params = obj.transformer_params + transformer_dict: Optional[Dict] = None + if rest_transformers_params: + transformer_dict = {} + for key, settings in rest_transformers_params.items(): + transformer_dict[key] = [ColumnTransformer._from_rest_object(o) for o in settings] + transformer_params = transformer_dict + + return TabularFeaturizationSettings( + blocked_transformers=obj.blocked_transformers, + column_name_and_types=obj.column_name_and_types, + dataset_language=obj.dataset_language, + transformer_params=transformer_params, + mode=obj.mode, + enable_dnn_featurization=obj.enable_dnn_featurization, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, TabularFeaturizationSettings): + return NotImplemented + return ( + super().__eq__(other) + and self.blocked_transformers == other.blocked_transformers + and self.column_name_and_types == other.column_name_and_types + and self.transformer_params == other.transformer_params + and self.mode == other.mode + and self.enable_dnn_featurization == other.enable_dnn_featurization + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/forecasting_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/forecasting_job.py new file mode 100644 index 00000000..9bd10b19 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/forecasting_job.py @@ -0,0 +1,686 @@ +# 
--------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Any, Dict, List, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import Forecasting as RestForecasting +from azure.ai.ml._restclient.v2023_04_01_preview.models import ForecastingPrimaryMetrics, JobBase, TaskType +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants import TabularTrainingMode +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.stack_ensemble_settings import StackEnsembleSettings +from azure.ai.ml.entities._job.automl.tabular.automl_tabular import AutoMLTabular +from azure.ai.ml.entities._job.automl.tabular.featurization_settings import TabularFeaturizationSettings +from azure.ai.ml.entities._job.automl.tabular.forecasting_settings import ForecastingSettings +from azure.ai.ml.entities._job.automl.tabular.limit_settings import TabularLimitSettings +from azure.ai.ml.entities._job.automl.training_settings import ForecastingTrainingSettings +from azure.ai.ml.entities._util import load_from_dict + + +class ForecastingJob(AutoMLTabular): + """ + Configuration for AutoML Forecasting Task. + + :param primary_metric: The primary metric to use for model selection. + :type primary_metric: Optional[str] + :param forecasting_settings: The settings for the forecasting task. + :type forecasting_settings: + Optional[~azure.ai.ml.automl.ForecastingSettings] + :param kwargs: Job-specific arguments + :type kwargs: Dict[str, Any] + """ + + _DEFAULT_PRIMARY_METRIC = ForecastingPrimaryMetrics.NORMALIZED_ROOT_MEAN_SQUARED_ERROR + + def __init__( + self, + *, + primary_metric: Optional[str] = None, + forecasting_settings: Optional[ForecastingSettings] = None, + **kwargs: Any, + ) -> None: + """Initialize a new AutoML Forecasting task.""" + # Extract any task specific settings + featurization = kwargs.pop("featurization", None) + limits = kwargs.pop("limits", None) + training = kwargs.pop("training", None) + + super().__init__( + task_type=TaskType.FORECASTING, + featurization=featurization, + limits=limits, + training=training, + **kwargs, + ) + + self.primary_metric = primary_metric or ForecastingJob._DEFAULT_PRIMARY_METRIC + self._forecasting_settings = forecasting_settings + + @property + def primary_metric(self) -> Optional[str]: + """ + Return the primary metric to use for model selection. + + :return: The primary metric for model selection. + :rtype: Optional[str] + """ + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ForecastingPrimaryMetrics]) -> None: + """ + Set the primary metric to use for model selection. + + :param value: The primary metric for model selection. 
+ :type: Union[str, ~azure.ai.ml.automl.ForecastingPrimaryMetrics]
+ """
+ if is_data_binding_expression(str(value), ["parent"]):
+ self._primary_metric = value
+ return
+ self._primary_metric = (
+ ForecastingJob._DEFAULT_PRIMARY_METRIC
+ if value is None
+ else ForecastingPrimaryMetrics[camel_to_snake(value).upper()]
+ )
+
+ @property # type: ignore
+ def training(self) -> ForecastingTrainingSettings:
+ """
+ Return the forecast training settings.
+
+ :return: training settings.
+ :rtype: ~azure.ai.ml.automl.ForecastingTrainingSettings
+ """
+ return self._training or ForecastingTrainingSettings()
+
+ @training.setter
+ def training(self, value: Union[Dict, ForecastingTrainingSettings]) -> None: # pylint: disable=unused-argument
+ ...
+
+ @property
+ def forecasting_settings(self) -> Optional[ForecastingSettings]:
+ """
+ Return the forecast settings.
+
+ :return: forecast settings.
+ :rtype: ~azure.ai.ml.automl.ForecastingSettings
+ """
+ return self._forecasting_settings
+
+ def set_forecast_settings(
+ self,
+ *,
+ time_column_name: Optional[str] = None,
+ forecast_horizon: Optional[Union[str, int]] = None,
+ time_series_id_column_names: Optional[Union[str, List[str]]] = None,
+ target_lags: Optional[Union[str, int, List[int]]] = None,
+ feature_lags: Optional[str] = None,
+ target_rolling_window_size: Optional[Union[str, int]] = None,
+ country_or_region_for_holidays: Optional[str] = None,
+ use_stl: Optional[str] = None,
+ seasonality: Optional[Union[str, int]] = None,
+ short_series_handling_config: Optional[str] = None,
+ frequency: Optional[str] = None,
+ target_aggregate_function: Optional[str] = None,
+ cv_step_size: Optional[int] = None,
+ features_unknown_at_forecast_time: Optional[Union[str, List[str]]] = None,
+ ) -> None:
+ """Manage parameters used by forecasting tasks.
+
+ :keyword time_column_name:
+ The name of the time column. This parameter is required when forecasting to specify the datetime
+ column in the input data used for building the time series and inferring its frequency.
+ :paramtype time_column_name: Optional[str]
+ :keyword forecast_horizon:
+ The desired maximum forecast horizon in units of time-series frequency. The default value is 1.
+
+ Units are based on the time interval of your training data (e.g., monthly or weekly) that the forecaster
+ should predict out. When the task type is forecasting, this parameter is required. For more information on
+ setting forecasting parameters, see `Auto-train a time-series forecast model <https://learn.microsoft.com/
+ azure/machine-learning/how-to-auto-train-forecast>`_.
+ :type forecast_horizon: Optional[Union[int, str]]
+ :keyword time_series_id_column_names:
+ The names of columns used to group a time series.
+ It can be used to create multiple series. If the time series ID column names are not defined, or
+ the specified identifier columns do not identify all the series in the dataset, the time series identifiers
+ will be created automatically for your dataset.
+ :paramtype time_series_id_column_names: Optional[Union[str, List[str]]]
+ :keyword target_lags: The number of past periods to lag from the target column. By default the lags are turned
+ off.
+
+ When forecasting, this parameter represents the number of rows to lag the target values based
+ on the frequency of the data. This is represented as a list or single integer. Lag should be used
+ when the relationship between the independent variables and the dependent variable does not match up or
+ correlate by default.
For example, when trying to forecast demand for a product, the demand in any
+ month may depend on the price of specific commodities 3 months prior. In this example, you may want
+ to lag the target (demand) negatively by 3 months so that the model is training on the correct
+ relationship. For more information, see `Auto-train a time-series forecast model
+ <https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-forecast>`_.
+
+ **Note on auto detection of target lags and rolling window size.
+ Please see the corresponding comments in the rolling window section.**
+ We use the following algorithm to detect the optimal target lag and rolling window size.
+
+ #. Estimate the maximum lag order for the look back feature selection. In our case it is the number of
+ periods till the next date frequency granularity, i.e. if the frequency is daily, it will be a week (7),
+ if it is a week, it will be a month (4). Those values, multiplied by two, are the largest
+ possible values of lags/rolling windows. In our examples, we will consider maximum lag
+ orders of 14 and 8, respectively.
+ #. Create a de-seasonalized series by adding trend and residual components. This will be used
+ in the next step.
+ #. Estimate the PACF - Partial Auto Correlation Function - on the data from (2)
+ and search for points where the auto correlation is significant, i.e. its absolute
+ value is more than 1.96/square_root(maximal lag value), which corresponds to a significance of 95%.
+ #. If all points are significant, we consider it to be strong seasonality
+ and do not create look back features.
+ #. We scan the PACF values from the beginning, and the value before the first insignificant
+ auto correlation will designate the lag. If the first significant element (the value correlated with
+ itself) is followed by an insignificant one, the lag will be 0 and we will not use look back features.
+
+ :type target_lags: Optional[Union[str, int, List[int]]]
+ :keyword feature_lags: Flag for generating lags for the numeric features with 'auto' or None.
+ :paramtype feature_lags: Optional[str]
+ :keyword target_rolling_window_size: The number of past periods used to create a rolling window average of the
+ target column.
+
+ When forecasting, this parameter represents `n` historical periods to use to generate forecasted values,
+ <= training set size. If omitted, `n` is the full training set size. Specify this parameter
+ when you only want to consider a certain amount of history when training the model.
+ If set to 'auto', the rolling window will be estimated as the last
+ value where the PACF is more than the significance threshold. Please see the target_lags section for details.
+ :paramtype target_rolling_window_size: Optional[Union[str, int]]
+ :keyword country_or_region_for_holidays: The country/region used to generate holiday features.
+ These should be ISO 3166 two-letter country/region codes, for example 'US' or 'GB'.
+ :paramtype country_or_region_for_holidays: Optional[str]
+ :keyword use_stl: Configure STL Decomposition of the time-series target column.
+ use_stl can take three values: None (default) - no stl decomposition, 'season' - only generate
+ season component and season_trend - generate both season and trend components.
+ :type use_stl: Optional[str]
+ :keyword seasonality: Set time series seasonality as an integer multiple of the series frequency.
+ If seasonality is set to 'auto', it will be inferred.
+ If set to None, the time series is assumed non-seasonal, which is equivalent to seasonality=1.
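+
+ A minimal sketch of configuring a forecasting job with these settings (the
+ data path and column names are hypothetical):
+
+ .. code-block:: python
+
+     from azure.ai.ml import Input, automl
+
+     job = automl.forecasting(
+         training_data=Input(type="mltable", path="./train-mltable-folder"),
+         target_column_name="demand",
+     )
+     # Daily data with weekly seasonality: predict 14 days ahead, lagging the
+     # target by one day and one week.
+     job.set_forecast_settings(
+         time_column_name="date",
+         forecast_horizon=14,
+         seasonality=7,
+         target_lags=[1, 7],
+     )
+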
+ :paramtype seasonality: Optional[Union[int, str]]
+ :keyword short_series_handling_config:
+ The parameter defining how AutoML should handle short time series.
+
+ Possible values: 'auto' (default), 'pad', 'drop' and None.
+
+ * **auto** short series will be padded if there are no long series,
+ otherwise short series will be dropped.
+ * **pad** all the short series will be padded.
+ * **drop** all the short series will be dropped.
+ * **None** the short series will not be modified.
+
+ If set to 'pad', the table will be padded with zeroes and
+ empty values for the regressors, and random values for the target with the mean
+ equal to the target value median for the given time series id. If the median is greater than or equal
+ to zero, the minimal padded value will be clipped by zero:
+ Input:
+
+ +------------+---------------+----------+--------+
+ | Date | numeric_value | string | target |
+ +============+===============+==========+========+
+ | 2020-01-01 | 23 | green | 55 |
+ +------------+---------------+----------+--------+
+
+ Output assuming minimal number of values is four:
+
+ +------------+---------------+----------+--------+
+ | Date | numeric_value | string | target |
+ +============+===============+==========+========+
+ | 2019-12-29 | 0 | NA | 55.1 |
+ +------------+---------------+----------+--------+
+ | 2019-12-30 | 0 | NA | 55.6 |
+ +------------+---------------+----------+--------+
+ | 2019-12-31 | 0 | NA | 54.5 |
+ +------------+---------------+----------+--------+
+ | 2020-01-01 | 23 | green | 55 |
+ +------------+---------------+----------+--------+
+
+ **Note:** We have two parameters, short_series_handling_configuration and
+ the legacy short_series_handling. When both parameters are set, we
+ synchronize them as shown in the table below (short_series_handling_configuration and
+ short_series_handling are, for brevity, marked as handling_configuration and handling
+ respectively).
+
+ +------------+--------------------------+----------------------+-----------------------------+
+ | | handling | | handling | | resulting | | resulting |
+ | | | configuration | | handling | | handling |
+ | | | | | configuration |
+ +============+==========================+======================+=============================+
+ | True | auto | True | auto |
+ +------------+--------------------------+----------------------+-----------------------------+
+ | True | pad | True | auto |
+ +------------+--------------------------+----------------------+-----------------------------+
+ | True | drop | True | auto |
+ +------------+--------------------------+----------------------+-----------------------------+
+ | True | None | False | None |
+ +------------+--------------------------+----------------------+-----------------------------+
+ | False | auto | False | None |
+ +------------+--------------------------+----------------------+-----------------------------+
+ | False | pad | False | None |
+ +------------+--------------------------+----------------------+-----------------------------+
+ | False | drop | False | None |
+ +------------+--------------------------+----------------------+-----------------------------+
+ | False | None | False | None |
+ +------------+--------------------------+----------------------+-----------------------------+
+
+ :type short_series_handling_config: Optional[str]
+ :keyword frequency: Forecast frequency.
+
+ When forecasting, this parameter represents the period with which the forecast is desired,
+ for example daily, weekly, yearly, etc.
The forecast frequency is dataset frequency by default. + You can optionally set it to greater (but not lesser) than dataset frequency. + We'll aggregate the data and generate the results at forecast frequency. For example, + for daily data, you can set the frequency to be daily, weekly or monthly, but not hourly. + The frequency needs to be a pandas offset alias. + Please refer to pandas documentation for more information: + https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects + :type frequency: Optional[str] + :keyword target_aggregate_function: The function to be used to aggregate the time series target + column to conform to a user specified frequency. If the target_aggregation_function is set, + but the freq parameter is not set, the error is raised. The possible target aggregation + functions are: "sum", "max", "min" and "mean". + + * The target column values are aggregated based on the specified operation. + Typically, sum is appropriate for most scenarios. + * Numerical predictor columns in your data are aggregated by sum, mean, minimum value, + and maximum value. As a result, automated ML generates new columns suffixed with the + aggregation function name and applies the selected aggregate operation. + * For categorical predictor columns, the data is aggregated by mode, + the most prominent category in the window. + * Date predictor columns are aggregated by minimum value, maximum value and mode. + + +----------------+-------------------------------+--------------------------------------+ + | | freq | | target_aggregation_function | | Data regularity | + | | | | fixing mechanism | + +================+===============================+======================================+ + | None (Default) | None (Default) | | The aggregation | + | | | | is not applied. | + | | | | If the valid | + | | | | frequency can | + | | | | not be | + | | | | determined | + | | | | the error | + | | | | will be raised. | + +----------------+-------------------------------+--------------------------------------+ + | Some Value | None (Default) | | The aggregation | + | | | | is not applied. | + | | | | If the number | + | | | | of data points | + | | | | compliant to | + | | | | given frequency | + | | | | grid is | + | | | | less then 90% | + | | | | these points | + | | | | will be | + | | | | removed, | + | | | | otherwise | + | | | | the error will | + | | | | be raised. | + +----------------+-------------------------------+--------------------------------------+ + | None (Default) | Aggregation function | | The error about | + | | | | missing | + | | | | frequency | + | | | | parameter is | + | | | | raised. | + +----------------+-------------------------------+--------------------------------------+ + | Some Value | Aggregation function | | Aggregate to | + | | | | frequency using | + | | | | provided | + | | | | aggregation | + | | | | function. | + +----------------+-------------------------------+--------------------------------------+ + + :type target_aggregate_function: Optional[str] + :keyword cv_step_size: Number of periods between the origin_time of one CV fold and the next fold. + For example, if `n_step` = 3 for daily data, the origin time for each fold will be three days apart. + :paramtype cv_step_size: Optional[int] + :keyword features_unknown_at_forecast_time: The feature columns that are available for training but + unknown at the time of forecast/inference. 
If features_unknown_at_forecast_time is set to an empty + list, it is assumed that all the feature columns in the dataset are known at inference time. If this + parameter is not set the support for future features is not enabled. + :paramtype features_unknown_at_forecast_time: Optional[Union[str, List[str]]] + """ + self._forecasting_settings = self._forecasting_settings or ForecastingSettings() + + self._forecasting_settings.country_or_region_for_holidays = ( + country_or_region_for_holidays + if country_or_region_for_holidays is not None + else self._forecasting_settings.country_or_region_for_holidays + ) + self._forecasting_settings.cv_step_size = ( + cv_step_size if cv_step_size is not None else self._forecasting_settings.cv_step_size + ) + self._forecasting_settings.forecast_horizon = ( + forecast_horizon if forecast_horizon is not None else self._forecasting_settings.forecast_horizon + ) + self._forecasting_settings.target_lags = ( + target_lags if target_lags is not None else self._forecasting_settings.target_lags + ) + self._forecasting_settings.target_rolling_window_size = ( + target_rolling_window_size + if target_rolling_window_size is not None + else self._forecasting_settings.target_rolling_window_size + ) + self._forecasting_settings.frequency = ( + frequency if frequency is not None else self._forecasting_settings.frequency + ) + self._forecasting_settings.feature_lags = ( + feature_lags if feature_lags is not None else self._forecasting_settings.feature_lags + ) + self._forecasting_settings.seasonality = ( + seasonality if seasonality is not None else self._forecasting_settings.seasonality + ) + self._forecasting_settings.use_stl = use_stl if use_stl is not None else self._forecasting_settings.use_stl + self._forecasting_settings.short_series_handling_config = ( + short_series_handling_config + if short_series_handling_config is not None + else self._forecasting_settings.short_series_handling_config + ) + self._forecasting_settings.target_aggregate_function = ( + target_aggregate_function + if target_aggregate_function is not None + else self._forecasting_settings.target_aggregate_function + ) + self._forecasting_settings.time_column_name = ( + time_column_name if time_column_name is not None else self._forecasting_settings.time_column_name + ) + self._forecasting_settings.time_series_id_column_names = ( + time_series_id_column_names + if time_series_id_column_names is not None + else self._forecasting_settings.time_series_id_column_names + ) + self._forecasting_settings.features_unknown_at_forecast_time = ( + features_unknown_at_forecast_time + if features_unknown_at_forecast_time is not None + else self._forecasting_settings.features_unknown_at_forecast_time + ) + + # override + def set_training( + self, + *, + enable_onnx_compatible_models: Optional[bool] = None, + enable_dnn_training: Optional[bool] = None, + enable_model_explainability: Optional[bool] = None, + enable_stack_ensemble: Optional[bool] = None, + enable_vote_ensemble: Optional[bool] = None, + stack_ensemble_settings: Optional[StackEnsembleSettings] = None, + ensemble_model_download_timeout: Optional[int] = None, + allowed_training_algorithms: Optional[List[str]] = None, + blocked_training_algorithms: Optional[List[str]] = None, + training_mode: Optional[Union[str, TabularTrainingMode]] = None, + ) -> None: + """ + The method to configure forecast training related settings. + + :keyword enable_onnx_compatible_models: + Whether to enable or disable enforcing the ONNX-compatible models. + The default is False. 
For more information about Open Neural Network Exchange (ONNX) and Azure Machine
+ Learning, see this `article <https://learn.microsoft.com/azure/machine-learning/concept-onnx>`__.
+ :type enable_onnx_compatible_models: Optional[bool]
+ :keyword enable_dnn_training:
+ Whether to include DNN based models during model selection.
+ The default is True for DNN NLP tasks and False for all other AutoML tasks.
+ :paramtype enable_dnn_training: Optional[bool]
+ :keyword enable_model_explainability:
+ Whether to enable explaining the best AutoML model at the end of all AutoML training iterations.
+ For more information, see `Interpretability: model explanations in automated machine learning
+ <https://learn.microsoft.com/azure/machine-learning/how-to-machine-learning-interpretability-automl>`__,
+ defaults to None
+ :type enable_model_explainability: Optional[bool]
+ :keyword enable_stack_ensemble:
+ Whether to enable/disable the StackEnsemble iteration.
+ If the `enable_onnx_compatible_models` flag is being set, then the StackEnsemble iteration will be disabled.
+ Similarly, for Timeseries tasks, the StackEnsemble iteration will be disabled by default to avoid risks of
+ overfitting due to the small training set used in fitting the meta learner.
+ For more information about ensembles, see `Ensemble configuration
+ <https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#ensemble>`__,
+ defaults to None
+ :type enable_stack_ensemble: Optional[bool]
+ :keyword enable_vote_ensemble:
+ Whether to enable/disable the VotingEnsemble iteration.
+ For more information about ensembles, see `Ensemble configuration
+ <https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#ensemble>`__,
+ defaults to None
+ :type enable_vote_ensemble: Optional[bool]
+ :keyword stack_ensemble_settings:
+ Settings for the StackEnsemble iteration, defaults to None
+ :paramtype stack_ensemble_settings: Optional[StackEnsembleSettings]
+ :keyword ensemble_model_download_timeout:
+ During VotingEnsemble and StackEnsemble model generation,
+ multiple fitted models from the previous child runs are downloaded. Configure this parameter with a
+ value higher than 300 secs if more time is needed, defaults to None
+ :paramtype ensemble_model_download_timeout: Optional[int]
+ :keyword allowed_training_algorithms:
+ A list of model names to search for an experiment. If not specified,
+ then all models supported for the task are used, minus any specified in ``blocked_training_algorithms``
+ or deprecated TensorFlow models, defaults to None
+ :paramtype allowed_training_algorithms: Optional[List[str]]
+ :keyword blocked_training_algorithms:
+ A list of algorithms to ignore for an experiment, defaults to None
+ :paramtype blocked_training_algorithms: Optional[List[str]]
+ :keyword training_mode:
+ [Experimental] The training mode to use.
+ The possible values are:
+
+ * distributed - enables distributed training for supported algorithms.
+
+ * non_distributed - disables distributed training.
+
+ * auto - currently the same as non_distributed; this might change in the future.
+
+ Note: This parameter is in public preview and may change in the future.
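+
+ A minimal sketch of a call to this method (the flag values are illustrative,
+ not recommendations):
+
+ .. code-block:: python
+
+     job.set_training(
+         enable_model_explainability=True,
+         enable_vote_ensemble=True,
+         enable_stack_ensemble=False,  # stacking stays off by default for forecasting
+         blocked_training_algorithms=["ExtremeRandomTrees"],
+     )
+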
+ :type training_mode: Optional[Union[~azure.ai.ml.constants.TabularTrainingMode, str]] + """ + super().set_training( + enable_onnx_compatible_models=enable_onnx_compatible_models, + enable_dnn_training=enable_dnn_training, + enable_model_explainability=enable_model_explainability, + enable_stack_ensemble=enable_stack_ensemble, + enable_vote_ensemble=enable_vote_ensemble, + stack_ensemble_settings=stack_ensemble_settings, + ensemble_model_download_timeout=ensemble_model_download_timeout, + allowed_training_algorithms=allowed_training_algorithms, + blocked_training_algorithms=blocked_training_algorithms, + training_mode=training_mode, + ) + + # Disable stack ensemble by default, since it is currently not supported for forecasting tasks + if enable_stack_ensemble is None: + if self._training is not None: + self._training.enable_stack_ensemble = False + + def _to_rest_object(self) -> JobBase: + if self._forecasting_settings is not None: + forecasting_task = RestForecasting( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + validation_data_size=self.validation_data_size, + weight_column_name=self.weight_column_name, + cv_split_column_names=self.cv_split_column_names, + n_cross_validations=self.n_cross_validations, + test_data=self.test_data, + test_data_size=self.test_data_size, + featurization_settings=self._featurization._to_rest_object() if self._featurization else None, + limit_settings=self._limits._to_rest_object() if self._limits else None, + training_settings=self._training._to_rest_object() if self._training else None, + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + forecasting_settings=self._forecasting_settings._to_rest_object(), + ) + else: + forecasting_task = RestForecasting( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + validation_data_size=self.validation_data_size, + weight_column_name=self.weight_column_name, + cv_split_column_names=self.cv_split_column_names, + n_cross_validations=self.n_cross_validations, + test_data=self.test_data, + test_data_size=self.test_data_size, + featurization_settings=self._featurization._to_rest_object() if self._featurization else None, + limit_settings=self._limits._to_rest_object() if self._limits else None, + training_settings=self._training._to_rest_object() if self._training else None, + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + forecasting_settings=None, + ) + + self._resolve_data_inputs(forecasting_task) + self._validation_data_to_rest(forecasting_task) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=forecasting_task, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "ForecastingJob": + properties: RestAutoMLJob = obj.properties + task_details: RestForecasting = properties.task_details + + job_args_dict = { + "id": obj.id, + "name": obj.name, + 
"description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "experiment_name": properties.experiment_name, + "services": properties.services, + "status": properties.status, + "creation_context": obj.system_data, + "display_name": properties.display_name, + "compute": properties.compute_id, + "outputs": from_rest_data_outputs(properties.outputs), + "resources": properties.resources, + "identity": ( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + "queue_settings": properties.queue_settings, + } + + forecasting_job = cls( + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + validation_data_size=task_details.validation_data_size, + weight_column_name=task_details.weight_column_name, + cv_split_column_names=task_details.cv_split_column_names, + n_cross_validations=task_details.n_cross_validations, + test_data=task_details.test_data, + test_data_size=task_details.test_data_size, + featurization=( + TabularFeaturizationSettings._from_rest_object(task_details.featurization_settings) + if task_details.featurization_settings + else None + ), + limits=( + TabularLimitSettings._from_rest_object(task_details.limit_settings) + if task_details.limit_settings + else None + ), + training=( + ForecastingTrainingSettings._from_rest_object(task_details.training_settings) + if task_details.training_settings + else None + ), + primary_metric=task_details.primary_metric, + forecasting_settings=( + ForecastingSettings._from_rest_object(task_details.forecasting_settings) + if task_details.forecasting_settings + else None + ), + log_verbosity=task_details.log_verbosity, + **job_args_dict, + ) + + forecasting_job._restore_data_inputs() + forecasting_job._validation_data_from_rest() + + return forecasting_job + + @classmethod + def _load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "ForecastingJob": + from azure.ai.ml._schema.automl.table_vertical.forecasting import AutoMLForecastingSchema + from azure.ai.ml._schema.pipeline.automl_node import AutoMLForecastingNodeSchema + + if kwargs.pop("inside_pipeline", False): + loaded_data = load_from_dict(AutoMLForecastingNodeSchema, data, context, additional_message, **kwargs) + else: + loaded_data = load_from_dict(AutoMLForecastingSchema, data, context, additional_message, **kwargs) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "ForecastingJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + data_settings = { + "training_data": loaded_data.pop("training_data"), + "target_column_name": loaded_data.pop("target_column_name"), + "weight_column_name": loaded_data.pop("weight_column_name", None), + "validation_data": loaded_data.pop("validation_data", None), + "validation_data_size": loaded_data.pop("validation_data_size", None), + "cv_split_column_names": loaded_data.pop("cv_split_column_names", None), + "n_cross_validations": loaded_data.pop("n_cross_validations", None), + "test_data": loaded_data.pop("test_data", None), + "test_data_size": loaded_data.pop("test_data_size", None), + } + job = ForecastingJob(**loaded_data) + job.set_data(**data_settings) + return job + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.table_vertical.forecasting 
import AutoMLForecastingSchema + from azure.ai.ml._schema.pipeline.automl_node import AutoMLForecastingNodeSchema + + schema_dict: dict = {} + if inside_pipeline: + schema_dict = AutoMLForecastingNodeSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + else: + schema_dict = AutoMLForecastingSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return schema_dict + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ForecastingJob): + return NotImplemented + + if not super(ForecastingJob, self).__eq__(other): + return False + + return self.primary_metric == other.primary_metric and self._forecasting_settings == other._forecasting_settings + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/forecasting_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/forecasting_settings.py new file mode 100644 index 00000000..09439483 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/forecasting_settings.py @@ -0,0 +1,383 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=too-many-instance-attributes + +from typing import List, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + AutoForecastHorizon, + AutoSeasonality, + AutoTargetLags, + AutoTargetRollingWindowSize, + CustomForecastHorizon, + CustomSeasonality, + CustomTargetLags, + CustomTargetRollingWindowSize, + ForecastHorizonMode, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + ForecastingSettings as RestForecastingSettings, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + SeasonalityMode, + TargetLagsMode, + TargetRollingWindowSizeMode, +) +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class ForecastingSettings(RestTranslatableMixin): + """Forecasting settings for an AutoML Job. + + :param country_or_region_for_holidays: The country/region used to generate holiday features. These should be ISO + 3166 two-letter country/region code, for example 'US' or 'GB'. + :type country_or_region_for_holidays: Optional[str] + :param cv_step_size: + Number of periods between the origin_time of one CV fold and the next fold. For + example, if `n_step` = 3 for daily data, the origin time for each fold will be + three days apart. + :type cv_step_size: Optional[int] + :param forecast_horizon: + The desired maximum forecast horizon in units of time-series frequency. The default value is 1. + + Units are based on the time interval of your training data, e.g., monthly, weekly that the forecaster + should predict out. When task type is forecasting, this parameter is required. For more information on + setting forecasting parameters, see `Auto-train a time-series forecast model <https://learn.microsoft.com/ + azure/machine-learning/how-to-auto-train-forecast>`_. + :type forecast_horizon: Optional[Union[int, str]] + :param target_lags: + The number of past periods to lag from the target column. By default the lags are turned off. + + When forecasting, this parameter represents the number of rows to lag the target values based + on the frequency of the data. This is represented as a list or single integer. 
Lag should be used
+ when the relationship between the independent variables and the dependent variable does not match up or
+ correlate by default. For example, when trying to forecast demand for a product, the demand in any
+ month may depend on the price of specific commodities 3 months prior. In this example, you may want
+ to lag the target (demand) negatively by 3 months so that the model is training on the correct
+ relationship. For more information, see `Auto-train a time-series forecast model
+ <https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-forecast>`_.
+
+ **Note on auto detection of target lags and rolling window size.
+ Please see the corresponding comments in the rolling window section.**
+ We use the following algorithm to detect the optimal target lag and rolling window size.
+
+ #. Estimate the maximum lag order for the look back feature selection. In our case it is the number of
+ periods till the next date frequency granularity, i.e. if the frequency is daily, it will be a week (7),
+ if it is a week, it will be a month (4). Those values, multiplied by two, are the largest
+ possible values of lags/rolling windows. In our examples, we will consider maximum lag
+ orders of 14 and 8, respectively.
+ #. Create a de-seasonalized series by adding trend and residual components. This will be used
+ in the next step.
+ #. Estimate the PACF - Partial Auto Correlation Function - on the data from (2)
+ and search for points where the auto correlation is significant, i.e. its absolute
+ value is more than 1.96/square_root(maximal lag value), which corresponds to a significance of 95%.
+ #. If all points are significant, we consider it to be strong seasonality
+ and do not create look back features.
+ #. We scan the PACF values from the beginning, and the value before the first insignificant
+ auto correlation will designate the lag. If the first significant element (the value correlated with
+ itself) is followed by an insignificant one, the lag will be 0 and we will not use look back features.
+ :type target_lags: Union[str, int, List[int]]
+ :param target_rolling_window_size:
+ The number of past periods used to create a rolling window average of the target column.
+
+ When forecasting, this parameter represents `n` historical periods to use to generate forecasted values,
+ <= training set size. If omitted, `n` is the full training set size. Specify this parameter
+ when you only want to consider a certain amount of history when training the model.
+ If set to 'auto', the rolling window will be estimated as the last
+ value where the PACF is more than the significance threshold. Please see the target_lags section for details.
+ :type target_rolling_window_size: Optional[Union[str, int]]
+ :param frequency: Forecast frequency.
+
+ When forecasting, this parameter represents the period with which the forecast is desired,
+ for example daily, weekly, yearly, etc. The forecast frequency is the dataset frequency by default.
+ You can optionally set it to greater (but not lesser) than the dataset frequency.
+ We'll aggregate the data and generate the results at forecast frequency. For example,
+ for daily data, you can set the frequency to be daily, weekly or monthly, but not hourly.
+ The frequency needs to be a pandas offset alias.
+ Please refer to the pandas documentation for more information:
+ https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
+ :type frequency: Optional[str]
+ :param feature_lags: Flag for generating lags for the numeric features with 'auto' or None.
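+
+ A minimal sketch of building these settings directly (the column name and
+ values are illustrative):
+
+ .. code-block:: python
+
+     from azure.ai.ml.automl import ForecastingSettings
+
+     settings = ForecastingSettings(
+         time_column_name="date",
+         forecast_horizon=7,
+         target_lags="auto",   # let the PACF heuristic described above pick the lags
+         feature_lags="auto",  # also generate lags for the numeric features
+     )
+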
+ :type feature_lags: Optional[str]
+ :param seasonality: Set time series seasonality as an integer multiple of the series frequency.
+ If seasonality is set to 'auto', it will be inferred.
+ If set to None, the time series is assumed non-seasonal, which is equivalent to seasonality=1.
+ :type seasonality: Optional[Union[int, str]]
+ :param use_stl: Configure STL Decomposition of the time-series target column.
+ use_stl can take three values: None (default) - no stl decomposition, 'season' - only generate
+ season component and season_trend - generate both season and trend components.
+ :type use_stl: Optional[str]
+ :param short_series_handling_config:
+ The parameter defining how AutoML should handle short time series.
+
+ Possible values: 'auto' (default), 'pad', 'drop' and None.
+ * **auto** short series will be padded if there are no long series,
+ otherwise short series will be dropped.
+ * **pad** all the short series will be padded.
+ * **drop** all the short series will be dropped.
+ * **None** the short series will not be modified.
+ If set to 'pad', the table will be padded with zeroes and
+ empty values for the regressors, and random values for the target with the mean
+ equal to the target value median for the given time series id. If the median is greater than or equal
+ to zero, the minimal padded value will be clipped by zero.
+ Input:
+
+ +------------+---------------+----------+--------+
+ | Date | numeric_value | string | target |
+ +============+===============+==========+========+
+ | 2020-01-01 | 23 | green | 55 |
+ +------------+---------------+----------+--------+
+
+ Output assuming minimal number of values is four:
+
+ +------------+---------------+----------+--------+
+ | Date | numeric_value | string | target |
+ +============+===============+==========+========+
+ | 2019-12-29 | 0 | NA | 55.1 |
+ +------------+---------------+----------+--------+
+ | 2019-12-30 | 0 | NA | 55.6 |
+ +------------+---------------+----------+--------+
+ | 2019-12-31 | 0 | NA | 54.5 |
+ +------------+---------------+----------+--------+
+ | 2020-01-01 | 23 | green | 55 |
+ +------------+---------------+----------+--------+
+
+ **Note:** We have two parameters, short_series_handling_configuration and
+ the legacy short_series_handling. When both parameters are set, we
+ synchronize them as shown in the table below (short_series_handling_configuration and
+ short_series_handling are, for brevity, marked as handling_configuration and handling
+ respectively).
+ + +------------+--------------------------+----------------------+-----------------------------+ + | | handling | | handling configuration | | resulting handling | | resulting handling | + | | | | | configuration | + +============+==========================+======================+=============================+ + | True | auto | True | auto | + +------------+--------------------------+----------------------+-----------------------------+ + | True | pad | True | auto | + +------------+--------------------------+----------------------+-----------------------------+ + | True | drop | True | auto | + +------------+--------------------------+----------------------+-----------------------------+ + | True | None | False | None | + +------------+--------------------------+----------------------+-----------------------------+ + | False | auto | False | None | + +------------+--------------------------+----------------------+-----------------------------+ + | False | pad | False | None | + +------------+--------------------------+----------------------+-----------------------------+ + | False | drop | False | None | + +------------+--------------------------+----------------------+-----------------------------+ + | False | None | False | None | + +------------+--------------------------+----------------------+-----------------------------+ + + :type short_series_handling_config: Optional[str] + :param target_aggregate_function: The function to be used to aggregate the time series target + column to conform to a user specified frequency. If the + target_aggregation_function is set, but the freq parameter + is not set, the error is raised. The possible target + aggregation functions are: "sum", "max", "min" and "mean". + + * The target column values are aggregated based on the specified operation. + Typically, sum is appropriate for most scenarios. + * Numerical predictor columns in your data are aggregated by sum, mean, minimum value, + and maximum value. As a result, automated ML generates new columns suffixed with the + aggregation function name and applies the selected aggregate operation. + * For categorical predictor columns, the data is aggregated by mode, + the most prominent category in the window. + * Date predictor columns are aggregated by minimum value, maximum value and mode. + + +----------------+-------------------------------+--------------------------------------+ + | | freq | | target_aggregation_function | | Data regularity | + | | | | fixing mechanism | + +================+===============================+======================================+ + | None (Default) | None (Default) | | The aggregation is not | + | | | | applied. If the valid | + | | | | frequency can not be | + | | | | determined the error will | + | | | | be raised. | + +----------------+-------------------------------+--------------------------------------+ + | Some Value | None (Default) | | The aggregation is not | + | | | | applied. If the number | + | | | | of data points compliant | + | | | | to given frequency grid | + | | | | is less then 90% these points | + | | | | will be removed, otherwise | + | | | | the error will be raised. | + +----------------+-------------------------------+--------------------------------------+ + | None (Default) | Aggregation function | | The error about missing | + | | | | frequency parameter | + | | | | is raised. 
| + +----------------+-------------------------------+--------------------------------------+ + | Some Value | Aggregation function | | Aggregate to frequency using | + | | | | provided aggregation function. | + +----------------+-------------------------------+--------------------------------------+ + :type target_aggregate_function: str + :param time_column_name: + The name of the time column. This parameter is required when forecasting to specify the datetime + column in the input data used for building the time series and inferring its frequency. + :type time_column_name: Optional[str] + :param time_series_id_column_names: + The names of columns used to group a timeseries. + It can be used to create multiple series. If time series id column names is not defined or + the identifier columns specified do not identify all the series in the dataset, the time series identifiers + will be automatically created for your dataset. + :type time_series_id_column_names: Union[str, List[str]] + :param features_unknown_at_forecast_time: + The feature columns that are available for training but unknown at the time of forecast/inference. + If features_unknown_at_forecast_time is set to an empty list, it is assumed that + all the feature columns in the dataset are known at inference time. If this parameter is not set + the support for future features is not enabled. + :type features_unknown_at_forecast_time: Optional[Union[str, List[str]]] + """ + + def __init__( + self, + *, + country_or_region_for_holidays: Optional[str] = None, + cv_step_size: Optional[int] = None, + forecast_horizon: Optional[Union[str, int]] = None, + target_lags: Optional[Union[str, int, List[int]]] = None, + target_rolling_window_size: Optional[Union[str, int]] = None, + frequency: Optional[str] = None, + feature_lags: Optional[str] = None, + seasonality: Optional[Union[str, int]] = None, + use_stl: Optional[str] = None, + short_series_handling_config: Optional[str] = None, + target_aggregate_function: Optional[str] = None, + time_column_name: Optional[str] = None, + time_series_id_column_names: Optional[Union[str, List[str]]] = None, + features_unknown_at_forecast_time: Optional[Union[str, List[str]]] = None, + ): + self.country_or_region_for_holidays = country_or_region_for_holidays + self.cv_step_size = cv_step_size + self.forecast_horizon = forecast_horizon + self.target_lags = target_lags + self.target_rolling_window_size = target_rolling_window_size + self.frequency = frequency + self.feature_lags = feature_lags + self.seasonality = seasonality + self.use_stl = use_stl + self.short_series_handling_config = short_series_handling_config + self.target_aggregate_function = target_aggregate_function + self.time_column_name = time_column_name + self.time_series_id_column_names = time_series_id_column_names + self.features_unknown_at_forecast_time = features_unknown_at_forecast_time + + def _to_rest_object(self) -> RestForecastingSettings: + forecast_horizon = None + if isinstance(self.forecast_horizon, str): + forecast_horizon = AutoForecastHorizon() + elif self.forecast_horizon: + forecast_horizon = CustomForecastHorizon(value=self.forecast_horizon) + + target_lags = None + if isinstance(self.target_lags, str): + target_lags = AutoTargetLags() + elif self.target_lags: + lags = [self.target_lags] if not isinstance(self.target_lags, list) else self.target_lags + target_lags = CustomTargetLags(values=lags) + + target_rolling_window_size = None + if isinstance(self.target_rolling_window_size, str): + target_rolling_window_size = 
AutoTargetRollingWindowSize() + elif self.target_rolling_window_size: + target_rolling_window_size = CustomTargetRollingWindowSize(value=self.target_rolling_window_size) + + seasonality = None + if isinstance(self.seasonality, str): + seasonality = AutoSeasonality() + elif self.seasonality: + seasonality = CustomSeasonality(value=self.seasonality) + + time_series_id_column_names = self.time_series_id_column_names + if isinstance(self.time_series_id_column_names, str) and self.time_series_id_column_names: + time_series_id_column_names = [self.time_series_id_column_names] + + features_unknown_at_forecast_time = self.features_unknown_at_forecast_time + if isinstance(self.features_unknown_at_forecast_time, str) and self.features_unknown_at_forecast_time: + features_unknown_at_forecast_time = [self.features_unknown_at_forecast_time] + + return RestForecastingSettings( + country_or_region_for_holidays=self.country_or_region_for_holidays, + cv_step_size=self.cv_step_size, + forecast_horizon=forecast_horizon, + time_column_name=self.time_column_name, + target_lags=target_lags, + target_rolling_window_size=target_rolling_window_size, + seasonality=seasonality, + frequency=self.frequency, + feature_lags=self.feature_lags, + use_stl=self.use_stl, + short_series_handling_config=self.short_series_handling_config, + target_aggregate_function=self.target_aggregate_function, + time_series_id_column_names=time_series_id_column_names, + features_unknown_at_forecast_time=features_unknown_at_forecast_time, + ) + + @classmethod + def _from_rest_object(cls, obj: RestForecastingSettings) -> "ForecastingSettings": + forecast_horizon = None + if obj.forecast_horizon and obj.forecast_horizon.mode == ForecastHorizonMode.AUTO: + forecast_horizon = obj.forecast_horizon.mode.lower() + elif obj.forecast_horizon: + forecast_horizon = obj.forecast_horizon.value + + rest_target_lags = obj.target_lags + target_lags = None + if rest_target_lags and rest_target_lags.mode == TargetLagsMode.AUTO: + target_lags = rest_target_lags.mode.lower() + elif rest_target_lags: + target_lags = rest_target_lags.values + + target_rolling_window_size = None + if obj.target_rolling_window_size and obj.target_rolling_window_size.mode == TargetRollingWindowSizeMode.AUTO: + target_rolling_window_size = obj.target_rolling_window_size.mode.lower() + elif obj.target_rolling_window_size: + target_rolling_window_size = obj.target_rolling_window_size.value + + seasonality = None + if obj.seasonality and obj.seasonality.mode == SeasonalityMode.AUTO: + seasonality = obj.seasonality.mode.lower() + elif obj.seasonality: + seasonality = obj.seasonality.value + + return cls( + country_or_region_for_holidays=obj.country_or_region_for_holidays, + cv_step_size=obj.cv_step_size, + forecast_horizon=forecast_horizon, + target_lags=target_lags, + target_rolling_window_size=target_rolling_window_size, + frequency=obj.frequency, + feature_lags=obj.feature_lags, + seasonality=seasonality, + use_stl=obj.use_stl, + short_series_handling_config=obj.short_series_handling_config, + target_aggregate_function=obj.target_aggregate_function, + time_column_name=obj.time_column_name, + time_series_id_column_names=obj.time_series_id_column_names, + features_unknown_at_forecast_time=obj.features_unknown_at_forecast_time, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ForecastingSettings): + return NotImplemented + return ( + self.country_or_region_for_holidays == other.country_or_region_for_holidays + and self.cv_step_size == other.cv_step_size + and 
self.forecast_horizon == other.forecast_horizon
+ and self.target_lags == other.target_lags
+ and self.target_rolling_window_size == other.target_rolling_window_size
+ and self.frequency == other.frequency
+ and self.feature_lags == other.feature_lags
+ and self.seasonality == other.seasonality
+ and self.use_stl == other.use_stl
+ and self.short_series_handling_config == other.short_series_handling_config
+ and self.target_aggregate_function == other.target_aggregate_function
+ and self.time_column_name == other.time_column_name
+ and self.time_series_id_column_names == other.time_series_id_column_names
+ and self.features_unknown_at_forecast_time == other.features_unknown_at_forecast_time
+ )
+
+ def __ne__(self, other: object) -> bool:
+ return not self.__eq__(other)
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/limit_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/limit_settings.py
new file mode 100644
index 00000000..1024f504
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/limit_settings.py
@@ -0,0 +1,101 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+from typing import Optional
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import TableVerticalLimitSettings as RestTabularLimitSettings
+from azure.ai.ml._utils.utils import from_iso_duration_format_mins, to_iso_duration_format_mins
+from azure.ai.ml.entities._mixins import RestTranslatableMixin
+
+
+class TabularLimitSettings(RestTranslatableMixin):
+ """Limit settings for AutoML Table Verticals.
+
+ :param enable_early_termination: Whether to enable early termination if the score is not improving in
+ the short term. The default is True.
+ :type enable_early_termination: bool
+ :param exit_score: Target score for the experiment. The experiment terminates after this score is reached.
+ :type exit_score: float
+ :param max_concurrent_trials: Maximum number of concurrent AutoML iterations.
+ :type max_concurrent_trials: int
+ :param max_cores_per_trial: The maximum number of threads to use for a given training iteration.
+ :type max_cores_per_trial: int
+ :param max_nodes: [Experimental] The maximum number of nodes to use for distributed training.
+
+ * For forecasting, each model is trained using max(2, int(max_nodes / max_concurrent_trials)) nodes.
+
+ * For classification/regression, each model is trained using max_nodes nodes.
+
+ Note: This parameter is in public preview and might change in the future.
+ :type max_nodes: int
+ :param max_trials: Maximum number of AutoML iterations.
+ :type max_trials: int
+ :param timeout_minutes: Timeout for the whole AutoML job, in minutes.
+ :type timeout_minutes: int
+ :param trial_timeout_minutes: Timeout for each individual AutoML trial, in minutes.
+ :type trial_timeout_minutes: int
+ """
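[Editor's note] The minute-valued timeouts documented above travel over the wire as ISO 8601 durations; here is a brief sketch of the round trip through the helpers imported at the top of this file. The exact serialized string ("PT30M") is an assumption based on the helpers' names and the ISO 8601 duration format:

    # Hedged sketch: 30 minutes is expected to serialize to an ISO 8601
    # duration such as "PT30M" and deserialize back to the integer 30.
    from azure.ai.ml._utils.utils import from_iso_duration_format_mins, to_iso_duration_format_mins

    iso = to_iso_duration_format_mins(30)
    assert from_iso_duration_format_mins(iso) == 30

    limits = TabularLimitSettings(timeout_minutes=60, trial_timeout_minutes=30)
    rest = limits._to_rest_object()           # timeout / trial_timeout carry the ISO strings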
+
+ def __init__(
+ self,
+ *,
+ enable_early_termination: Optional[bool] = None,
+ exit_score: Optional[float] = None,
+ max_concurrent_trials: Optional[int] = None,
+ max_cores_per_trial: Optional[int] = None,
+ max_nodes: Optional[int] = None,
+ max_trials: Optional[int] = None,
+ timeout_minutes: Optional[int] = None,
+ trial_timeout_minutes: Optional[int] = None,
+ ):
+ self.enable_early_termination = enable_early_termination
+ self.exit_score = exit_score
+ self.max_concurrent_trials = max_concurrent_trials
+ self.max_cores_per_trial = max_cores_per_trial
+ self.max_nodes = max_nodes
+ self.max_trials = max_trials
+ self.timeout_minutes = timeout_minutes
+ self.trial_timeout_minutes = trial_timeout_minutes
+
+ def _to_rest_object(self) -> RestTabularLimitSettings:
+ return RestTabularLimitSettings(
+ enable_early_termination=self.enable_early_termination,
+ exit_score=self.exit_score,
+ max_concurrent_trials=self.max_concurrent_trials,
+ max_cores_per_trial=self.max_cores_per_trial,
+ max_nodes=self.max_nodes,
+ max_trials=self.max_trials,
+ timeout=to_iso_duration_format_mins(self.timeout_minutes),
+ trial_timeout=to_iso_duration_format_mins(self.trial_timeout_minutes),
+ )
+
+ @classmethod
+ def _from_rest_object(cls, obj: RestTabularLimitSettings) -> "TabularLimitSettings":
+ return cls(
+ enable_early_termination=obj.enable_early_termination,
+ exit_score=obj.exit_score,
+ max_concurrent_trials=obj.max_concurrent_trials,
+ max_cores_per_trial=obj.max_cores_per_trial,
+ max_nodes=obj.max_nodes,
+ max_trials=obj.max_trials,
+ timeout_minutes=from_iso_duration_format_mins(obj.timeout),
+ trial_timeout_minutes=from_iso_duration_format_mins(obj.trial_timeout),
+ )
+
+ def __eq__(self, other: object) -> bool:
+ if not isinstance(other, TabularLimitSettings):
+ return NotImplemented
+ return (
+ self.enable_early_termination == other.enable_early_termination
+ and self.exit_score == other.exit_score
+ and self.max_concurrent_trials == other.max_concurrent_trials
+ and self.max_cores_per_trial == other.max_cores_per_trial
+ and self.max_nodes == other.max_nodes
+ and self.max_trials == other.max_trials
+ and self.timeout_minutes == other.timeout_minutes
+ and self.trial_timeout_minutes == other.trial_timeout_minutes
+ )
+
+ def __ne__(self, other: object) -> bool:
+ return not self.__eq__(other)
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/regression_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/regression_job.py
new file mode 100644
index 00000000..3531e52c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/tabular/regression_job.py
@@ -0,0 +1,239 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase +from azure.ai.ml._restclient.v2023_04_01_preview.models import Regression as RestRegression +from azure.ai.ml._restclient.v2023_04_01_preview.models import RegressionPrimaryMetrics, TaskType +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.tabular import AutoMLTabular, TabularFeaturizationSettings, TabularLimitSettings +from azure.ai.ml.entities._job.automl.training_settings import RegressionTrainingSettings +from azure.ai.ml.entities._util import load_from_dict + + +class RegressionJob(AutoMLTabular): + """Configuration for AutoML Regression Job.""" + + _DEFAULT_PRIMARY_METRIC = RegressionPrimaryMetrics.NORMALIZED_ROOT_MEAN_SQUARED_ERROR + + def __init__( + self, + *, + primary_metric: Optional[str] = None, + **kwargs: Any, + ) -> None: + """Initialize a new AutoML Regression task. + + :param primary_metric: The primary metric to use for optimization + :type primary_metric: str + :param kwargs: Job-specific arguments + :type kwargs: dict + """ + # Extract any task specific settings + featurization = kwargs.pop("featurization", None) + limits = kwargs.pop("limits", None) + training = kwargs.pop("training", None) + + super().__init__( + task_type=TaskType.REGRESSION, + featurization=featurization, + limits=limits, + training=training, + **kwargs, + ) + + self.primary_metric = primary_metric or RegressionJob._DEFAULT_PRIMARY_METRIC + + @property + def primary_metric(self) -> Union[str, RegressionPrimaryMetrics]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, RegressionPrimaryMetrics]) -> None: + # TODO: better way to do this + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + self._primary_metric = ( + RegressionJob._DEFAULT_PRIMARY_METRIC + if value is None + else RegressionPrimaryMetrics[camel_to_snake(value).upper()] + ) + + @property + def training(self) -> RegressionTrainingSettings: + return self._training or RegressionTrainingSettings() + + @training.setter + def training(self, value: Union[Dict, RegressionTrainingSettings]) -> None: # pylint: disable=unused-argument + ... 
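[Editor's note] The primary_metric setter shown above normalizes strings of any casing into RegressionPrimaryMetrics via camel_to_snake, falling back to the default metric for None. A short sketch follows; constructing the job without data inputs is for illustration only, and the behavior is inferred from the setter above rather than from SDK documentation:

    # Hedged sketch of the normalization performed by the setter above.
    job = RegressionJob(primary_metric="NormalizedRootMeanSquaredError")
    assert job.primary_metric == RegressionPrimaryMetrics.NORMALIZED_ROOT_MEAN_SQUARED_ERROR

    job.primary_metric = "r2_score"           # snake_case resolves to the same enum member
    assert job.primary_metric == RegressionPrimaryMetrics.R2_SCORE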
+ + def _to_rest_object(self) -> JobBase: + regression_task = RestRegression( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + validation_data_size=self.validation_data_size, + weight_column_name=self.weight_column_name, + cv_split_column_names=self.cv_split_column_names, + n_cross_validations=self.n_cross_validations, + test_data=self.test_data, + test_data_size=self.test_data_size, + featurization_settings=self._featurization._to_rest_object() if self._featurization else None, + limit_settings=self._limits._to_rest_object() if self._limits else None, + training_settings=self._training._to_rest_object() if self._training else None, + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + self._resolve_data_inputs(regression_task) + self._validation_data_to_rest(regression_task) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=regression_task, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "RegressionJob": + properties: RestAutoMLJob = obj.properties + task_details: RestRegression = properties.task_details + + job_args_dict = { + "id": obj.id, + "name": obj.name, + "description": properties.description, + "tags": properties.tags, + "properties": properties.properties, + "experiment_name": properties.experiment_name, + "services": properties.services, + "status": properties.status, + "creation_context": obj.system_data, + "display_name": properties.display_name, + "compute": properties.compute_id, + "outputs": from_rest_data_outputs(properties.outputs), + "resources": properties.resources, + "identity": ( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + "queue_settings": properties.queue_settings, + } + + regression_job = cls( + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + validation_data_size=task_details.validation_data_size, + weight_column_name=task_details.weight_column_name, + cv_split_column_names=task_details.cv_split_column_names, + n_cross_validations=task_details.n_cross_validations, + test_data=task_details.test_data, + test_data_size=task_details.test_data_size, + featurization=( + TabularFeaturizationSettings._from_rest_object(task_details.featurization_settings) + if task_details.featurization_settings + else None + ), + limits=( + TabularLimitSettings._from_rest_object(task_details.limit_settings) + if task_details.limit_settings + else None + ), + training=( + RegressionTrainingSettings._from_rest_object(task_details.training_settings) + if task_details.training_settings + else None + ), + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + **job_args_dict, + ) + + regression_job._restore_data_inputs() + regression_job._validation_data_from_rest() + + return regression_job + + @classmethod + def 
_load_from_dict( + cls, + data: Dict, + context: Dict, + additional_message: str, + **kwargs: Any, + ) -> "RegressionJob": + from azure.ai.ml._schema.automl.table_vertical.regression import AutoMLRegressionSchema + from azure.ai.ml._schema.pipeline.automl_node import AutoMLRegressionNodeSchema + + if kwargs.pop("inside_pipeline", False): + loaded_data = load_from_dict(AutoMLRegressionNodeSchema, data, context, additional_message, **kwargs) + else: + loaded_data = load_from_dict(AutoMLRegressionSchema, data, context, additional_message, **kwargs) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "RegressionJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + data_settings = { + "training_data": loaded_data.pop("training_data"), + "target_column_name": loaded_data.pop("target_column_name"), + "weight_column_name": loaded_data.pop("weight_column_name", None), + "validation_data": loaded_data.pop("validation_data", None), + "validation_data_size": loaded_data.pop("validation_data_size", None), + "cv_split_column_names": loaded_data.pop("cv_split_column_names", None), + "n_cross_validations": loaded_data.pop("n_cross_validations", None), + "test_data": loaded_data.pop("test_data", None), + "test_data_size": loaded_data.pop("test_data_size", None), + } + job = RegressionJob(**loaded_data) + job.set_data(**data_settings) + return job + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.table_vertical.regression import AutoMLRegressionSchema + from azure.ai.ml._schema.pipeline.automl_node import AutoMLRegressionNodeSchema + + schema_dict: dict = {} + if inside_pipeline: + schema_dict = AutoMLRegressionNodeSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + else: + schema_dict = AutoMLRegressionSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + + return schema_dict + + def __eq__(self, other: object) -> bool: + if not isinstance(other, RegressionJob): + return NotImplemented + + if not super(RegressionJob, self).__eq__(other): + return False + + return self.primary_metric == other.primary_metric + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/training_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/training_settings.py new file mode 100644 index 00000000..97bc7e17 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/training_settings.py @@ -0,0 +1,357 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# ---------------------------------------------------------
+
+# pylint: disable=R0902,protected-access
+
+from typing import Any, List, Optional, Union
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationModels
+from azure.ai.ml._restclient.v2023_04_01_preview.models import (
+ ClassificationTrainingSettings as RestClassificationTrainingSettings,
+)
+from azure.ai.ml._restclient.v2023_04_01_preview.models import ForecastingModels
+from azure.ai.ml._restclient.v2023_04_01_preview.models import (
+ ForecastingTrainingSettings as RestForecastingTrainingSettings,
+)
+from azure.ai.ml._restclient.v2023_04_01_preview.models import RegressionModels
+from azure.ai.ml._restclient.v2023_04_01_preview.models import (
+ RegressionTrainingSettings as RestRegressionTrainingSettings,
+)
+from azure.ai.ml._restclient.v2023_04_01_preview.models import TrainingSettings as RestTrainingSettings
+from azure.ai.ml._utils.utils import camel_to_snake, from_iso_duration_format_mins, to_iso_duration_format_mins
+from azure.ai.ml.constants import TabularTrainingMode
+from azure.ai.ml.entities._job.automl.stack_ensemble_settings import StackEnsembleSettings
+from azure.ai.ml.entities._mixins import RestTranslatableMixin
+from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException
+
+
+class TrainingSettings(RestTranslatableMixin):
+ """TrainingSettings class for Azure Machine Learning."""
+
+ def __init__(
+ self,
+ *,
+ enable_onnx_compatible_models: Optional[bool] = None,
+ enable_dnn_training: Optional[bool] = None,
+ enable_model_explainability: Optional[bool] = None,
+ enable_stack_ensemble: Optional[bool] = None,
+ enable_vote_ensemble: Optional[bool] = None,
+ stack_ensemble_settings: Optional[StackEnsembleSettings] = None,
+ ensemble_model_download_timeout: Optional[int] = None,
+ allowed_training_algorithms: Optional[List[str]] = None,
+ blocked_training_algorithms: Optional[List[str]] = None,
+ training_mode: Optional[Union[str, TabularTrainingMode]] = None,
+ ):
+ """TrainingSettings class for Azure Machine Learning.
+
+ :param enable_onnx_compatible_models: If set to True, the model will be trained to be compatible with ONNX
+ :type enable_onnx_compatible_models: typing.Optional[bool]
+ :param enable_dnn_training: If set to True, the model will use DNN training
+ :type enable_dnn_training: typing.Optional[bool]
+ :param enable_model_explainability: If set to True, the model will be trained to be explainable
+ :type enable_model_explainability: typing.Optional[bool]
+ :param enable_stack_ensemble: If set to True, a final ensemble model will be created using a stack of models
+ :type enable_stack_ensemble: typing.Optional[bool]
+ :param enable_vote_ensemble: If set to True, a final ensemble model will be created using a voting ensemble
+ :type enable_vote_ensemble: typing.Optional[bool]
+ :param stack_ensemble_settings: Settings for stack ensemble
+ :type stack_ensemble_settings: typing.Optional[azure.ai.ml.automl.StackEnsembleSettings]
+ :param ensemble_model_download_timeout: Timeout for downloading ensemble models, in minutes
+ :type ensemble_model_download_timeout: typing.Optional[int]
+ :param allowed_training_algorithms: Models that are allowed to be trained
+ :type allowed_training_algorithms: typing.Optional[typing.List[str]]
+ :param blocked_training_algorithms: Models that will not be considered for training
+ :type blocked_training_algorithms: typing.Optional[typing.List[str]]
+ :param training_mode: [Experimental] The training mode to use.
+ The possible values are:
+
+ * distributed: enables distributed training for supported algorithms.
+
+ * non_distributed: disables distributed training.
+
+ * auto: currently the same as non_distributed; this might change in the future.
+
+ Note: This parameter is in public preview and may change in the future.
+ :type training_mode: typing.Optional[typing.Union[str, azure.ai.ml.constants.TabularTrainingMode]]
+ """
+ self.enable_onnx_compatible_models = enable_onnx_compatible_models
+ self.enable_dnn_training = enable_dnn_training
+ self.enable_model_explainability = enable_model_explainability
+ self.enable_stack_ensemble = enable_stack_ensemble
+ self.enable_vote_ensemble = enable_vote_ensemble
+ self.stack_ensemble_settings = stack_ensemble_settings
+ self.ensemble_model_download_timeout = ensemble_model_download_timeout
+ self.allowed_training_algorithms = allowed_training_algorithms
+ self.blocked_training_algorithms = blocked_training_algorithms
+ self.training_mode = training_mode
+
+ @property
+ def training_mode(self) -> Optional[TabularTrainingMode]:
+ return self._training_mode
+
+ @training_mode.setter
+ def training_mode(self, value: Optional[Union[str, TabularTrainingMode]]) -> None:
+ if value is None or isinstance(value, TabularTrainingMode):
+ self._training_mode = value
+ elif hasattr(TabularTrainingMode, camel_to_snake(value).upper()):
+ self._training_mode = TabularTrainingMode[camel_to_snake(value).upper()]
+ else:
+ supported_values = ", ".join([f'"{camel_to_snake(mode.value)}"' for mode in TabularTrainingMode])
+ msg = (
+ f"Unsupported training mode: {value}. Supported values are: {supported_values}. "
+ "Alternatively, use the azure.ai.ml.constants.TabularTrainingMode enum."
+ )
+ raise ValidationException(
+ message=msg,
+ no_personal_data_message=msg,
+ target=ErrorTarget.AUTOML,
+ error_category=ErrorCategory.USER_ERROR,
+ )
+
+ @property
+ def allowed_training_algorithms(self) -> Optional[List[str]]:
+ return self._allowed_training_algorithms
+
+ @allowed_training_algorithms.setter
+ def allowed_training_algorithms(self, value: Optional[List[str]]) -> None:
+ self._allowed_training_algorithms = value
+
+ @property
+ def blocked_training_algorithms(self) -> Optional[List[str]]:
+ return self._blocked_training_algorithms
+
+ @blocked_training_algorithms.setter
+ def blocked_training_algorithms(self, value: Optional[List[str]]) -> None:
+ self._blocked_training_algorithms = value
+
+ def _to_rest_object(self) -> RestTrainingSettings:
+ return RestTrainingSettings(
+ enable_dnn_training=self.enable_dnn_training,
+ enable_onnx_compatible_models=self.enable_onnx_compatible_models,
+ enable_model_explainability=self.enable_model_explainability,
+ enable_stack_ensemble=self.enable_stack_ensemble,
+ enable_vote_ensemble=self.enable_vote_ensemble,
+ stack_ensemble_settings=(
+ self.stack_ensemble_settings._to_rest_object() if self.stack_ensemble_settings else None
+ ),
+ ensemble_model_download_timeout=to_iso_duration_format_mins(self.ensemble_model_download_timeout),
+ training_mode=self.training_mode,
+ )
+
+ @classmethod
+ def _from_rest_object(cls, obj: RestTrainingSettings) -> "TrainingSettings":
+ return cls(
+ enable_dnn_training=obj.enable_dnn_training,
+ enable_onnx_compatible_models=obj.enable_onnx_compatible_models,
+ enable_model_explainability=obj.enable_model_explainability,
+ enable_stack_ensemble=obj.enable_stack_ensemble,
+ enable_vote_ensemble=obj.enable_vote_ensemble,
+ ensemble_model_download_timeout=from_iso_duration_format_mins(obj.ensemble_model_download_timeout),
+ stack_ensemble_settings=(
+ StackEnsembleSettings._from_rest_object(obj.stack_ensemble_settings)
+ if obj.stack_ensemble_settings
+ else None
+ ),
+ training_mode=obj.training_mode,
+ )
+
+ def __eq__(self, other: object) -> bool:
+ if not isinstance(other, TrainingSettings):
+ return NotImplemented
+ return (
+ self.enable_dnn_training == other.enable_dnn_training
+ and self.enable_onnx_compatible_models == other.enable_onnx_compatible_models
+ and self.enable_model_explainability == other.enable_model_explainability
+ and self.enable_stack_ensemble == other.enable_stack_ensemble
+ and self.enable_vote_ensemble == other.enable_vote_ensemble
+ and self.ensemble_model_download_timeout == other.ensemble_model_download_timeout
+ and self.stack_ensemble_settings == other.stack_ensemble_settings
+ and self.allowed_training_algorithms == other.allowed_training_algorithms
+ and self.blocked_training_algorithms == other.blocked_training_algorithms
+ and self.training_mode == other.training_mode
+ )
+
+ def __ne__(self, other: object) -> bool:
+ return not self.__eq__(other)
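[Editor's note] The training_mode setter above accepts the enum, a string in any casing, or None, and raises ValidationException for anything else; a compact sketch of that behavior, inferred from the setter's code rather than from SDK documentation:

    # Hedged sketch of the setter's normalization and validation.
    from azure.ai.ml.constants import TabularTrainingMode
    from azure.ai.ml.exceptions import ValidationException

    ts = TrainingSettings(training_mode="Distributed")   # any casing is snake_cased
    assert ts.training_mode is TabularTrainingMode.DISTRIBUTED

    ts.training_mode = None                              # None passes through unchanged
    try:
        ts.training_mode = "bogus"                       # unsupported value
    except ValidationException:
        pass                                             # raised with the supported values listed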
+
+
+ class ClassificationTrainingSettings(TrainingSettings):
+ """Classification TrainingSettings class for Azure Machine Learning."""
+
+ def __init__(
+ self,
+ **kwargs: Any,
+ ):
+ super().__init__(**kwargs)
+
+ @property
+ def allowed_training_algorithms(self) -> Optional[List]:
+ return self._allowed_training_algorithms
+
+ @allowed_training_algorithms.setter
+ def allowed_training_algorithms(self, allowed_model_list: Union[List[str], List[ClassificationModels]]) -> None:
+ self._allowed_training_algorithms = (
+ None
+ if allowed_model_list is None
+ else [ClassificationModels[camel_to_snake(o)] for o in allowed_model_list]
+ )
+
+ @property
+ def blocked_training_algorithms(self) -> Optional[List]:
+ return self._blocked_training_algorithms
+
+ @blocked_training_algorithms.setter
+ def blocked_training_algorithms(self, blocked_model_list: Union[List[str], List[ClassificationModels]]) -> None:
+ self._blocked_training_algorithms = (
+ None
+ if blocked_model_list is None
+ else [ClassificationModels[camel_to_snake(o)] for o in blocked_model_list]
+ )
+
+ def _to_rest_object(self) -> RestClassificationTrainingSettings:
+ return RestClassificationTrainingSettings(
+ enable_dnn_training=self.enable_dnn_training,
+ enable_onnx_compatible_models=self.enable_onnx_compatible_models,
+ enable_model_explainability=self.enable_model_explainability,
+ enable_stack_ensemble=self.enable_stack_ensemble,
+ enable_vote_ensemble=self.enable_vote_ensemble,
+ stack_ensemble_settings=self.stack_ensemble_settings,
+ ensemble_model_download_timeout=to_iso_duration_format_mins(self.ensemble_model_download_timeout),
+ allowed_training_algorithms=self.allowed_training_algorithms,
+ blocked_training_algorithms=self.blocked_training_algorithms,
+ training_mode=self.training_mode,
+ )
+
+ @classmethod
+ def _from_rest_object(cls, obj: RestClassificationTrainingSettings) -> "ClassificationTrainingSettings":
+ return cls(
+ enable_dnn_training=obj.enable_dnn_training,
+ enable_onnx_compatible_models=obj.enable_onnx_compatible_models,
+ enable_model_explainability=obj.enable_model_explainability,
+ enable_stack_ensemble=obj.enable_stack_ensemble,
+ enable_vote_ensemble=obj.enable_vote_ensemble,
+ ensemble_model_download_timeout=from_iso_duration_format_mins(obj.ensemble_model_download_timeout),
+ stack_ensemble_settings=obj.stack_ensemble_settings,
+ allowed_training_algorithms=obj.allowed_training_algorithms,
blocked_training_algorithms=obj.blocked_training_algorithms,
+ training_mode=obj.training_mode,
+ )
+
+
+ class ForecastingTrainingSettings(TrainingSettings):
+ """Forecasting TrainingSettings class for Azure Machine Learning."""
+
+ def __init__(
+ self,
+ **kwargs: Any,
+ ):
+ super().__init__(**kwargs)
+
+ @property
+ def allowed_training_algorithms(self) -> Optional[List]:
+ return self._allowed_training_algorithms
+
+ @allowed_training_algorithms.setter
+ def allowed_training_algorithms(self, allowed_model_list: Union[List[str], List[ForecastingModels]]) -> None:
+ self._allowed_training_algorithms = (
+ None if allowed_model_list is None else [ForecastingModels[camel_to_snake(o)] for o in allowed_model_list]
+ )
+
+ @property
+ def blocked_training_algorithms(self) -> Optional[List]:
+ return self._blocked_training_algorithms
+
+ @blocked_training_algorithms.setter
+ def blocked_training_algorithms(self, blocked_model_list: Union[List[str], List[ForecastingModels]]) -> None:
+ self._blocked_training_algorithms = (
+ None if blocked_model_list is None else [ForecastingModels[camel_to_snake(o)] for o in blocked_model_list]
+ )
+
+ def _to_rest_object(self) -> RestForecastingTrainingSettings:
+ return RestForecastingTrainingSettings(
+ enable_dnn_training=self.enable_dnn_training,
+ enable_onnx_compatible_models=self.enable_onnx_compatible_models,
+ enable_model_explainability=self.enable_model_explainability,
+ enable_stack_ensemble=self.enable_stack_ensemble,
+ enable_vote_ensemble=self.enable_vote_ensemble,
+ stack_ensemble_settings=self.stack_ensemble_settings,
+ ensemble_model_download_timeout=to_iso_duration_format_mins(self.ensemble_model_download_timeout),
+ allowed_training_algorithms=self.allowed_training_algorithms,
+ blocked_training_algorithms=self.blocked_training_algorithms,
+ training_mode=self.training_mode,
+ )
+
+ @classmethod
+ def _from_rest_object(cls, obj: RestForecastingTrainingSettings) -> "ForecastingTrainingSettings":
+ return cls(
+ enable_dnn_training=obj.enable_dnn_training,
+ enable_onnx_compatible_models=obj.enable_onnx_compatible_models,
+ enable_model_explainability=obj.enable_model_explainability,
+ enable_stack_ensemble=obj.enable_stack_ensemble,
+ enable_vote_ensemble=obj.enable_vote_ensemble,
+ ensemble_model_download_timeout=from_iso_duration_format_mins(obj.ensemble_model_download_timeout),
+ stack_ensemble_settings=obj.stack_ensemble_settings,
+ allowed_training_algorithms=obj.allowed_training_algorithms,
+ blocked_training_algorithms=obj.blocked_training_algorithms,
+ training_mode=obj.training_mode,
+ )
+
+
+ class RegressionTrainingSettings(TrainingSettings):
+ """Regression TrainingSettings class for Azure Machine Learning."""
+
+ def __init__(
+ self,
+ **kwargs: Any,
+ ):
+ super().__init__(**kwargs)
+
+ @property
+ def allowed_training_algorithms(self) -> Optional[List]:
+ return self._allowed_training_algorithms
+
+ @allowed_training_algorithms.setter
+ def allowed_training_algorithms(self, allowed_model_list: Union[List[str], List[RegressionModels]]) -> None:
+ self._allowed_training_algorithms = (
+ None if allowed_model_list is None else [RegressionModels[camel_to_snake(o)] for o in allowed_model_list]
+ )
+
+ @property
+ def blocked_training_algorithms(self) -> Optional[List]:
+ return self._blocked_training_algorithms
+
+ @blocked_training_algorithms.setter
+ def blocked_training_algorithms(self, blocked_model_list: Union[List[str], List[RegressionModels]]) -> None:
+ self._blocked_training_algorithms = (
+ None if blocked_model_list
is None else [RegressionModels[camel_to_snake(o)] for o in blocked_model_list] + ) + + def _to_rest_object(self) -> RestRegressionTrainingSettings: + return RestRegressionTrainingSettings( + enable_dnn_training=self.enable_dnn_training, + enable_onnx_compatible_models=self.enable_onnx_compatible_models, + enable_model_explainability=self.enable_model_explainability, + enable_stack_ensemble=self.enable_stack_ensemble, + enable_vote_ensemble=self.enable_vote_ensemble, + stack_ensemble_settings=self.stack_ensemble_settings, + ensemble_model_download_timeout=to_iso_duration_format_mins(self.ensemble_model_download_timeout), + allowed_training_algorithms=self.allowed_training_algorithms, + blocked_training_algorithms=self.blocked_training_algorithms, + training_mode=self.training_mode, + ) + + @classmethod + def _from_rest_object(cls, obj: RestRegressionTrainingSettings) -> "RegressionTrainingSettings": + return cls( + enable_dnn_training=obj.enable_dnn_training, + enable_onnx_compatible_models=obj.enable_onnx_compatible_models, + enable_model_explainability=obj.enable_model_explainability, + enable_stack_ensemble=obj.enable_stack_ensemble, + enable_vote_ensemble=obj.enable_vote_ensemble, + ensemble_model_download_timeout=from_iso_duration_format_mins(obj.ensemble_model_download_timeout), + stack_ensemble_settings=obj.stack_ensemble_settings, + allowed_training_algorithms=obj.allowed_training_algorithms, + blocked_training_algorithms=obj.blocked_training_algorithms, + training_mode=obj.training_mode, + ) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/utils.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/utils.py new file mode 100644 index 00000000..08521d7e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/utils.py @@ -0,0 +1,47 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from typing import TYPE_CHECKING, Dict, Type, Union + +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + +if TYPE_CHECKING: + from azure.ai.ml.entities._job.automl.image.image_classification_search_space import ImageClassificationSearchSpace + from azure.ai.ml.entities._job.automl.image.image_object_detection_search_space import ( + ImageObjectDetectionSearchSpace, + ) + from azure.ai.ml.entities._job.automl.nlp.nlp_search_space import NlpSearchSpace + from azure.ai.ml.entities._job.automl.search_space import SearchSpace + + +def cast_to_specific_search_space( + input: Union[Dict, "SearchSpace"], # pylint: disable=redefined-builtin + class_name: Union[ + Type["ImageClassificationSearchSpace"], Type["ImageObjectDetectionSearchSpace"], Type["NlpSearchSpace"] + ], + task_type: str, +) -> Union["ImageClassificationSearchSpace", "ImageObjectDetectionSearchSpace", "NlpSearchSpace"]: + def validate_searchspace_args(input_dict: dict) -> None: + searchspace = class_name() + for key in input_dict: + if not hasattr(searchspace, key): + msg = f"Received unsupported search space parameter for {task_type} Job." 
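+ # Editor's note: the key name is not included in msg, which lets the same
+ # string double as the no_personal_data_message passed to the exception below.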
+ raise ValidationException(
+ message=msg,
+ no_personal_data_message=msg,
+ target=ErrorTarget.AUTOML,
+ error_category=ErrorCategory.USER_ERROR,
+ )
+
+ if isinstance(input, dict):
+ validate_searchspace_args(input)
+ specific_search_space = class_name(**input)
+ else:
+ validate_searchspace_args(input.__dict__)
+ specific_search_space = class_name._from_search_space_object(input) # pylint: disable=protected-access
+
+ res: Union["ImageClassificationSearchSpace", "ImageObjectDetectionSearchSpace", "NlpSearchSpace"] = (
+ specific_search_space
+ )
+ return res
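[Editor's note] To close the section, a sketch of how cast_to_specific_search_space is meant to be called. ImageClassificationSearchSpace is one of the target classes named in the annotations above, but the learning_rate field is an assumption about that class; check its definition before relying on it:

    # Hedged sketch; an unknown key in the dict would raise ValidationException
    # from validate_searchspace_args before the class is constructed.
    from azure.ai.ml.entities._job.automl.image.image_classification_search_space import (
        ImageClassificationSearchSpace,
    )
    from azure.ai.ml.entities._job.automl.utils import cast_to_specific_search_space

    space = cast_to_specific_search_space(
        {"learning_rate": 0.01},              # assumed to be a valid field of the class
        ImageClassificationSearchSpace,
        "ImageClassification",
    )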
