diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl')
29 files changed, 1345 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/__init__.py new file mode 100644 index 00000000..36befc7c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/__init__.py @@ -0,0 +1,30 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +__path__ = __import__("pkgutil").extend_path(__path__, __name__) + +from .automl_job import AutoMLJobSchema +from .automl_vertical import AutoMLVerticalSchema +from .featurization_settings import FeaturizationSettingsSchema, TableFeaturizationSettingsSchema +from .forecasting_settings import ForecastingSettingsSchema +from .table_vertical.classification import AutoMLClassificationSchema +from .table_vertical.forecasting import AutoMLForecastingSchema +from .table_vertical.regression import AutoMLRegressionSchema +from .table_vertical.table_vertical import AutoMLTableVerticalSchema +from .table_vertical.table_vertical_limit_settings import AutoMLTableLimitsSchema +from .training_settings import TrainingSettingsSchema + +__all__ = [ + "AutoMLJobSchema", + "AutoMLVerticalSchema", + "FeaturizationSettingsSchema", + "TableFeaturizationSettingsSchema", + "ForecastingSettingsSchema", + "AutoMLClassificationSchema", + "AutoMLForecastingSchema", + "AutoMLRegressionSchema", + "AutoMLTableVerticalSchema", + "AutoMLTableLimitsSchema", + "TrainingSettingsSchema", +] diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/automl_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/automl_job.py new file mode 100644 index 00000000..ebec82c7 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/automl_job.py @@ -0,0 +1,21 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from marshmallow import fields + +from azure.ai.ml._schema.core.fields import ExperimentalField, NestedField, StringTransformedEnum +from azure.ai.ml._schema.job import BaseJobSchema +from azure.ai.ml._schema.job.input_output_fields_provider import OutputsField +from azure.ai.ml._schema.job_resource_configuration import JobResourceConfigurationSchema +from azure.ai.ml._schema.queue_settings import QueueSettingsSchema +from azure.ai.ml.constants import JobType + + +class AutoMLJobSchema(BaseJobSchema): + type = StringTransformedEnum(required=True, allowed_values=JobType.AUTOML) + environment_id = fields.Str() + environment_variables = fields.Dict(keys=fields.Str(), values=fields.Str()) + outputs = OutputsField() + resources = NestedField(JobResourceConfigurationSchema()) + queue_settings = ExperimentalField(NestedField(QueueSettingsSchema)) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/automl_vertical.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/automl_vertical.py new file mode 100644 index 00000000..2cf3bb83 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/automl_vertical.py @@ -0,0 +1,18 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from azure.ai.ml._restclient.v2023_04_01_preview.models import LogVerbosity +from azure.ai.ml._schema.automl.automl_job import AutoMLJobSchema +from azure.ai.ml._schema.core.fields import NestedField, StringTransformedEnum, UnionField +from azure.ai.ml._schema.job.input_output_entry import MLTableInputSchema +from azure.ai.ml._utils.utils import camel_to_snake + + +class AutoMLVerticalSchema(AutoMLJobSchema): + log_verbosity = StringTransformedEnum( + allowed_values=[o.value for o in LogVerbosity], + casing_transform=camel_to_snake, + load_default=LogVerbosity.INFO, + ) + training_data = UnionField([NestedField(MLTableInputSchema)]) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/featurization_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/featurization_settings.py new file mode 100644 index 00000000..19998e45 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/featurization_settings.py @@ -0,0 +1,74 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from marshmallow import fields as flds +from marshmallow import post_load + +from azure.ai.ml._restclient.v2023_04_01_preview.models import BlockedTransformers +from azure.ai.ml._schema.core.fields import NestedField, StringTransformedEnum, UnionField +from azure.ai.ml._schema.core.schema import PatchedSchemaMeta +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants._job.automl import AutoMLConstants, AutoMLTransformerParameterKeys + + +class ColumnTransformerSchema(metaclass=PatchedSchemaMeta): + fields = flds.List(flds.Str()) + parameters = flds.Dict( + keys=flds.Str(), + values=UnionField([flds.Float(), flds.Str()], allow_none=True, load_default=None), + ) + + @post_load + def make(self, data, **kwargs): + from azure.ai.ml.automl import ColumnTransformer + + return ColumnTransformer(**data) + + +class FeaturizationSettingsSchema(metaclass=PatchedSchemaMeta): + dataset_language = flds.Str() + + +class NlpFeaturizationSettingsSchema(FeaturizationSettingsSchema): + dataset_language = flds.Str() + + @post_load + def make(self, data, **kwargs) -> "NlpFeaturizationSettings": + from azure.ai.ml.automl import NlpFeaturizationSettings + + return NlpFeaturizationSettings(**data) + + +class TableFeaturizationSettingsSchema(FeaturizationSettingsSchema): + mode = StringTransformedEnum( + allowed_values=[ + AutoMLConstants.AUTO, + AutoMLConstants.OFF, + AutoMLConstants.CUSTOM, + ], + load_default=AutoMLConstants.AUTO, + ) + blocked_transformers = flds.List( + StringTransformedEnum( + allowed_values=[o.value for o in BlockedTransformers], + casing_transform=camel_to_snake, + ) + ) + column_name_and_types = flds.Dict(keys=flds.Str(), values=flds.Str()) + transformer_params = flds.Dict( + keys=StringTransformedEnum( + allowed_values=[o.value for o in AutoMLTransformerParameterKeys], + casing_transform=camel_to_snake, + ), + values=flds.List(NestedField(ColumnTransformerSchema())), + ) + enable_dnn_featurization = flds.Bool() + + @post_load + def make(self, data, **kwargs) -> "TabularFeaturizationSettings": + from azure.ai.ml.automl import TabularFeaturizationSettings + + return TabularFeaturizationSettings(**data) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/forecasting_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/forecasting_settings.py new file mode 100644 index 00000000..56033e14 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/forecasting_settings.py @@ -0,0 +1,66 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from marshmallow import fields, post_load + +from azure.ai.ml._restclient.v2023_04_01_preview.models import FeatureLags as FeatureLagsMode +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + ForecastHorizonMode, + SeasonalityMode, + ShortSeriesHandlingConfiguration, + TargetAggregationFunction, + TargetLagsMode, + TargetRollingWindowSizeMode, +) +from azure.ai.ml._restclient.v2023_04_01_preview.models import UseStl as STLMode +from azure.ai.ml._schema.core.fields import StringTransformedEnum, UnionField +from azure.ai.ml._schema.core.schema import PatchedSchemaMeta + + +class ForecastingSettingsSchema(metaclass=PatchedSchemaMeta): + country_or_region_for_holidays = fields.Str() + cv_step_size = fields.Int() + forecast_horizon = UnionField( + [ + StringTransformedEnum(allowed_values=[ForecastHorizonMode.AUTO]), + fields.Int(), + ] + ) + target_lags = UnionField( + [ + StringTransformedEnum(allowed_values=[TargetLagsMode.AUTO]), + fields.Int(), + fields.List(fields.Int()), + ] + ) + target_rolling_window_size = UnionField( + [ + StringTransformedEnum(allowed_values=[TargetRollingWindowSizeMode.AUTO]), + fields.Int(), + ] + ) + time_column_name = fields.Str() + time_series_id_column_names = UnionField([fields.Str(), fields.List(fields.Str())]) + frequency = fields.Str() + feature_lags = StringTransformedEnum(allowed_values=[FeatureLagsMode.NONE, FeatureLagsMode.AUTO]) + seasonality = UnionField( + [ + StringTransformedEnum(allowed_values=[SeasonalityMode.AUTO]), + fields.Int(), + ] + ) + short_series_handling_config = StringTransformedEnum( + allowed_values=[o.value for o in ShortSeriesHandlingConfiguration] + ) + use_stl = StringTransformedEnum(allowed_values=[STLMode.NONE, STLMode.SEASON, STLMode.SEASON_TREND]) + target_aggregate_function = StringTransformedEnum(allowed_values=[o.value for o in TargetAggregationFunction]) + features_unknown_at_forecast_time = UnionField([fields.Str(), fields.List(fields.Str())]) + + @post_load + def make(self, data, **kwargs): + from azure.ai.ml.entities._job.automl.tabular.forecasting_settings import ForecastingSettings + + return ForecastingSettings(**data) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/__init__.py new file mode 100644 index 00000000..29a4fcd3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/__init__.py @@ -0,0 +1,5 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_classification.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_classification.py new file mode 100644 index 00000000..c539f037 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_classification.py @@ -0,0 +1,66 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from typing import Any, Dict + +from marshmallow import fields, post_load + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + ClassificationMultilabelPrimaryMetrics, + ClassificationPrimaryMetrics, + TaskType, +) +from azure.ai.ml._schema.automl.image_vertical.image_model_distribution_settings import ( + ImageModelDistributionSettingsClassificationSchema, +) +from azure.ai.ml._schema.automl.image_vertical.image_model_settings import ImageModelSettingsClassificationSchema +from azure.ai.ml._schema.automl.image_vertical.image_vertical import ImageVerticalSchema +from azure.ai.ml._schema.core.fields import NestedField, StringTransformedEnum +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants._job.automl import AutoMLConstants + + +class ImageClassificationBaseSchema(ImageVerticalSchema): + training_parameters = NestedField(ImageModelSettingsClassificationSchema()) + search_space = fields.List(NestedField(ImageModelDistributionSettingsClassificationSchema())) + + +class ImageClassificationSchema(ImageClassificationBaseSchema): + task_type = StringTransformedEnum( + allowed_values=TaskType.IMAGE_CLASSIFICATION, + casing_transform=camel_to_snake, + data_key=AutoMLConstants.TASK_TYPE_YAML, + required=True, + ) + primary_metric = StringTransformedEnum( + allowed_values=[o.value for o in ClassificationPrimaryMetrics], + casing_transform=camel_to_snake, + load_default=camel_to_snake(ClassificationPrimaryMetrics.Accuracy), + ) + + @post_load + def make(self, data, **kwargs) -> Dict[str, Any]: + data.pop("task_type") + return data + + +class ImageClassificationMultilabelSchema(ImageClassificationBaseSchema): + task_type = StringTransformedEnum( + allowed_values=TaskType.IMAGE_CLASSIFICATION_MULTILABEL, + casing_transform=camel_to_snake, + data_key=AutoMLConstants.TASK_TYPE_YAML, + required=True, + ) + primary_metric = StringTransformedEnum( + allowed_values=[o.value for o in ClassificationMultilabelPrimaryMetrics], + casing_transform=camel_to_snake, + load_default=camel_to_snake(ClassificationMultilabelPrimaryMetrics.IOU), + ) + + @post_load + def make(self, data, **kwargs) -> Dict[str, Any]: + data.pop("task_type") + return data diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_limit_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_limit_settings.py new file mode 100644 index 00000000..3f5c73e8 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_limit_settings.py @@ -0,0 +1,21 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from marshmallow import fields, post_load + +from azure.ai.ml._schema.core.schema import PatchedSchemaMeta + + +class ImageLimitsSchema(metaclass=PatchedSchemaMeta): + max_concurrent_trials = fields.Int() + max_trials = fields.Int() + timeout_minutes = fields.Int() # type duration + + @post_load + def make(self, data, **kwargs): + from azure.ai.ml.automl import ImageLimitSettings + + return ImageLimitSettings(**data) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_model_distribution_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_model_distribution_settings.py new file mode 100644 index 00000000..9f784038 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_model_distribution_settings.py @@ -0,0 +1,216 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from marshmallow import fields, post_dump, post_load, pre_load + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + LearningRateScheduler, + ModelSize, + StochasticOptimizer, + ValidationMetricType, +) +from azure.ai.ml._schema._sweep.search_space import ( + ChoiceSchema, + IntegerQNormalSchema, + IntegerQUniformSchema, + NormalSchema, + QNormalSchema, + QUniformSchema, + RandintSchema, + UniformSchema, +) +from azure.ai.ml._schema.core.fields import ( + DumpableIntegerField, + DumpableStringField, + NestedField, + StringTransformedEnum, + UnionField, +) +from azure.ai.ml._schema.core.schema import PatchedSchemaMeta +from azure.ai.ml._utils.utils import camel_to_snake + + +def choice_schema_of_type(cls, **kwargs): + class CustomChoiceSchema(ChoiceSchema): + values = fields.List(cls(**kwargs)) + + return CustomChoiceSchema() + + +def choice_and_single_value_schema_of_type(cls, **kwargs): + # Reshuffling the order of fields for allowing choice of booleans. + # The reason is, while dumping [Bool, Choice[Bool]] is parsing even dict as True. + # Since all unionFields are parsed sequentially, to avoid this, we are giving the "type" field at the end. + return UnionField([NestedField(choice_schema_of_type(cls, **kwargs)), cls(**kwargs)]) + + +FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD = UnionField( + [ + fields.Float(), + DumpableIntegerField(strict=True), + NestedField(choice_schema_of_type(DumpableIntegerField, strict=True)), + NestedField(choice_schema_of_type(fields.Float)), + NestedField(UniformSchema()), + NestedField(QUniformSchema()), + NestedField(NormalSchema()), + NestedField(QNormalSchema()), + NestedField(RandintSchema()), + ] +) + +INT_SEARCH_SPACE_DISTRIBUTION_FIELD = UnionField( + [ + DumpableIntegerField(strict=True), + NestedField(choice_schema_of_type(DumpableIntegerField, strict=True)), + NestedField(RandintSchema()), + NestedField(IntegerQUniformSchema()), + NestedField(IntegerQNormalSchema()), + ] +) + +STRING_SEARCH_SPACE_DISTRIBUTION_FIELD = choice_and_single_value_schema_of_type(DumpableStringField) +BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD = choice_and_single_value_schema_of_type(fields.Bool) + +model_size_enum_args = {"allowed_values": [o.value for o in ModelSize], "casing_transform": camel_to_snake} +learning_rate_scheduler_enum_args = { + "allowed_values": [o.value for o in LearningRateScheduler], + "casing_transform": camel_to_snake, +} +optimizer_enum_args = {"allowed_values": [o.value for o in StochasticOptimizer], "casing_transform": camel_to_snake} +validation_metric_enum_args = { + "allowed_values": [o.value for o in ValidationMetricType], + "casing_transform": camel_to_snake, +} + + +MODEL_SIZE_DISTRIBUTION_FIELD = choice_and_single_value_schema_of_type(StringTransformedEnum, **model_size_enum_args) +LEARNING_RATE_SCHEDULER_DISTRIBUTION_FIELD = choice_and_single_value_schema_of_type( + StringTransformedEnum, **learning_rate_scheduler_enum_args +) +OPTIMIZER_DISTRIBUTION_FIELD = choice_and_single_value_schema_of_type(StringTransformedEnum, **optimizer_enum_args) +VALIDATION_METRIC_DISTRIBUTION_FIELD = choice_and_single_value_schema_of_type( + StringTransformedEnum, **validation_metric_enum_args +) + + +class ImageModelDistributionSettingsSchema(metaclass=PatchedSchemaMeta): + ams_gradient = BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD + augmentations = STRING_SEARCH_SPACE_DISTRIBUTION_FIELD + beta1 = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD + beta2 = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD + distributed = BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD + early_stopping = BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD + early_stopping_delay = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + early_stopping_patience = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + evaluation_frequency = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + enable_onnx_normalization = BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD + gradient_accumulation_step = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + layers_to_freeze = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + learning_rate = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD + learning_rate_scheduler = LEARNING_RATE_SCHEDULER_DISTRIBUTION_FIELD + momentum = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD + nesterov = BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD + number_of_epochs = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + number_of_workers = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + optimizer = OPTIMIZER_DISTRIBUTION_FIELD + random_seed = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + step_lr_gamma = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD + step_lr_step_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + training_batch_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + validation_batch_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + warmup_cosine_lr_cycles = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD + warmup_cosine_lr_warmup_epochs = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + weight_decay = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD + + +# pylint: disable-next=name-too-long +class ImageModelDistributionSettingsClassificationSchema(ImageModelDistributionSettingsSchema): + model_name = STRING_SEARCH_SPACE_DISTRIBUTION_FIELD + training_crop_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + validation_crop_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + validation_resize_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + weighted_loss = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + + @post_dump + def conversion(self, data, **kwargs): + if self.context.get("inside_pipeline", False): # pylint: disable=no-member + # AutoML job inside pipeline does load(dump) instead of calling to_rest_object + # explicitly for creating the autoRest Object from sdk job. + # Hence for pipeline job, we explicitly convert Sweep Distribution dict to str after dump in this method. + # For standalone automl job, same conversion happens in image_classification_job._to_rest_object() + from azure.ai.ml.entities._job.automl.search_space_utils import _convert_sweep_dist_dict_to_str_dict + + data = _convert_sweep_dist_dict_to_str_dict(data) + return data + + @pre_load + def before_make(self, data, **kwargs): + if self.context.get("inside_pipeline", False): # pylint: disable=no-member + from azure.ai.ml.entities._job.automl.search_space_utils import _convert_sweep_dist_str_to_dict + + # Converting Sweep Distribution str to Sweep Distribution dict for complying with search_space schema. + data = _convert_sweep_dist_str_to_dict(data) + return data + + @post_load + def make(self, data, **kwargs): + from azure.ai.ml.automl import ImageClassificationSearchSpace + + return ImageClassificationSearchSpace(**data) + + +# pylint: disable-next=name-too-long +class ImageModelDistributionSettingsDetectionCommonSchema(ImageModelDistributionSettingsSchema): + box_detections_per_image = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + box_score_threshold = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD + image_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + max_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + min_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + model_size = MODEL_SIZE_DISTRIBUTION_FIELD + multi_scale = BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD + nms_iou_threshold = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD + tile_grid_size = STRING_SEARCH_SPACE_DISTRIBUTION_FIELD + tile_overlap_ratio = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD + tile_predictions_nms_threshold = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD + validation_iou_threshold = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD + validation_metric_type = VALIDATION_METRIC_DISTRIBUTION_FIELD + + @post_dump + def conversion(self, data, **kwargs): + if self.context.get("inside_pipeline", False): # pylint: disable=no-member + # AutoML job inside pipeline does load(dump) instead of calling to_rest_object + # explicitly for creating the autoRest Object from sdk job object. + # Hence for pipeline job, we explicitly convert Sweep Distribution dict to str after dump in this method. + # For standalone automl job, same conversion happens in image_object_detection_job._to_rest_object() + from azure.ai.ml.entities._job.automl.search_space_utils import _convert_sweep_dist_dict_to_str_dict + + data = _convert_sweep_dist_dict_to_str_dict(data) + return data + + @pre_load + def before_make(self, data, **kwargs): + if self.context.get("inside_pipeline", False): # pylint: disable=no-member + from azure.ai.ml.entities._job.automl.search_space_utils import _convert_sweep_dist_str_to_dict + + # Converting Sweep Distribution str to Sweep Distribution dict for complying with search_space schema. + data = _convert_sweep_dist_str_to_dict(data) + return data + + @post_load + def make(self, data, **kwargs): + from azure.ai.ml.automl import ImageObjectDetectionSearchSpace + + return ImageObjectDetectionSearchSpace(**data) + + +# pylint: disable-next=name-too-long +class ImageModelDistributionSettingsObjectDetectionSchema(ImageModelDistributionSettingsDetectionCommonSchema): + model_name = STRING_SEARCH_SPACE_DISTRIBUTION_FIELD + + +# pylint: disable-next=name-too-long +class ImageModelDistributionSettingsInstanceSegmentationSchema(ImageModelDistributionSettingsObjectDetectionSchema): + model_name = STRING_SEARCH_SPACE_DISTRIBUTION_FIELD diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_model_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_model_settings.py new file mode 100644 index 00000000..7c88e628 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_model_settings.py @@ -0,0 +1,96 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from marshmallow import fields, post_load + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + LearningRateScheduler, + ModelSize, + StochasticOptimizer, + ValidationMetricType, +) +from azure.ai.ml._schema.core.fields import StringTransformedEnum +from azure.ai.ml._schema.core.schema import PatchedSchemaMeta +from azure.ai.ml._utils.utils import camel_to_snake + + +class ImageModelSettingsSchema(metaclass=PatchedSchemaMeta): + ams_gradient = fields.Bool() + advanced_settings = fields.Str() + beta1 = fields.Float() + beta2 = fields.Float() + checkpoint_frequency = fields.Int() + checkpoint_run_id = fields.Str() + distributed = fields.Bool() + early_stopping = fields.Bool() + early_stopping_delay = fields.Int() + early_stopping_patience = fields.Int() + evaluation_frequency = fields.Int() + enable_onnx_normalization = fields.Bool() + gradient_accumulation_step = fields.Int() + layers_to_freeze = fields.Int() + learning_rate = fields.Float() + learning_rate_scheduler = StringTransformedEnum( + allowed_values=[o.value for o in LearningRateScheduler], + casing_transform=camel_to_snake, + ) + model_name = fields.Str() + momentum = fields.Float() + nesterov = fields.Bool() + number_of_epochs = fields.Int() + number_of_workers = fields.Int() + optimizer = StringTransformedEnum( + allowed_values=[o.value for o in StochasticOptimizer], + casing_transform=camel_to_snake, + ) + random_seed = fields.Int() + step_lr_gamma = fields.Float() + step_lr_step_size = fields.Int() + training_batch_size = fields.Int() + validation_batch_size = fields.Int() + warmup_cosine_lr_cycles = fields.Float() + warmup_cosine_lr_warmup_epochs = fields.Int() + weight_decay = fields.Float() + + +class ImageModelSettingsClassificationSchema(ImageModelSettingsSchema): + training_crop_size = fields.Int() + validation_crop_size = fields.Int() + validation_resize_size = fields.Int() + weighted_loss = fields.Int() + + @post_load + def make(self, data, **kwargs): + from azure.ai.ml.entities._job.automl.image.image_model_settings import ImageModelSettingsClassification + + return ImageModelSettingsClassification(**data) + + +class ImageModelSettingsObjectDetectionSchema(ImageModelSettingsSchema): + box_detections_per_image = fields.Int() + box_score_threshold = fields.Float() + image_size = fields.Int() + max_size = fields.Int() + min_size = fields.Int() + model_size = StringTransformedEnum(allowed_values=[o.value for o in ModelSize], casing_transform=camel_to_snake) + multi_scale = fields.Bool() + nms_iou_threshold = fields.Float() + tile_grid_size = fields.Str() + tile_overlap_ratio = fields.Float() + tile_predictions_nms_threshold = fields.Float() + validation_iou_threshold = fields.Float() + validation_metric_type = StringTransformedEnum( + allowed_values=[o.value for o in ValidationMetricType], + casing_transform=camel_to_snake, + ) + log_training_metrics = fields.Str() + log_validation_loss = fields.Str() + + @post_load + def make(self, data, **kwargs): + from azure.ai.ml.entities._job.automl.image.image_model_settings import ImageModelSettingsObjectDetection + + return ImageModelSettingsObjectDetection(**data) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_object_detection.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_object_detection.py new file mode 100644 index 00000000..cb753882 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_object_detection.py @@ -0,0 +1,66 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from typing import Any, Dict + +from marshmallow import fields, post_load + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + InstanceSegmentationPrimaryMetrics, + ObjectDetectionPrimaryMetrics, + TaskType, +) +from azure.ai.ml._schema.automl.image_vertical.image_model_distribution_settings import ( + ImageModelDistributionSettingsInstanceSegmentationSchema, + ImageModelDistributionSettingsObjectDetectionSchema, +) +from azure.ai.ml._schema.automl.image_vertical.image_model_settings import ImageModelSettingsObjectDetectionSchema +from azure.ai.ml._schema.automl.image_vertical.image_vertical import ImageVerticalSchema +from azure.ai.ml._schema.core.fields import NestedField, StringTransformedEnum +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants._job.automl import AutoMLConstants + + +class ImageObjectDetectionSchema(ImageVerticalSchema): + task_type = StringTransformedEnum( + allowed_values=TaskType.IMAGE_OBJECT_DETECTION, + casing_transform=camel_to_snake, + data_key=AutoMLConstants.TASK_TYPE_YAML, + required=True, + ) + primary_metric = StringTransformedEnum( + allowed_values=ObjectDetectionPrimaryMetrics.MEAN_AVERAGE_PRECISION, + casing_transform=camel_to_snake, + load_default=camel_to_snake(ObjectDetectionPrimaryMetrics.MEAN_AVERAGE_PRECISION), + ) + training_parameters = NestedField(ImageModelSettingsObjectDetectionSchema()) + search_space = fields.List(NestedField(ImageModelDistributionSettingsObjectDetectionSchema())) + + @post_load + def make(self, data, **kwargs) -> Dict[str, Any]: + data.pop("task_type") + return data + + +class ImageInstanceSegmentationSchema(ImageVerticalSchema): + task_type = StringTransformedEnum( + allowed_values=TaskType.IMAGE_INSTANCE_SEGMENTATION, + casing_transform=camel_to_snake, + data_key=AutoMLConstants.TASK_TYPE_YAML, + required=True, + ) + primary_metric = StringTransformedEnum( + allowed_values=[InstanceSegmentationPrimaryMetrics.MEAN_AVERAGE_PRECISION], + casing_transform=camel_to_snake, + load_default=camel_to_snake(InstanceSegmentationPrimaryMetrics.MEAN_AVERAGE_PRECISION), + ) + training_parameters = NestedField(ImageModelSettingsObjectDetectionSchema()) + search_space = fields.List(NestedField(ImageModelDistributionSettingsInstanceSegmentationSchema())) + + @post_load + def make(self, data, **kwargs) -> Dict[str, Any]: + data.pop("task_type") + return data diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_sweep_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_sweep_settings.py new file mode 100644 index 00000000..66dfd7ae --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_sweep_settings.py @@ -0,0 +1,27 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument,protected-access + +from marshmallow import post_load, pre_dump + +from azure.ai.ml._schema._sweep.sweep_fields_provider import EarlyTerminationField, SamplingAlgorithmField +from azure.ai.ml._schema.core.schema import PatchedSchemaMeta + + +class ImageSweepSettingsSchema(metaclass=PatchedSchemaMeta): + sampling_algorithm = SamplingAlgorithmField() + early_termination = EarlyTerminationField() + + @pre_dump + def conversion(self, data, **kwargs): + rest_obj = data._to_rest_object() + rest_obj.early_termination = data.early_termination + return rest_obj + + @post_load + def make(self, data, **kwargs): + from azure.ai.ml.automl import ImageSweepSettings + + return ImageSweepSettings(**data) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_vertical.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_vertical.py new file mode 100644 index 00000000..fdfaa79f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/image_vertical/image_vertical.py @@ -0,0 +1,19 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from azure.ai.ml._schema.automl.automl_vertical import AutoMLVerticalSchema +from azure.ai.ml._schema.automl.image_vertical.image_limit_settings import ImageLimitsSchema +from azure.ai.ml._schema.automl.image_vertical.image_sweep_settings import ImageSweepSettingsSchema +from azure.ai.ml._schema.core.fields import NestedField, UnionField, fields +from azure.ai.ml._schema.job.input_output_entry import MLTableInputSchema + + +class ImageVerticalSchema(AutoMLVerticalSchema): + limits = NestedField(ImageLimitsSchema()) + sweep = NestedField(ImageSweepSettingsSchema()) + target_column_name = fields.Str(required=True) + test_data = UnionField([NestedField(MLTableInputSchema)]) + test_data_size = fields.Float() + validation_data = UnionField([NestedField(MLTableInputSchema)]) + validation_data_size = fields.Float() diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/__init__.py new file mode 100644 index 00000000..29a4fcd3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/__init__.py @@ -0,0 +1,5 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_fixed_parameters.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_fixed_parameters.py new file mode 100644 index 00000000..2a5cb336 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_fixed_parameters.py @@ -0,0 +1,33 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from marshmallow import fields, post_load + +from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpLearningRateScheduler +from azure.ai.ml._schema.core.fields import StringTransformedEnum +from azure.ai.ml._schema.core.schema import PatchedSchemaMeta +from azure.ai.ml._utils.utils import camel_to_snake + + +class NlpFixedParametersSchema(metaclass=PatchedSchemaMeta): + gradient_accumulation_steps = fields.Int() + learning_rate = fields.Float() + learning_rate_scheduler = StringTransformedEnum( + allowed_values=[obj.value for obj in NlpLearningRateScheduler], + casing_transform=camel_to_snake, + ) + model_name = fields.Str() + number_of_epochs = fields.Int() + training_batch_size = fields.Int() + validation_batch_size = fields.Int() + warmup_ratio = fields.Float() + weight_decay = fields.Float() + + @post_load + def make(self, data, **kwargs): + from azure.ai.ml.automl import NlpFixedParameters + + return NlpFixedParameters(**data) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_parameter_subspace.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_parameter_subspace.py new file mode 100644 index 00000000..de963478 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_parameter_subspace.py @@ -0,0 +1,106 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from marshmallow import fields, post_dump, post_load, pre_load + +from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpLearningRateScheduler +from azure.ai.ml._schema._sweep.search_space import ( + ChoiceSchema, + NormalSchema, + QNormalSchema, + QUniformSchema, + RandintSchema, + UniformSchema, +) +from azure.ai.ml._schema.core.fields import ( + DumpableIntegerField, + DumpableStringField, + NestedField, + StringTransformedEnum, + UnionField, +) +from azure.ai.ml._schema.core.schema import PatchedSchemaMeta +from azure.ai.ml._utils.utils import camel_to_snake + + +def choice_schema_of_type(cls, **kwargs): + class CustomChoiceSchema(ChoiceSchema): + values = fields.List(cls(**kwargs)) + + return CustomChoiceSchema() + + +def choice_and_single_value_schema_of_type(cls, **kwargs): + return UnionField([cls(**kwargs), NestedField(choice_schema_of_type(cls, **kwargs))]) + + +FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD = UnionField( + [ + fields.Float(), + DumpableIntegerField(strict=True), + NestedField(choice_schema_of_type(DumpableIntegerField, strict=True)), + NestedField(choice_schema_of_type(fields.Float)), + NestedField(UniformSchema()), + NestedField(QUniformSchema()), + NestedField(NormalSchema()), + NestedField(QNormalSchema()), + NestedField(RandintSchema()), + ] +) + +INT_SEARCH_SPACE_DISTRIBUTION_FIELD = UnionField( + [ + DumpableIntegerField(strict=True), + NestedField(choice_schema_of_type(DumpableIntegerField, strict=True)), + NestedField(RandintSchema()), + ] +) + +STRING_SEARCH_SPACE_DISTRIBUTION_FIELD = choice_and_single_value_schema_of_type(DumpableStringField) +BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD = choice_and_single_value_schema_of_type(fields.Bool) + + +class NlpParameterSubspaceSchema(metaclass=PatchedSchemaMeta): + gradient_accumulation_steps = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + learning_rate = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD + learning_rate_scheduler = choice_and_single_value_schema_of_type( + StringTransformedEnum, + allowed_values=[obj.value for obj in NlpLearningRateScheduler], + casing_transform=camel_to_snake, + ) + model_name = STRING_SEARCH_SPACE_DISTRIBUTION_FIELD + number_of_epochs = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + training_batch_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + validation_batch_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD + warmup_ratio = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD + weight_decay = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD + + @post_dump + def conversion(self, data, **kwargs): + if self.context.get("inside_pipeline", False): # pylint: disable=no-member + # AutoML job inside pipeline does load(dump) instead of calling to_rest_object + # explicitly for creating the autoRest Object from sdk job. + # Hence for pipeline job, we explicitly convert Sweep Distribution dict to str after dump in this method. + # For standalone automl job, same conversion happens in text_classification_job._to_rest_object() + from azure.ai.ml.entities._job.automl.search_space_utils import _convert_sweep_dist_dict_to_str_dict + + data = _convert_sweep_dist_dict_to_str_dict(data) + return data + + @pre_load + def before_make(self, data, **kwargs): + if self.context.get("inside_pipeline", False): # pylint: disable=no-member + from azure.ai.ml.entities._job.automl.search_space_utils import _convert_sweep_dist_str_to_dict + + # Converting Sweep Distribution str to Sweep Distribution dict for complying with search_space schema. + data = _convert_sweep_dist_str_to_dict(data) + return data + + @post_load + def make(self, data, **kwargs): + from azure.ai.ml.automl import NlpSearchSpace + + return NlpSearchSpace(**data) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_sweep_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_sweep_settings.py new file mode 100644 index 00000000..ab9b5ec3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_sweep_settings.py @@ -0,0 +1,27 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument,protected-access + +from marshmallow import post_load, pre_dump + +from azure.ai.ml._schema._sweep.sweep_fields_provider import EarlyTerminationField, SamplingAlgorithmField +from azure.ai.ml._schema.core.schema import PatchedSchemaMeta + + +class NlpSweepSettingsSchema(metaclass=PatchedSchemaMeta): + sampling_algorithm = SamplingAlgorithmField() + early_termination = EarlyTerminationField() + + @pre_dump + def conversion(self, data, **kwargs): + rest_obj = data._to_rest_object() + rest_obj.early_termination = data.early_termination + return rest_obj + + @post_load + def make(self, data, **kwargs): + from azure.ai.ml.automl import NlpSweepSettings + + return NlpSweepSettings(**data) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical.py new file mode 100644 index 00000000..f701ce95 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from marshmallow import fields + +from azure.ai.ml._schema.automl.automl_vertical import AutoMLVerticalSchema +from azure.ai.ml._schema.automl.featurization_settings import NlpFeaturizationSettingsSchema +from azure.ai.ml._schema.automl.nlp_vertical.nlp_fixed_parameters import NlpFixedParametersSchema +from azure.ai.ml._schema.automl.nlp_vertical.nlp_parameter_subspace import NlpParameterSubspaceSchema +from azure.ai.ml._schema.automl.nlp_vertical.nlp_sweep_settings import NlpSweepSettingsSchema +from azure.ai.ml._schema.automl.nlp_vertical.nlp_vertical_limit_settings import NlpLimitsSchema +from azure.ai.ml._schema.core.fields import NestedField, UnionField +from azure.ai.ml._schema.job.input_output_entry import MLTableInputSchema +from azure.ai.ml.constants._job.automl import AutoMLConstants + + +class NlpVerticalSchema(AutoMLVerticalSchema): + limits = NestedField(NlpLimitsSchema()) + sweep = NestedField(NlpSweepSettingsSchema()) + training_parameters = NestedField(NlpFixedParametersSchema()) + search_space = fields.List(NestedField(NlpParameterSubspaceSchema())) + featurization = NestedField(NlpFeaturizationSettingsSchema(), data_key=AutoMLConstants.FEATURIZATION_YAML) + validation_data = UnionField([NestedField(MLTableInputSchema)]) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical_limit_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical_limit_settings.py new file mode 100644 index 00000000..fe054f38 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical_limit_settings.py @@ -0,0 +1,23 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from marshmallow import fields, post_load + +from azure.ai.ml._schema.core.schema import PatchedSchemaMeta + + +class NlpLimitsSchema(metaclass=PatchedSchemaMeta): + max_concurrent_trials = fields.Int() + max_trials = fields.Int() + max_nodes = fields.Int() + timeout_minutes = fields.Int() # type duration + trial_timeout_minutes = fields.Int() # type duration + + @post_load + def make(self, data, **kwargs) -> "NlpLimitSettings": + from azure.ai.ml.automl import NlpLimitSettings + + return NlpLimitSettings(**data) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification.py new file mode 100644 index 00000000..14e0b7d6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification.py @@ -0,0 +1,36 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from typing import Any, Dict + +from marshmallow import post_load + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationPrimaryMetrics, TaskType +from azure.ai.ml._schema.automl.nlp_vertical.nlp_vertical import NlpVerticalSchema +from azure.ai.ml._schema.core.fields import StringTransformedEnum, fields +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants._job.automl import AutoMLConstants + + +class TextClassificationSchema(NlpVerticalSchema): + task_type = StringTransformedEnum( + allowed_values=TaskType.TEXT_CLASSIFICATION, + casing_transform=camel_to_snake, + data_key=AutoMLConstants.TASK_TYPE_YAML, + required=True, + ) + primary_metric = StringTransformedEnum( + allowed_values=[o.value for o in ClassificationPrimaryMetrics], + casing_transform=camel_to_snake, + load_default=camel_to_snake(ClassificationPrimaryMetrics.ACCURACY), + ) + # added here as for text_ner target_column_name is optional + target_column_name = fields.Str(required=True) + + @post_load + def make(self, data, **kwargs) -> Dict[str, Any]: + data.pop("task_type") + return data diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification_multilabel.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification_multilabel.py new file mode 100644 index 00000000..56cd5bc1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification_multilabel.py @@ -0,0 +1,36 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from typing import Any, Dict + +from marshmallow import post_load + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationMultilabelPrimaryMetrics, TaskType +from azure.ai.ml._schema.automl.nlp_vertical.nlp_vertical import NlpVerticalSchema +from azure.ai.ml._schema.core.fields import StringTransformedEnum, fields +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants._job.automl import AutoMLConstants + + +class TextClassificationMultilabelSchema(NlpVerticalSchema): + task_type = StringTransformedEnum( + allowed_values=TaskType.TEXT_CLASSIFICATION_MULTILABEL, + casing_transform=camel_to_snake, + data_key=AutoMLConstants.TASK_TYPE_YAML, + required=True, + ) + primary_metric = StringTransformedEnum( + allowed_values=ClassificationMultilabelPrimaryMetrics.ACCURACY, + casing_transform=camel_to_snake, + load_default=camel_to_snake(ClassificationMultilabelPrimaryMetrics.ACCURACY), + ) + # added here as for text_ner target_column_name is optional + target_column_name = fields.Str(required=True) + + @post_load + def make(self, data, **kwargs) -> Dict[str, Any]: + data.pop("task_type") + return data diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_ner.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_ner.py new file mode 100644 index 00000000..3609b1d0 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_ner.py @@ -0,0 +1,35 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from typing import Any, Dict + +from marshmallow import post_load + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationPrimaryMetrics, TaskType +from azure.ai.ml._schema.automl.nlp_vertical.nlp_vertical import NlpVerticalSchema +from azure.ai.ml._schema.core.fields import StringTransformedEnum, fields +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants._job.automl import AutoMLConstants + + +class TextNerSchema(NlpVerticalSchema): + task_type = StringTransformedEnum( + allowed_values=TaskType.TEXT_NER, + casing_transform=camel_to_snake, + data_key=AutoMLConstants.TASK_TYPE_YAML, + required=True, + ) + primary_metric = StringTransformedEnum( + allowed_values=ClassificationPrimaryMetrics.ACCURACY, + casing_transform=camel_to_snake, + load_default=camel_to_snake(ClassificationPrimaryMetrics.ACCURACY), + ) + target_column_name = fields.Str() + + @post_load + def make(self, data, **kwargs) -> Dict[str, Any]: + data.pop("task_type") + return data diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/__init__.py new file mode 100644 index 00000000..29a4fcd3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/__init__.py @@ -0,0 +1,5 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/classification.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/classification.py new file mode 100644 index 00000000..f9ce7b8b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/classification.py @@ -0,0 +1,37 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from typing import Any, Dict + +from marshmallow import fields, post_load + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationPrimaryMetrics, TaskType +from azure.ai.ml._schema.automl.table_vertical.table_vertical import AutoMLTableVerticalSchema +from azure.ai.ml._schema.automl.training_settings import ClassificationTrainingSettingsSchema +from azure.ai.ml._schema.core.fields import NestedField, StringTransformedEnum +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants._job.automl import AutoMLConstants + + +class AutoMLClassificationSchema(AutoMLTableVerticalSchema): + task_type = StringTransformedEnum( + allowed_values=TaskType.CLASSIFICATION, + casing_transform=camel_to_snake, + data_key=AutoMLConstants.TASK_TYPE_YAML, + required=True, + ) + primary_metric = StringTransformedEnum( + allowed_values=[o.value for o in ClassificationPrimaryMetrics], + casing_transform=camel_to_snake, + load_default=camel_to_snake(ClassificationPrimaryMetrics.AUC_WEIGHTED), + ) + positive_label = fields.Str() + training = NestedField(ClassificationTrainingSettingsSchema(), data_key=AutoMLConstants.TRAINING_YAML) + + @post_load + def make(self, data, **kwargs) -> Dict[str, Any]: + data.pop("task_type") + return data diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/forecasting.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/forecasting.py new file mode 100644 index 00000000..7f302c97 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/forecasting.py @@ -0,0 +1,38 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from typing import Any, Dict + +from marshmallow import post_load + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ForecastingPrimaryMetrics, TaskType +from azure.ai.ml._schema.automl.forecasting_settings import ForecastingSettingsSchema +from azure.ai.ml._schema.automl.table_vertical.table_vertical import AutoMLTableVerticalSchema +from azure.ai.ml._schema.automl.training_settings import ForecastingTrainingSettingsSchema +from azure.ai.ml._schema.core.fields import NestedField, StringTransformedEnum +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants._job.automl import AutoMLConstants + + +class AutoMLForecastingSchema(AutoMLTableVerticalSchema): + task_type = StringTransformedEnum( + allowed_values=TaskType.FORECASTING, + casing_transform=camel_to_snake, + data_key=AutoMLConstants.TASK_TYPE_YAML, + required=True, + ) + primary_metric = StringTransformedEnum( + allowed_values=[o.value for o in ForecastingPrimaryMetrics], + casing_transform=camel_to_snake, + load_default=camel_to_snake(ForecastingPrimaryMetrics.NORMALIZED_ROOT_MEAN_SQUARED_ERROR), + ) + training = NestedField(ForecastingTrainingSettingsSchema(), data_key=AutoMLConstants.TRAINING_YAML) + forecasting_settings = NestedField(ForecastingSettingsSchema(), data_key=AutoMLConstants.FORECASTING_YAML) + + @post_load + def make(self, data, **kwargs) -> Dict[str, Any]: + data.pop("task_type") + return data diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/regression.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/regression.py new file mode 100644 index 00000000..fc1e3900 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/regression.py @@ -0,0 +1,36 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from typing import Any, Dict + +from marshmallow import post_load + +from azure.ai.ml._restclient.v2023_04_01_preview.models import RegressionPrimaryMetrics, TaskType +from azure.ai.ml._schema.automl.table_vertical.table_vertical import AutoMLTableVerticalSchema +from azure.ai.ml._schema.automl.training_settings import RegressionTrainingSettingsSchema +from azure.ai.ml._schema.core.fields import NestedField, StringTransformedEnum +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants._job.automl import AutoMLConstants + + +class AutoMLRegressionSchema(AutoMLTableVerticalSchema): + task_type = StringTransformedEnum( + allowed_values=TaskType.REGRESSION, + casing_transform=camel_to_snake, + data_key=AutoMLConstants.TASK_TYPE_YAML, + required=True, + ) + primary_metric = StringTransformedEnum( + allowed_values=[o.value for o in RegressionPrimaryMetrics], + casing_transform=camel_to_snake, + load_default=camel_to_snake(RegressionPrimaryMetrics.NORMALIZED_ROOT_MEAN_SQUARED_ERROR), + ) + training = NestedField(RegressionTrainingSettingsSchema(), data_key=AutoMLConstants.TRAINING_YAML) + + @post_load + def make(self, data, **kwargs) -> Dict[str, Any]: + data.pop("task_type") + return data diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/table_vertical.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/table_vertical.py new file mode 100644 index 00000000..e98d7066 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/table_vertical.py @@ -0,0 +1,29 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from azure.ai.ml._restclient.v2023_04_01_preview.models import NCrossValidationsMode +from azure.ai.ml._schema.automl.automl_vertical import AutoMLVerticalSchema +from azure.ai.ml._schema.automl.featurization_settings import TableFeaturizationSettingsSchema +from azure.ai.ml._schema.automl.table_vertical.table_vertical_limit_settings import AutoMLTableLimitsSchema +from azure.ai.ml._schema.core.fields import NestedField, StringTransformedEnum, UnionField, fields +from azure.ai.ml._schema.job.input_output_entry import MLTableInputSchema +from azure.ai.ml.constants._job.automl import AutoMLConstants + + +class AutoMLTableVerticalSchema(AutoMLVerticalSchema): + limits = NestedField(AutoMLTableLimitsSchema(), data_key=AutoMLConstants.LIMITS_YAML) + featurization = NestedField(TableFeaturizationSettingsSchema(), data_key=AutoMLConstants.FEATURIZATION_YAML) + target_column_name = fields.Str(required=True) + validation_data = UnionField([NestedField(MLTableInputSchema)]) + validation_data_size = fields.Float() + cv_split_column_names = fields.List(fields.Str()) + n_cross_validations = UnionField( + [ + StringTransformedEnum(allowed_values=[NCrossValidationsMode.AUTO]), + fields.Int(), + ], + ) + weight_column_name = fields.Str() + test_data = UnionField([NestedField(MLTableInputSchema)]) + test_data_size = fields.Float() diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/table_vertical_limit_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/table_vertical_limit_settings.py new file mode 100644 index 00000000..122774a6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/table_vertical/table_vertical_limit_settings.py @@ -0,0 +1,28 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from marshmallow import fields, post_load + +from azure.ai.ml._schema import ExperimentalField +from azure.ai.ml._schema.core.schema import PatchedSchemaMeta +from azure.ai.ml.constants._job.automl import AutoMLConstants + + +class AutoMLTableLimitsSchema(metaclass=PatchedSchemaMeta): + enable_early_termination = fields.Bool() + exit_score = fields.Float() + max_concurrent_trials = fields.Int() + max_cores_per_trial = fields.Int() + max_nodes = ExperimentalField(fields.Int()) + max_trials = fields.Int(data_key=AutoMLConstants.MAX_TRIALS_YAML) + timeout_minutes = fields.Int() # type duration + trial_timeout_minutes = fields.Int() # type duration + + @post_load + def make(self, data, **kwargs) -> "TabularLimitSettings": + from azure.ai.ml.automl import TabularLimitSettings + + return TabularLimitSettings(**data) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/training_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/training_settings.py new file mode 100644 index 00000000..57a76892 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/training_settings.py @@ -0,0 +1,122 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +# pylint: disable=unused-argument + +from marshmallow import fields, post_load + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + ClassificationModels, + ForecastingModels, + RegressionModels, + StackMetaLearnerType, +) +from azure.ai.ml.constants import TabularTrainingMode +from azure.ai.ml._schema import ExperimentalField +from azure.ai.ml._schema.core.fields import NestedField, StringTransformedEnum +from azure.ai.ml._schema.core.schema import PatchedSchemaMeta +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._job.automl.training_settings import ( + ClassificationTrainingSettings, + ForecastingTrainingSettings, + RegressionTrainingSettings, +) + + +class StackEnsembleSettingsSchema(metaclass=PatchedSchemaMeta): + stack_meta_learner_kwargs = fields.Dict() + stack_meta_learner_train_percentage = fields.Float() + stack_meta_learner_type = StringTransformedEnum( + allowed_values=[o.value for o in StackMetaLearnerType], + casing_transform=camel_to_snake, + ) + + @post_load + def make(self, data, **kwargs): + # Converting it here, as there is no corresponding entity class + stack_meta_learner_type = data.pop("stack_meta_learner_type") + stack_meta_learner_type = StackMetaLearnerType[stack_meta_learner_type.upper()] + from azure.ai.ml.entities._job.automl.stack_ensemble_settings import StackEnsembleSettings + + return StackEnsembleSettings(stack_meta_learner_type=stack_meta_learner_type, **data) + + +class TrainingSettingsSchema(metaclass=PatchedSchemaMeta): + enable_dnn_training = fields.Bool() + enable_model_explainability = fields.Bool() + enable_onnx_compatible_models = fields.Bool() + enable_stack_ensemble = fields.Bool() + enable_vote_ensemble = fields.Bool() + ensemble_model_download_timeout = fields.Int(data_key=AutoMLConstants.ENSEMBLE_MODEL_DOWNLOAD_TIMEOUT_YAML) + stack_ensemble_settings = NestedField(StackEnsembleSettingsSchema()) + training_mode = ExperimentalField( + StringTransformedEnum( + allowed_values=[o.value for o in TabularTrainingMode], + casing_transform=camel_to_snake, + ) + ) + + +class ClassificationTrainingSettingsSchema(TrainingSettingsSchema): + allowed_training_algorithms = fields.List( + StringTransformedEnum( + allowed_values=[o.value for o in ClassificationModels], + casing_transform=camel_to_snake, + ), + data_key=AutoMLConstants.ALLOWED_ALGORITHMS_YAML, + ) + blocked_training_algorithms = fields.List( + StringTransformedEnum( + allowed_values=[o.value for o in ClassificationModels], + casing_transform=camel_to_snake, + ), + data_key=AutoMLConstants.BLOCKED_ALGORITHMS_YAML, + ) + + @post_load + def make(self, data, **kwargs) -> "ClassificationTrainingSettings": + return ClassificationTrainingSettings(**data) + + +class ForecastingTrainingSettingsSchema(TrainingSettingsSchema): + allowed_training_algorithms = fields.List( + StringTransformedEnum( + allowed_values=[o.value for o in ForecastingModels], + casing_transform=camel_to_snake, + ), + data_key=AutoMLConstants.ALLOWED_ALGORITHMS_YAML, + ) + blocked_training_algorithms = fields.List( + StringTransformedEnum( + allowed_values=[o.value for o in ForecastingModels], + casing_transform=camel_to_snake, + ), + data_key=AutoMLConstants.BLOCKED_ALGORITHMS_YAML, + ) + + @post_load + def make(self, data, **kwargs) -> "ForecastingTrainingSettings": + return ForecastingTrainingSettings(**data) + + +class RegressionTrainingSettingsSchema(TrainingSettingsSchema): + allowed_training_algorithms = fields.List( + StringTransformedEnum( + allowed_values=[o.value for o in RegressionModels], + casing_transform=camel_to_snake, + ), + data_key=AutoMLConstants.ALLOWED_ALGORITHMS_YAML, + ) + blocked_training_algorithms = fields.List( + StringTransformedEnum( + allowed_values=[o.value for o in RegressionModels], + casing_transform=camel_to_snake, + ), + data_key=AutoMLConstants.BLOCKED_ALGORITHMS_YAML, + ) + + @post_load + def make(self, data, **kwargs) -> "RegressionTrainingSettings": + return RegressionTrainingSettings(**data) |