diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical')
9 files changed, 325 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/__init__.py new file mode 100644 index 00000000..29a4fcd3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/__init__.py @@ -0,0 +1,5 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_fixed_parameters.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_fixed_parameters.py new file mode 100644 index 00000000..2a5cb336 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_fixed_parameters.py @@ -0,0 +1,33 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
# ---------------------------------------------------------

# pylint: disable=unused-argument

from marshmallow import fields, post_load

from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpLearningRateScheduler
from azure.ai.ml._schema.core.fields import StringTransformedEnum
from azure.ai.ml._schema.core.schema import PatchedSchemaMeta
from azure.ai.ml._utils.utils import camel_to_snake


class NlpFixedParametersSchema(metaclass=PatchedSchemaMeta):
    """Schema describing a single, fixed set of NLP training parameters.

    Every field is optional. After loading, the validated values are handed
    to ``NlpFixedParameters`` (see ``make``).
    """

    gradient_accumulation_steps = fields.Int()
    learning_rate = fields.Float()
    # Allowed values are taken from the REST enum; casing goes through camel_to_snake.
    learning_rate_scheduler = StringTransformedEnum(
        allowed_values=[scheduler.value for scheduler in NlpLearningRateScheduler],
        casing_transform=camel_to_snake,
    )
    model_name = fields.Str()
    number_of_epochs = fields.Int()
    training_batch_size = fields.Int()
    validation_batch_size = fields.Int()
    warmup_ratio = fields.Float()
    weight_decay = fields.Float()

    @post_load
    def make(self, data, **kwargs):
        """Turn the loaded field dictionary into an ``NlpFixedParameters`` entity.

        :param data: Mapping of field name to deserialized value.
        :return: ``NlpFixedParameters`` built with ``data`` as keyword arguments.
        """
        # The entity is imported at call time rather than at module import time
        # (presumably to avoid an import cycle -- confirm before hoisting).
        from azure.ai.ml.automl import NlpFixedParameters

        fixed_parameters = NlpFixedParameters(**data)
        return fixed_parameters


# diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_parameter_subspace.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_parameter_subspace.py
# new file mode 100644
# index 00000000..de963478
# --- /dev/null
# +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_parameter_subspace.py
# @@ -0,0 +1,106 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

# pylint: disable=unused-argument

from marshmallow import fields, post_dump, post_load, pre_load

from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpLearningRateScheduler
from azure.ai.ml._schema._sweep.search_space import (
    ChoiceSchema,
    NormalSchema,
    QNormalSchema,
    QUniformSchema,
    RandintSchema,
    UniformSchema,
)
from azure.ai.ml._schema.core.fields import (
    DumpableIntegerField,
    DumpableStringField,
    NestedField,
    StringTransformedEnum,
    UnionField,
)
from azure.ai.ml._schema.core.schema import PatchedSchemaMeta
from azure.ai.ml._utils.utils import camel_to_snake


def choice_schema_of_type(cls, **kwargs):
    """Create a choice schema instance whose ``values`` are typed by ``cls``.

    :param cls: Field class used for each element of ``values``.
    :param kwargs: Keyword arguments forwarded to ``cls`` when it is instantiated.
    :return: An instance of a ``ChoiceSchema`` subclass whose ``values`` field
        is ``fields.List(cls(**kwargs))``.
    """

    class CustomChoiceSchema(ChoiceSchema):
        values = fields.List(cls(**kwargs))

    typed_choice_schema = CustomChoiceSchema()
    return typed_choice_schema


def choice_and_single_value_schema_of_type(cls, **kwargs):
    """Create a union field accepting a single ``cls`` value or a choice of them.

    :param cls: Field class for the plain value and for the choice elements.
    :param kwargs: Keyword arguments forwarded to ``cls``.
    :return: ``UnionField`` listing the plain field first, then the nested choice schema.
    """
    single_value_field = cls(**kwargs)
    choice_field = NestedField(choice_schema_of_type(cls, **kwargs))
    return UnionField([single_value_field, choice_field])


# Member order is kept exactly as declared: plain values first, then choices,
# then the distribution schemas.
FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD = UnionField(
    [
        fields.Float(),
        DumpableIntegerField(strict=True),
        NestedField(choice_schema_of_type(DumpableIntegerField, strict=True)),
        NestedField(choice_schema_of_type(fields.Float)),
        NestedField(UniformSchema()),
        NestedField(QUniformSchema()),
        NestedField(NormalSchema()),
        NestedField(QNormalSchema()),
        NestedField(RandintSchema()),
    ]
)

# Integer hyperparameters: a strict integer, a choice of strict integers, or randint.
INT_SEARCH_SPACE_DISTRIBUTION_FIELD = UnionField(
    [
        DumpableIntegerField(strict=True),
        NestedField(choice_schema_of_type(DumpableIntegerField, strict=True)),
        NestedField(RandintSchema()),
    ]
)

# String and boolean hyperparameters: a single value or a choice of values.
STRING_SEARCH_SPACE_DISTRIBUTION_FIELD = choice_and_single_value_schema_of_type(DumpableStringField)
BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD = choice_and_single_value_schema_of_type(fields.Bool)


class NlpParameterSubspaceSchema(metaclass=PatchedSchemaMeta):
    """Schema for one NLP hyperparameter search subspace.

    Each field accepts either a plain value or one of the sweep distribution
    schemas listed in the shared ``*_SEARCH_SPACE_DISTRIBUTION_FIELD`` unions.
    Loading produces an ``NlpSearchSpace`` (see ``make``).
    """

    gradient_accumulation_steps = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
    learning_rate = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
    # Either one scheduler name or a choice over scheduler names from the REST enum.
    learning_rate_scheduler = choice_and_single_value_schema_of_type(
        StringTransformedEnum,
        allowed_values=[scheduler.value for scheduler in NlpLearningRateScheduler],
        casing_transform=camel_to_snake,
    )
    model_name = STRING_SEARCH_SPACE_DISTRIBUTION_FIELD
    number_of_epochs = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
    training_batch_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
    validation_batch_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
    warmup_ratio = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
    weight_decay = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD

    @post_dump
    def conversion(self, data, **kwargs):
        """Stringify sweep distributions after dump when running inside a pipeline.

        :param data: The dumped dictionary.
        :return: ``data`` unchanged outside a pipeline; otherwise the result of
            ``_convert_sweep_dist_dict_to_str_dict(data)``.
        """
        if not self.context.get("inside_pipeline", False):  # pylint: disable=no-member
            return data

        # An AutoML job nested in a pipeline is turned into its autoRest object
        # through load(dump) rather than an explicit to_rest_object call, so the
        # sweep distribution dicts have to be converted to strings here. For a
        # standalone AutoML job the same conversion is done in
        # text_classification_job._to_rest_object().
        from azure.ai.ml.entities._job.automl.search_space_utils import _convert_sweep_dist_dict_to_str_dict

        return _convert_sweep_dist_dict_to_str_dict(data)

    @pre_load
    def before_make(self, data, **kwargs):
        """Parse stringified sweep distributions before load when inside a pipeline.

        :param data: The raw input about to be loaded.
        :return: ``data`` unchanged outside a pipeline; otherwise the result of
            ``_convert_sweep_dist_str_to_dict(data)``.
        """
        if not self.context.get("inside_pipeline", False):  # pylint: disable=no-member
            return data

        from azure.ai.ml.entities._job.automl.search_space_utils import _convert_sweep_dist_str_to_dict

        # The search_space schema expects sweep distributions as dicts, not strings.
        return _convert_sweep_dist_str_to_dict(data)

    @post_load
    def make(self, data, **kwargs):
        """Build an ``NlpSearchSpace`` from the loaded field values.

        :param data: Mapping of field name to deserialized value.
        :return: ``NlpSearchSpace`` built with ``data`` as keyword arguments.
        """
        from azure.ai.ml.automl import NlpSearchSpace

        search_space = NlpSearchSpace(**data)
        return search_space


# diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_sweep_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_sweep_settings.py
# new file mode 100644
# index 00000000..ab9b5ec3
# --- /dev/null
# +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_sweep_settings.py
# @@ -0,0 +1,27 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

# pylint: disable=unused-argument,protected-access

from marshmallow import post_load, pre_dump

from azure.ai.ml._schema._sweep.sweep_fields_provider import EarlyTerminationField, SamplingAlgorithmField
from azure.ai.ml._schema.core.schema import PatchedSchemaMeta


class NlpSweepSettingsSchema(metaclass=PatchedSchemaMeta):
    """Schema for NLP sweep settings: sampling algorithm and early termination.

    Loading produces an ``NlpSweepSettings`` (see ``make``).
    """

    sampling_algorithm = SamplingAlgorithmField()
    early_termination = EarlyTerminationField()

    @pre_dump
    def conversion(self, data, **kwargs):
        """Swap the entity for its REST object before dumping.

        :param data: Object exposing ``_to_rest_object()`` and ``early_termination``.
        :return: The REST object, with ``early_termination`` overwritten by the
            value held on ``data`` itself.
        """
        rest_settings = data._to_rest_object()
        # The early-termination value is taken from the source object, not from
        # the REST conversion.
        rest_settings.early_termination = data.early_termination
        return rest_settings

    @post_load
    def make(self, data, **kwargs):
        """Build an ``NlpSweepSettings`` from the loaded field values.

        :param data: Mapping of field name to deserialized value.
        :return: ``NlpSweepSettings`` built with ``data`` as keyword arguments.
        """
        from azure.ai.ml.automl import NlpSweepSettings

        sweep_settings = NlpSweepSettings(**data)
        return sweep_settings


# diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical.py
# new file mode 100644
# index 00000000..f701ce95
# --- /dev/null
# +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical.py
# @@ -0,0 +1,24 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

from marshmallow import fields

from azure.ai.ml._schema.automl.automl_vertical import AutoMLVerticalSchema
from azure.ai.ml._schema.automl.featurization_settings import NlpFeaturizationSettingsSchema
from azure.ai.ml._schema.automl.nlp_vertical.nlp_fixed_parameters import NlpFixedParametersSchema
from azure.ai.ml._schema.automl.nlp_vertical.nlp_parameter_subspace import NlpParameterSubspaceSchema
from azure.ai.ml._schema.automl.nlp_vertical.nlp_sweep_settings import NlpSweepSettingsSchema
from azure.ai.ml._schema.automl.nlp_vertical.nlp_vertical_limit_settings import NlpLimitsSchema
from azure.ai.ml._schema.core.fields import NestedField, UnionField
from azure.ai.ml._schema.job.input_output_entry import MLTableInputSchema
from azure.ai.ml.constants._job.automl import AutoMLConstants


class NlpVerticalSchema(AutoMLVerticalSchema):
    """Base schema shared by the NLP AutoML task schemas.

    Extends ``AutoMLVerticalSchema`` with the NLP-specific nested sections
    declared below.
    """

    # Trial/timeout limits.
    limits = NestedField(NlpLimitsSchema())
    # Sampling algorithm and early termination for sweeping.
    sweep = NestedField(NlpSweepSettingsSchema())
    # One fixed set of training parameters.
    training_parameters = NestedField(NlpFixedParametersSchema())
    # A list of parameter subspaces to sweep over.
    search_space = fields.List(NestedField(NlpParameterSubspaceSchema()))
    # Read from / written to the key named by AutoMLConstants.FEATURIZATION_YAML.
    featurization = NestedField(NlpFeaturizationSettingsSchema(), data_key=AutoMLConstants.FEATURIZATION_YAML)
    # Single-member union: only an MLTable input schema is listed.
    validation_data = UnionField([NestedField(MLTableInputSchema)])


# diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical_limit_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical_limit_settings.py
# new file mode 100644
# index 00000000..fe054f38
# --- /dev/null
# +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical_limit_settings.py
# @@ -0,0 +1,23 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

# pylint: disable=unused-argument

from marshmallow import fields, post_load

from azure.ai.ml._schema.core.schema import PatchedSchemaMeta


class NlpLimitsSchema(metaclass=PatchedSchemaMeta):
    """Schema for the limit settings of an NLP AutoML job.

    All fields are optional integers. Loading produces an ``NlpLimitSettings``
    (see ``make``).
    """

    max_concurrent_trials = fields.Int()
    max_trials = fields.Int()
    max_nodes = fields.Int()
    # The two timeouts are plain integers here; the original marks them "type duration".
    timeout_minutes = fields.Int()  # type duration
    trial_timeout_minutes = fields.Int()  # type duration

    @post_load
    def make(self, data, **kwargs) -> "NlpLimitSettings":
        """Build an ``NlpLimitSettings`` from the loaded field values.

        :param data: Mapping of field name to deserialized value.
        :return: ``NlpLimitSettings`` built with ``data`` as keyword arguments.
        """
        # Imported at call time, as in the sibling schemas.
        from azure.ai.ml.automl import NlpLimitSettings

        limit_settings = NlpLimitSettings(**data)
        return limit_settings


# diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification.py
# new file mode 100644
# index 00000000..14e0b7d6
# --- /dev/null
# +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification.py
# @@ -0,0 +1,36 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

# pylint: disable=unused-argument

from typing import Any, Dict

from marshmallow import post_load

from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationPrimaryMetrics, TaskType
from azure.ai.ml._schema.automl.nlp_vertical.nlp_vertical import NlpVerticalSchema
from azure.ai.ml._schema.core.fields import StringTransformedEnum, fields
from azure.ai.ml._utils.utils import camel_to_snake
from azure.ai.ml.constants._job.automl import AutoMLConstants


class TextClassificationSchema(NlpVerticalSchema):
    """Schema for the text classification AutoML task.

    Adds the task discriminator, the primary metric and a required target
    column on top of ``NlpVerticalSchema``.
    """

    # Required discriminator, keyed by AutoMLConstants.TASK_TYPE_YAML; only the
    # TEXT_CLASSIFICATION task type is allowed.
    task_type = StringTransformedEnum(
        allowed_values=TaskType.TEXT_CLASSIFICATION,
        casing_transform=camel_to_snake,
        data_key=AutoMLConstants.TASK_TYPE_YAML,
        required=True,
    )
    # Any classification primary metric is allowed; the load default is accuracy.
    primary_metric = StringTransformedEnum(
        allowed_values=[metric.value for metric in ClassificationPrimaryMetrics],
        casing_transform=camel_to_snake,
        load_default=camel_to_snake(ClassificationPrimaryMetrics.ACCURACY),
    )
    # Declared on this schema rather than the shared base because text_ner
    # treats target_column_name as optional.
    target_column_name = fields.Str(required=True)

    @post_load
    def make(self, data, **kwargs) -> Dict[str, Any]:
        """Drop the task discriminator and return the remaining loaded values.

        :param data: Mapping of field name to deserialized value; mutated in place.
        :return: The same mapping without its ``"task_type"`` entry.
        """
        data.pop("task_type")
        return data


# diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification_multilabel.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification_multilabel.py
# new file mode 100644
# index 00000000..56cd5bc1
# --- /dev/null
# +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification_multilabel.py
# @@ -0,0 +1,36 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

# pylint: disable=unused-argument

from typing import Any, Dict

from marshmallow import post_load

from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationMultilabelPrimaryMetrics, TaskType
from azure.ai.ml._schema.automl.nlp_vertical.nlp_vertical import NlpVerticalSchema
from azure.ai.ml._schema.core.fields import StringTransformedEnum, fields
from azure.ai.ml._utils.utils import camel_to_snake
from azure.ai.ml.constants._job.automl import AutoMLConstants


class TextClassificationMultilabelSchema(NlpVerticalSchema):
    """Schema for the multilabel text classification AutoML task.

    Adds the task discriminator, the primary metric and a required target
    column on top of ``NlpVerticalSchema``.
    """

    # Required discriminator, keyed by AutoMLConstants.TASK_TYPE_YAML; only the
    # TEXT_CLASSIFICATION_MULTILABEL task type is allowed.
    task_type = StringTransformedEnum(
        allowed_values=TaskType.TEXT_CLASSIFICATION_MULTILABEL,
        casing_transform=camel_to_snake,
        data_key=AutoMLConstants.TASK_TYPE_YAML,
        required=True,
    )
    # Only the ACCURACY member is passed as an allowed value; it is also the load default.
    primary_metric = StringTransformedEnum(
        allowed_values=ClassificationMultilabelPrimaryMetrics.ACCURACY,
        casing_transform=camel_to_snake,
        load_default=camel_to_snake(ClassificationMultilabelPrimaryMetrics.ACCURACY),
    )
    # Declared on this schema rather than the shared base because text_ner
    # treats target_column_name as optional.
    target_column_name = fields.Str(required=True)

    @post_load
    def make(self, data, **kwargs) -> Dict[str, Any]:
        """Drop the task discriminator and return the remaining loaded values.

        :param data: Mapping of field name to deserialized value; mutated in place.
        :return: The same mapping without its ``"task_type"`` entry.
        """
        data.pop("task_type")
        return data


# diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_ner.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_ner.py
# new file mode 100644
# index 00000000..3609b1d0
# --- /dev/null
# +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_ner.py
# @@ -0,0 +1,35 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

# pylint: disable=unused-argument

from typing import Any, Dict

from marshmallow import post_load

from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationPrimaryMetrics, TaskType
from azure.ai.ml._schema.automl.nlp_vertical.nlp_vertical import NlpVerticalSchema
from azure.ai.ml._schema.core.fields import StringTransformedEnum, fields
from azure.ai.ml._utils.utils import camel_to_snake
from azure.ai.ml.constants._job.automl import AutoMLConstants


class TextNerSchema(NlpVerticalSchema):
    """Schema for the text NER AutoML task.

    Adds the task discriminator, the primary metric and an optional target
    column on top of ``NlpVerticalSchema``.
    """

    # Required discriminator, keyed by AutoMLConstants.TASK_TYPE_YAML; only the
    # TEXT_NER task type is allowed.
    task_type = StringTransformedEnum(
        allowed_values=TaskType.TEXT_NER,
        casing_transform=camel_to_snake,
        data_key=AutoMLConstants.TASK_TYPE_YAML,
        required=True,
    )
    # Only the ACCURACY member is passed as an allowed value; it is also the load default.
    primary_metric = StringTransformedEnum(
        allowed_values=ClassificationPrimaryMetrics.ACCURACY,
        casing_transform=camel_to_snake,
        load_default=camel_to_snake(ClassificationPrimaryMetrics.ACCURACY),
    )
    # Not marked required here, unlike the two text classification schemas.
    target_column_name = fields.Str()

    @post_load
    def make(self, data, **kwargs) -> Dict[str, Any]:
        """Drop the task discriminator and return the remaining loaded values.

        :param data: Mapping of field name to deserialized value; mutated in place.
        :return: The same mapping without its ``"task_type"`` entry.
        """
        data.pop("task_type")
        return data
