9 files changed, 325 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/__init__.py
new file mode 100644
index 00000000..29a4fcd3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/__init__.py
@@ -0,0 +1,5 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+__path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_fixed_parameters.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_fixed_parameters.py
new file mode 100644
index 00000000..2a5cb336
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_fixed_parameters.py
@@ -0,0 +1,33 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=unused-argument
+
+from marshmallow import fields, post_load
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpLearningRateScheduler
+from azure.ai.ml._schema.core.fields import StringTransformedEnum
+from azure.ai.ml._schema.core.schema import PatchedSchemaMeta
+from azure.ai.ml._utils.utils import camel_to_snake
+
+
+class NlpFixedParametersSchema(metaclass=PatchedSchemaMeta):  # Schema whose load result is an NlpFixedParameters (see make).
+    gradient_accumulation_steps = fields.Int()
+    learning_rate = fields.Float()
+    learning_rate_scheduler = StringTransformedEnum(
+        allowed_values=[obj.value for obj in NlpLearningRateScheduler],  # every member value of the enum
+        casing_transform=camel_to_snake,  # NOTE(review): presumably applied to the value text - confirm
+    )
+    model_name = fields.Str()
+    number_of_epochs = fields.Int()
+    training_batch_size = fields.Int()
+    validation_batch_size = fields.Int()
+    warmup_ratio = fields.Float()
+    weight_decay = fields.Float()
+
+    @post_load
+    def make(self, data, **kwargs):  # post-load hook: builds the entity from the loaded mapping
+        from azure.ai.ml.automl import NlpFixedParameters  # local import; presumably avoids a cycle - TODO confirm
+
+        return NlpFixedParameters(**data)  # each loaded key is passed as a keyword argument
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_parameter_subspace.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_parameter_subspace.py
new file mode 100644
index 00000000..de963478
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_parameter_subspace.py
@@ -0,0 +1,106 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=unused-argument
+
+from marshmallow import fields, post_dump, post_load, pre_load
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpLearningRateScheduler
+from azure.ai.ml._schema._sweep.search_space import (
+    ChoiceSchema,
+    NormalSchema,
+    QNormalSchema,
+    QUniformSchema,
+    RandintSchema,
+    UniformSchema,
+)
+from azure.ai.ml._schema.core.fields import (
+    DumpableIntegerField,
+    DumpableStringField,
+    NestedField,
+    StringTransformedEnum,
+    UnionField,
+)
+from azure.ai.ml._schema.core.schema import PatchedSchemaMeta
+from azure.ai.ml._utils.utils import camel_to_snake
+
+
+def choice_schema_of_type(cls, **kwargs):  # Return a ChoiceSchema instance whose `values` is a List of cls(**kwargs).
+    class CustomChoiceSchema(ChoiceSchema):  # a new subclass is defined on every call
+        values = fields.List(cls(**kwargs))  # cls is a field class; kwargs are forwarded to its constructor
+
+    return CustomChoiceSchema()  # an instance is returned, not the class
+
+
+def choice_and_single_value_schema_of_type(cls, **kwargs):  # UnionField of a plain cls field and a nested choice schema.
+    return UnionField([cls(**kwargs), NestedField(choice_schema_of_type(cls, **kwargs))])
+
+
+FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD = UnionField(  # shared by learning_rate, warmup_ratio and weight_decay below
+    [
+        fields.Float(),  # plain float field
+        DumpableIntegerField(strict=True),  # plain integer field
+        NestedField(choice_schema_of_type(DumpableIntegerField, strict=True)),  # choice schema with integer values
+        NestedField(choice_schema_of_type(fields.Float)),  # choice schema with float values
+        NestedField(UniformSchema()),  # this and the next four: schemas imported from _sweep.search_space
+        NestedField(QUniformSchema()),
+        NestedField(NormalSchema()),
+        NestedField(QNormalSchema()),
+        NestedField(RandintSchema()),
+    ]
+)
+
+INT_SEARCH_SPACE_DISTRIBUTION_FIELD = UnionField(  # shared by the integer-typed attributes of the schema below
+    [
+        DumpableIntegerField(strict=True),  # plain integer field
+        NestedField(choice_schema_of_type(DumpableIntegerField, strict=True)),  # choice schema with integer values
+        NestedField(RandintSchema()),
+    ]
+)
+
+STRING_SEARCH_SPACE_DISTRIBUTION_FIELD = choice_and_single_value_schema_of_type(DumpableStringField)  # used by model_name
+BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD = choice_and_single_value_schema_of_type(fields.Bool)  # not referenced in this module
+
+
+class NlpParameterSubspaceSchema(metaclass=PatchedSchemaMeta):  # Schema whose load result is an NlpSearchSpace (see make).
+    gradient_accumulation_steps = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
+    learning_rate = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
+    learning_rate_scheduler = choice_and_single_value_schema_of_type(
+        StringTransformedEnum,
+        allowed_values=[obj.value for obj in NlpLearningRateScheduler],  # every member value of the enum
+        casing_transform=camel_to_snake,
+    )
+    model_name = STRING_SEARCH_SPACE_DISTRIBUTION_FIELD
+    number_of_epochs = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
+    training_batch_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
+    validation_batch_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
+    warmup_ratio = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
+    weight_decay = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
+
+    @post_dump
+    def conversion(self, data, **kwargs):  # post-dump hook; returns data unchanged unless inside_pipeline is set
+        if self.context.get("inside_pipeline", False):  # pylint: disable=no-member
+            # An AutoML job inside a pipeline does load(dump) instead of calling to_rest_object
+            # explicitly to create the autoRest object from the SDK job.
+            # Hence, for a pipeline job, the sweep distribution dict is converted to str here, after dump.
+            # For a standalone AutoML job, the same conversion happens in text_classification_job._to_rest_object()
+            from azure.ai.ml.entities._job.automl.search_space_utils import _convert_sweep_dist_dict_to_str_dict
+
+            data = _convert_sweep_dist_dict_to_str_dict(data)
+        return data
+
+    @pre_load
+    def before_make(self, data, **kwargs):  # pre-load hook; the reverse conversion of `conversion` above
+        if self.context.get("inside_pipeline", False):  # pylint: disable=no-member
+            from azure.ai.ml.entities._job.automl.search_space_utils import _convert_sweep_dist_str_to_dict
+
+            # Convert the sweep distribution str to a sweep distribution dict to comply with the search_space schema.
+            data = _convert_sweep_dist_str_to_dict(data)
+        return data
+
+    @post_load
+    def make(self, data, **kwargs):  # post-load hook: builds the entity from the loaded mapping
+        from azure.ai.ml.automl import NlpSearchSpace  # local import; presumably avoids a cycle - TODO confirm
+
+        return NlpSearchSpace(**data)  # each loaded key is passed as a keyword argument
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_sweep_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_sweep_settings.py
new file mode 100644
index 00000000..ab9b5ec3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_sweep_settings.py
@@ -0,0 +1,27 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=unused-argument,protected-access
+
+from marshmallow import post_load, pre_dump
+
+from azure.ai.ml._schema._sweep.sweep_fields_provider import EarlyTerminationField, SamplingAlgorithmField
+from azure.ai.ml._schema.core.schema import PatchedSchemaMeta
+
+
+class NlpSweepSettingsSchema(metaclass=PatchedSchemaMeta):  # Schema whose load result is an NlpSweepSettings (see make).
+    sampling_algorithm = SamplingAlgorithmField()
+    early_termination = EarlyTerminationField()
+
+    @pre_dump
+    def conversion(self, data, **kwargs):  # pre-dump hook: the object that gets dumped is the REST form of `data`
+        rest_obj = data._to_rest_object()
+        rest_obj.early_termination = data.early_termination  # replace with the value held by `data` itself
+        return rest_obj
+
+    @post_load
+    def make(self, data, **kwargs):  # post-load hook: builds the entity from the loaded mapping
+        from azure.ai.ml.automl import NlpSweepSettings  # local import; presumably avoids a cycle - TODO confirm
+
+        return NlpSweepSettings(**data)  # each loaded key is passed as a keyword argument
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical.py
new file mode 100644
index 00000000..f701ce95
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical.py
@@ -0,0 +1,24 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+from marshmallow import fields
+
+from azure.ai.ml._schema.automl.automl_vertical import AutoMLVerticalSchema
+from azure.ai.ml._schema.automl.featurization_settings import NlpFeaturizationSettingsSchema
+from azure.ai.ml._schema.automl.nlp_vertical.nlp_fixed_parameters import NlpFixedParametersSchema
+from azure.ai.ml._schema.automl.nlp_vertical.nlp_parameter_subspace import NlpParameterSubspaceSchema
+from azure.ai.ml._schema.automl.nlp_vertical.nlp_sweep_settings import NlpSweepSettingsSchema
+from azure.ai.ml._schema.automl.nlp_vertical.nlp_vertical_limit_settings import NlpLimitsSchema
+from azure.ai.ml._schema.core.fields import NestedField, UnionField
+from azure.ai.ml._schema.job.input_output_entry import MLTableInputSchema
+from azure.ai.ml.constants._job.automl import AutoMLConstants
+
+
+class NlpVerticalSchema(AutoMLVerticalSchema):  # Adds the NLP-specific nested fields on top of AutoMLVerticalSchema.
+    limits = NestedField(NlpLimitsSchema())
+    sweep = NestedField(NlpSweepSettingsSchema())
+    training_parameters = NestedField(NlpFixedParametersSchema())
+    search_space = fields.List(NestedField(NlpParameterSubspaceSchema()))  # a list: one subspace schema per element
+    featurization = NestedField(NlpFeaturizationSettingsSchema(), data_key=AutoMLConstants.FEATURIZATION_YAML)
+    validation_data = UnionField([NestedField(MLTableInputSchema)])  # union with a single alternative
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical_limit_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical_limit_settings.py
new file mode 100644
index 00000000..fe054f38
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/nlp_vertical_limit_settings.py
@@ -0,0 +1,23 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=unused-argument
+
+from marshmallow import fields, post_load
+
+from azure.ai.ml._schema.core.schema import PatchedSchemaMeta
+
+
+class NlpLimitsSchema(metaclass=PatchedSchemaMeta):  # Schema whose load result is an NlpLimitSettings (see make).
+    max_concurrent_trials = fields.Int()
+    max_trials = fields.Int()
+    max_nodes = fields.Int()
+    timeout_minutes = fields.Int()  # type duration (declared here as a plain Int field)
+    trial_timeout_minutes = fields.Int()  # type duration (declared here as a plain Int field)
+
+    @post_load
+    def make(self, data, **kwargs) -> "NlpLimitSettings":  # post-load hook; return annotation is a string
+        from azure.ai.ml.automl import NlpLimitSettings  # local import; presumably avoids a cycle - TODO confirm
+
+        return NlpLimitSettings(**data)  # each loaded key is passed as a keyword argument
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification.py
new file mode 100644
index 00000000..14e0b7d6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification.py
@@ -0,0 +1,36 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=unused-argument
+
+from typing import Any, Dict
+
+from marshmallow import post_load
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationPrimaryMetrics, TaskType
+from azure.ai.ml._schema.automl.nlp_vertical.nlp_vertical import NlpVerticalSchema
+from azure.ai.ml._schema.core.fields import StringTransformedEnum, fields
+from azure.ai.ml._utils.utils import camel_to_snake
+from azure.ai.ml.constants._job.automl import AutoMLConstants
+
+
+class TextClassificationSchema(NlpVerticalSchema):  # NlpVerticalSchema plus the text-classification task fields.
+    task_type = StringTransformedEnum(
+        allowed_values=TaskType.TEXT_CLASSIFICATION,  # a single allowed value
+        casing_transform=camel_to_snake,
+        data_key=AutoMLConstants.TASK_TYPE_YAML,  # external key name; the attribute stays `task_type`
+        required=True,
+    )
+    primary_metric = StringTransformedEnum(
+        allowed_values=[o.value for o in ClassificationPrimaryMetrics],  # every member value of the enum
+        casing_transform=camel_to_snake,
+        load_default=camel_to_snake(ClassificationPrimaryMetrics.ACCURACY),  # value used when the input omits it
+    )
+    # Required here; declared per task because TextNerSchema leaves target_column_name optional.
+    target_column_name = fields.Str(required=True)
+
+    @post_load
+    def make(self, data, **kwargs) -> Dict[str, Any]:  # post-load hook: returns the dict itself, not an entity
+        data.pop("task_type")  # task_type is removed from the returned dict
+        return data
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification_multilabel.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification_multilabel.py
new file mode 100644
index 00000000..56cd5bc1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_classification_multilabel.py
@@ -0,0 +1,36 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=unused-argument
+
+from typing import Any, Dict
+
+from marshmallow import post_load
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationMultilabelPrimaryMetrics, TaskType
+from azure.ai.ml._schema.automl.nlp_vertical.nlp_vertical import NlpVerticalSchema
+from azure.ai.ml._schema.core.fields import StringTransformedEnum, fields
+from azure.ai.ml._utils.utils import camel_to_snake
+from azure.ai.ml.constants._job.automl import AutoMLConstants
+
+
+class TextClassificationMultilabelSchema(NlpVerticalSchema):  # NlpVerticalSchema plus the multilabel task fields.
+    task_type = StringTransformedEnum(
+        allowed_values=TaskType.TEXT_CLASSIFICATION_MULTILABEL,  # a single allowed value
+        casing_transform=camel_to_snake,
+        data_key=AutoMLConstants.TASK_TYPE_YAML,  # external key name; the attribute stays `task_type`
+        required=True,
+    )
+    primary_metric = StringTransformedEnum(
+        allowed_values=ClassificationMultilabelPrimaryMetrics.ACCURACY,  # only the ACCURACY member, not the whole enum
+        casing_transform=camel_to_snake,
+        load_default=camel_to_snake(ClassificationMultilabelPrimaryMetrics.ACCURACY),  # used when the input omits it
+    )
+    # Required here; declared per task because TextNerSchema leaves target_column_name optional.
+    target_column_name = fields.Str(required=True)
+
+    @post_load
+    def make(self, data, **kwargs) -> Dict[str, Any]:  # post-load hook: returns the dict itself, not an entity
+        data.pop("task_type")  # task_type is removed from the returned dict
+        return data
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_ner.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_ner.py
new file mode 100644
index 00000000..3609b1d0
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/nlp_vertical/text_ner.py
@@ -0,0 +1,35 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=unused-argument
+
+from typing import Any, Dict
+
+from marshmallow import post_load
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationPrimaryMetrics, TaskType
+from azure.ai.ml._schema.automl.nlp_vertical.nlp_vertical import NlpVerticalSchema
+from azure.ai.ml._schema.core.fields import StringTransformedEnum, fields
+from azure.ai.ml._utils.utils import camel_to_snake
+from azure.ai.ml.constants._job.automl import AutoMLConstants
+
+
+class TextNerSchema(NlpVerticalSchema):  # NlpVerticalSchema plus the text-NER task fields.
+    task_type = StringTransformedEnum(
+        allowed_values=TaskType.TEXT_NER,  # a single allowed value
+        casing_transform=camel_to_snake,
+        data_key=AutoMLConstants.TASK_TYPE_YAML,  # external key name; the attribute stays `task_type`
+        required=True,
+    )
+    primary_metric = StringTransformedEnum(
+        allowed_values=ClassificationPrimaryMetrics.ACCURACY,  # only the ACCURACY member, not the whole enum
+        casing_transform=camel_to_snake,
+        load_default=camel_to_snake(ClassificationPrimaryMetrics.ACCURACY),  # used when the input omits it
+    )
+    target_column_name = fields.Str()  # no required=True here, unlike the two text-classification schemas
+
+    @post_load
+    def make(self, data, **kwargs) -> Dict[str, Any]:  # post-load hook: returns the dict itself, not an entity
+        data.pop("task_type")  # task_type is removed from the returned dict
+        return data