aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/azure/ai/ml/_schema/automl/featurization_settings.py
blob: 19998e45c094d7c3976bb8f42c142193e1df98e5 (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

# pylint: disable=unused-argument

from marshmallow import fields as flds
from marshmallow import post_load

from azure.ai.ml._restclient.v2023_04_01_preview.models import BlockedTransformers
from azure.ai.ml._schema.core.fields import NestedField, StringTransformedEnum, UnionField
from azure.ai.ml._schema.core.schema import PatchedSchemaMeta
from azure.ai.ml._utils.utils import camel_to_snake
from azure.ai.ml.constants._job.automl import AutoMLConstants, AutoMLTransformerParameterKeys


class ColumnTransformerSchema(metaclass=PatchedSchemaMeta):
    """Schema for one column-transformer entry.

    Declares two fields, ``fields`` and ``parameters``, and converts the
    loaded data into a ``ColumnTransformer`` entity once loading has finished.
    """

    # NOTE: this attribute is itself called ``fields``, which is why the
    # marshmallow ``fields`` module is imported under the alias ``flds``.
    # List of strings (presumably the column names the transformer targets -
    # confirm against ``ColumnTransformer``).
    fields = flds.List(flds.Str())

    # String-keyed mapping of transformer parameters. Each value is a union
    # of float and string; ``None`` is accepted as a value as well.
    parameters = flds.Dict(
        keys=flds.Str(),
        values=UnionField(
            [flds.Float(), flds.Str()],
            allow_none=True,
            load_default=None,
        ),
    )

    @post_load
    def make(self, data, **kwargs):
        """Build a ``ColumnTransformer`` from the loaded data.

        :param data: Loaded field values, forwarded as keyword arguments.
        :param kwargs: Extra arguments passed by marshmallow; unused.
        :return: The ``ColumnTransformer`` built from ``data``.
        """
        # Imported inside the hook rather than at module level - presumably
        # to avoid a circular import with azure.ai.ml.automl; confirm before
        # hoisting.
        from azure.ai.ml.automl import ColumnTransformer

        transformer = ColumnTransformer(**data)
        return transformer


class FeaturizationSettingsSchema(metaclass=PatchedSchemaMeta):
    """Base schema for the featurization-settings schemas in this module.

    It declares only the ``dataset_language`` field and defines no
    ``post_load`` hook of its own; the subclasses in this module add the
    hook that builds the concrete settings object.
    """

    # String field (presumably a language code for the dataset - confirm
    # the expected format against the featurization settings entities).
    dataset_language = flds.Str()


class NlpFeaturizationSettingsSchema(FeaturizationSettingsSchema):
    """Schema for NLP featurization settings.

    Adds no fields beyond ``dataset_language``; its purpose is to turn the
    loaded data into an ``NlpFeaturizationSettings`` entity.
    """

    # Same declaration as on FeaturizationSettingsSchema; repeated here
    # explicitly even though the parent already declares it.
    dataset_language = flds.Str()

    @post_load
    def make(self, data, **kwargs) -> "NlpFeaturizationSettings":
        """Build an ``NlpFeaturizationSettings`` from the loaded data.

        :param data: Loaded field values, forwarded as keyword arguments.
        :param kwargs: Extra arguments passed by marshmallow; unused.
        :return: The ``NlpFeaturizationSettings`` built from ``data``.
        """
        # Imported inside the hook rather than at module level - presumably
        # to avoid a circular import with azure.ai.ml.automl; confirm before
        # hoisting.
        from azure.ai.ml.automl import NlpFeaturizationSettings

        settings = NlpFeaturizationSettings(**data)
        return settings


class TableFeaturizationSettingsSchema(FeaturizationSettingsSchema):
    """Schema for tabular featurization settings.

    Extends the base schema (``dataset_language``) with the fields below.
    Note the naming: this class is ``Table...Schema`` but the entity it
    builds is ``TabularFeaturizationSettings``.
    """

    # Featurization mode: one of AUTO / OFF / CUSTOM, with AUTO used when
    # the input does not provide a value.
    mode = StringTransformedEnum(
        allowed_values=[AutoMLConstants.AUTO, AutoMLConstants.OFF, AutoMLConstants.CUSTOM],
        load_default=AutoMLConstants.AUTO,
    )

    # List of transformer names, restricted to the values of the REST
    # ``BlockedTransformers`` enum. ``camel_to_snake`` is supplied as the
    # casing transform (see StringTransformedEnum for when it is applied).
    blocked_transformers = flds.List(
        StringTransformedEnum(
            allowed_values=[transformer.value for transformer in BlockedTransformers],
            casing_transform=camel_to_snake,
        )
    )

    # String-to-string mapping (by its name, column name -> column type).
    column_name_and_types = flds.Dict(keys=flds.Str(), values=flds.Str())

    # Mapping from a transformer key (restricted to the values of
    # ``AutoMLTransformerParameterKeys``) to a list of column-transformer
    # entries, each loaded through ``ColumnTransformerSchema``.
    transformer_params = flds.Dict(
        keys=StringTransformedEnum(
            allowed_values=[param_key.value for param_key in AutoMLTransformerParameterKeys],
            casing_transform=camel_to_snake,
        ),
        values=flds.List(NestedField(ColumnTransformerSchema())),
    )

    # Boolean switch for DNN-based featurization.
    enable_dnn_featurization = flds.Bool()

    @post_load
    def make(self, data, **kwargs) -> "TabularFeaturizationSettings":
        """Build a ``TabularFeaturizationSettings`` from the loaded data.

        :param data: Loaded field values, forwarded as keyword arguments.
        :param kwargs: Extra arguments passed by marshmallow; unused.
        :return: The ``TabularFeaturizationSettings`` built from ``data``.
        """
        # Imported inside the hook rather than at module level - presumably
        # to avoid a circular import with azure.ai.ml.automl; confirm before
        # hoisting.
        from azure.ai.ml.automl import TabularFeaturizationSettings

        settings = TabularFeaturizationSettings(**data)
        return settings