aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_component/code.py
blob: 1f838bec31b6e588ba830811b56fe154a8f58a52 (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
import os
from contextlib import contextmanager
from enum import Enum
from pathlib import Path
from typing import Any, Generator, List, Optional, Union

from azure.ai.ml._utils._arm_id_utils import is_ARM_id_for_resource, is_registry_id_for_resource
from azure.ai.ml._utils._asset_utils import IgnoreFile, get_ignore_file
from azure.ai.ml._utils.utils import is_private_preview_enabled
from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, AzureMLResourceType
from azure.ai.ml.entities._assets import Code
from azure.ai.ml.entities._validation import MutableValidationResult


class ComponentIgnoreFile(IgnoreFile):
    """Component-specific ignore file used for ignoring files in a component directory.

    Unlike the base class, this class is constructed from a *directory* path rather than the path of the
    ignore file itself; the ignore file is looked up in that directory via ``get_ignore_file``.

    :param directory_path: The directory path for the ignore file.
    :type directory_path: Union[str, Path]
    :keyword additional_includes_file_name: Name of the additional includes file in the root directory to be ignored.
    :paramtype additional_includes_file_name: Optional[str]
    :keyword skip_ignore_file: Whether to skip the ignore file, defaults to False.
    :paramtype skip_ignore_file: bool
    :keyword extra_ignore_list: List of additional ignore files to be considered during file exclusion.
    :paramtype extra_ignore_list: Optional[List[~azure.ai.ml._utils._asset_utils.IgnoreFile]]
    """

    # Patterns that are always ignored for component code, on top of whatever the ignore file lists.
    # NOTE: this must stay below the docstring, otherwise the class loses its ``__doc__``.
    _COMPONENT_CODE_IGNORES = ["__pycache__"]

    def __init__(
        self,
        directory_path: Union[str, Path],
        *,
        additional_includes_file_name: Optional[str] = None,
        skip_ignore_file: bool = False,
        extra_ignore_list: Optional[List[IgnoreFile]] = None,
    ):
        self._base_path: Union[str, Path] = Path(directory_path)
        self._extra_ignore_list: List[IgnoreFile] = extra_ignore_list or []
        # only the additional include file in root directory is ignored
        # additional include files in subdirectories are not processed so keep them
        self._additional_includes_file_name = additional_includes_file_name
        # note: the parameter changes to directory path in this class, rather than file path
        file_path = None if skip_ignore_file else get_ignore_file(directory_path).path
        super().__init__(file_path=file_path)

    def exists(self) -> bool:
        """Check if the ignore file exists.

        Always reports True: ``_get_ignore_list`` yields the built-in component ignores even when there is
        no ignore file behind this object.

        :return: True
        :rtype: bool
        """
        return True

    @property
    def base_path(self) -> Union[str, Path]:
        """Get the base path of the ignore file.

        :return: The base path, i.e. the directory given at construction or the one set by ``rebase``.
        :rtype: Union[str, Path]
        """
        # for component ignore file, the base path can be different from file.parent
        return self._base_path

    def rebase(self, directory_path: Union[str, Path]) -> "ComponentIgnoreFile":
        """Rebase the ignore file to a new directory.

        The object is modified in place; the ignore rules themselves are left untouched.

        :param directory_path: The new directory path.
        :type directory_path: Union[str, Path]
        :return: This same ComponentIgnoreFile object, to allow chaining.
        :rtype: ComponentIgnoreFile
        """
        self._base_path = directory_path
        return self

    def is_file_excluded(self, file_path: Union[str, Path]) -> bool:
        """Check if a file should be excluded based on the ignore file rules.

        A file is excluded when it is the root-level additional includes file, when any of the extra
        ignore files excludes it, or when the rules of this ignore file exclude it.

        :param file_path: The file path.
        :type file_path: Union[str, Path]
        :return: True if the file should be excluded, False otherwise.
        :rtype: bool
        """
        if self._additional_includes_file_name and self._get_rel_path(file_path) == self._additional_includes_file_name:
            return True
        if any(ignore_file.is_file_excluded(file_path) for ignore_file in self._extra_ignore_list):
            return True
        res: bool = super().is_file_excluded(file_path)
        return res

    def merge(self, other_path: Path) -> "ComponentIgnoreFile":
        """Build an ignore file for another path that also honours the rules of this one.

        :param other_path: The path to build the merged ignore file for.
        :type other_path: Path
        :return: This object unchanged if ``other_path`` is a file; otherwise a new ComponentIgnoreFile based
            on ``other_path`` that carries this object and its extra ignore files as extra ignore files.
        :rtype: ComponentIgnoreFile
        """
        if other_path.is_file():
            return self
        return ComponentIgnoreFile(other_path, extra_ignore_list=self._extra_ignore_list + [self])

    def _get_ignore_list(self) -> List[str]:
        """Retrieves the list of ignores from ignore file

        Override to add custom ignores.

        :return: The ignore rules, always including the built-in component ignores. The returned list is a
            fresh object, so callers may modify it without affecting the class-level defaults.
        :rtype: List[str]
        """
        # self.exists() is overridden to always be True, so ask the base class whether a file really exists.
        if not super().exists():
            # hand out a copy rather than the shared class attribute
            return list(self._COMPONENT_CODE_IGNORES)
        res: List[str] = super()._get_ignore_list() + self._COMPONENT_CODE_IGNORES
        return res


class CodeType(Enum):
    """Code type.

    Classification of a component's origin code value, as assigned by ``_get_code_type``.
    """

    # a string that is neither an ARM/registry id nor prefixed with "git+"
    LOCAL = "local"
    # no code value at all (the origin code value is None)
    NONE = "none"
    # a string starting with "git+"
    GIT = "git"
    # an ARM id of a code resource, or a registry id
    ARM_ID = "arm_id"
    # a value that is not a string and therefore cannot be classified
    UNKNOWN = "unknown"


def _get_code_type(origin_code_value: Optional[str]) -> CodeType:
    """Classify an origin code value.

    :param origin_code_value: The origin code value of a component; may be None.
    :type origin_code_value: Optional[str]
    :return: ``CodeType.NONE`` for None, ``CodeType.UNKNOWN`` for a non-string, ``CodeType.ARM_ID`` for an ARM id
        of a code resource or a registry id, ``CodeType.GIT`` for a string starting with "git+", and
        ``CodeType.LOCAL`` for any other string.
    :rtype: CodeType
    """
    if origin_code_value is None:
        return CodeType.NONE

    if not isinstance(origin_code_value, str):
        # note that:
        # 1. Code & CodeOperation are not public for now
        # 2. AnonymousCodeSchema is not within CodeField
        # 3. Code will be returned as an arm id as an attribute of a component when getting a component from remote
        # So origin_code_value should never be a Code object, or an exception will be raised
        # in validation stage.
        return CodeType.UNKNOWN

    # the registry check is only evaluated when the value is not an ARM id of a code resource
    refers_to_remote_asset = is_ARM_id_for_resource(
        origin_code_value, AzureMLResourceType.CODE
    ) or is_registry_id_for_resource(origin_code_value)
    if refers_to_remote_asset:
        return CodeType.ARM_ID

    return CodeType.GIT if origin_code_value.startswith("git+") else CodeType.LOCAL


class ComponentCodeMixin:
    """Mixin class for components with local files as part of the component. Those local files will be uploaded to
    blob storage and further referenced as a code asset in arm id. In below docstring, we will refer to those local
    files as "code".

    The major interface of this mixin is
    self._append_diagnostics_and_check_if_origin_code_reliable_for_local_path_validation and self._build_code.
    The former appends code-related diagnostics to a validation result and tells whether the origin code value
    is a local path. self._build_code will yield a temp Code object for server-side code asset creation.
    """

    def _get_base_path_for_code(self) -> Path:
        """Get base path for additional includes.

        :return: The base path
        :rtype: Path
        :raises NotImplementedError: If the component has no attribute named by ``BASE_PATH_CONTEXT_KEY``.
        """
        if hasattr(self, BASE_PATH_CONTEXT_KEY):
            return Path(getattr(self, BASE_PATH_CONTEXT_KEY))
        raise NotImplementedError(
            "Component must have a base_path attribute to use ComponentCodeMixin. "
            "Please set base_path in __init__ or override _get_base_path_for_code."
        )

    @classmethod
    def _get_code_field_name(cls) -> str:
        """Get the field name for code.

        Will be used to get origin code value by default and will be used as field name of validation diagnostics.

        :return: Code field name
        :rtype: str
        """
        return "code"

    def _get_origin_code_value(self) -> Union[str, os.PathLike, None]:
        """Get origin code value.
        Origin code value is either an absolute path or a relative path to base path if it's a local path.
        Additional includes are only supported for component types with code attribute. Origin code path will be copied
        to a temp folder along with additional includes to form a new code content.

        :return: The value of the attribute named by ``_get_code_field_name``, or None if it is not set.
        :rtype: Union[str, os.PathLike, None]
        """
        return getattr(self, self._get_code_field_name(), None)

    def _fill_back_code_value(self, value: str) -> None:
        """Fill resolved code value back to the component.

        :param value: resolved code value
        :type value: str
        :return: no return
        :rtype: None
        """
        setattr(self, self._get_code_field_name(), value)

    def _get_origin_code_in_str(self) -> Optional[str]:
        """Get origin code value in str to simplify following logic.

        :return: None if there is no origin code value; the forward-slash form for a ``Path``; the file system
            path for any other ``os.PathLike``; ``str(value)`` otherwise.
        :rtype: Optional[str]
        """
        origin_code_value = self._get_origin_code_value()
        if origin_code_value is None:
            return None
        if isinstance(origin_code_value, Path):
            # as_posix renders the path with forward slashes
            return origin_code_value.as_posix()
        if isinstance(origin_code_value, os.PathLike):
            # str() of an arbitrary path-like object is not guaranteed to be its path (e.g. os.DirEntry),
            # so go through the __fspath__ protocol; fsdecode also turns a bytes path into str.
            return os.fsdecode(origin_code_value)
        return str(origin_code_value)

    def _append_diagnostics_and_check_if_origin_code_reliable_for_local_path_validation(
        self, base_validation_result: Optional[MutableValidationResult] = None
    ) -> bool:
        """Append diagnostics from customized validation logic to the base validation result and check if origin code
        value is valid for path validation.

        For customized validation logic, this method shouldn't cover the validation logic duplicated with schema
        validation, like local code existence check.
        For the check, as "code" includes file dependencies of a component, other fields may depend on those files.
        However, the origin code value may not be reliable for validation of those fields. For example:
        1. origin code value can be a remote git path or an arm id of a code asset.
        2. some file operations may be done during build_code, which makes final code content different from what we can
        get from origin code value.
        So, we use this function to check if origin code value is reliable for further local path validation.

        :param base_validation_result: base validation result to append diagnostics to.
        :type base_validation_result: MutableValidationResult
        :return: whether origin code value is reliable for further local path validation.
        :rtype: bool
        """
        # A git path is only accepted when private preview features are enabled; otherwise record an error
        # on the code field saying that the code value is not valid (if there is a result to record it on).
        code_type = _get_code_type(self._get_origin_code_in_str())
        if code_type == CodeType.GIT and not is_private_preview_enabled() and base_validation_result is not None:
            base_validation_result.append_error(
                message="Not a valid code value: git paths are not supported.",
                yaml_path=self._get_code_field_name(),
            )
        return code_type == CodeType.LOCAL

    @contextmanager
    def _build_code(self) -> Generator:
        """Create a Code object if necessary based on origin code value and yield it.

        :return: If built code is the same as its origin value, do nothing and yield None.
           Otherwise, yield a Code object pointing to the code.
        :rtype: Iterable[Optional[Code]]
        """
        origin_code_value = self._get_origin_code_in_str()
        code_type = _get_code_type(origin_code_value)

        if code_type == CodeType.GIT:
            # git also need to be resolved into arm id
            yield Code(path=origin_code_value)
        elif code_type in [CodeType.LOCAL, CodeType.NONE]:
            # a missing code value also goes through _try_build_local_code, which may be overridden
            code: Any
            # false-positive by pylint, hence disable it
            # (https://github.com/pylint-dev/pylint/blob/main/doc/data/messages
            # /c/contextmanager-generator-missing-cleanup/details.rst)
            with self._try_build_local_code() as code:  # pylint:disable=contextmanager-generator-missing-cleanup
                yield code
        else:
            # arm id and unknown need no extra resolution
            yield None

    @contextmanager
    def _try_build_local_code(self) -> Generator:
        """Extract the logic of _build_code for local code for further override.

        :return: The Code object if could be constructed, None otherwise
        :rtype: Iterable[Optional[Code]]
        """
        origin_code_value = self._get_origin_code_in_str()
        if origin_code_value is None:
            yield None
        else:
            base_path = self._get_base_path_for_code()
            # the ignore file is looked up where the code actually lives, so resolve relative values
            # against the base path first
            absolute_path: Union[str, Path] = (
                origin_code_value if os.path.isabs(origin_code_value) else base_path / origin_code_value
            )

            yield Code(
                base_path=base_path,
                path=origin_code_value,
                ignore_file=ComponentIgnoreFile(absolute_path),
            )

    def _with_local_code(self) -> bool:
        """Tell whether the origin code value is classified as a local path.

        :return: True if the origin code value is of type ``CodeType.LOCAL``, False otherwise.
        :rtype: bool
        """
        # TODO: remove this method after we have a better way to do this judge in cache_utils
        origin_code_value = self._get_origin_code_in_str()
        code_type = _get_code_type(origin_code_value)
        return code_type == CodeType.LOCAL