aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/azure/ai/ml/_exception_helper.py
blob: 343ea7141d4e1c7d29d88c277fbe581d13e6ecb9 (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

import json
import logging
import traceback
from typing import Dict, NoReturn, Optional, Tuple, Union

from colorama import Fore, Style, init
from marshmallow.exceptions import ValidationError as SchemaValidationError

from azure.ai.ml.constants._common import (
    DATASTORE_SCHEMA_TYPES,
    SCHEMA_VALIDATION_ERROR_TEMPLATE,
    YAML_CREATION_ERROR_DESCRIPTION,
)
from azure.ai.ml.exceptions import ErrorTarget, MlException, ValidationErrorType, ValidationException

module_logger = logging.getLogger(__name__)


def _find_deepest_dictionary(data: dict) -> dict:
    """Find deepest dictionary in nested dictionary.

    Used here to get nested error message. Can't be in utils.py due to circular import.

    :param data: The dictionary
    :type data: dict
    :return: The deepest dictionary
    :rtype: dict
    """

    for v in data.values():
        if isinstance(v, dict):
            return _find_deepest_dictionary(v)

    return data


def get_entity_type(error: Union[str, SchemaValidationError, ValidationException]) -> Tuple[str, str]:
    """Get entity name from schema type referenced in the error.

    :param error: The validation error
    :type error: Union[str, SchemaValidationError, ValidationException]
    :return: The entity type and additional details
    :rtype: Tuple[str, str]
    """
    details = ""

    error_name = error.exc_msg if hasattr(error, "exc_msg") else error.split(":")[0]
    if isinstance(error, ValidationException):
        entity_type = error.target
    else:
        details += error_name + "\n"
        if "DataSchema" in error_name:
            entity_type = ErrorTarget.DATA
        elif "ModelSchema" in error_name:
            entity_type = ErrorTarget.MODEL
        elif "EnvironmentSchema" in error_name:
            entity_type = ErrorTarget.ENVIRONMENT
        elif "CodeAssetSchema" in error_name:
            entity_type = ErrorTarget.CODE
        elif any(x in error_name for x in DATASTORE_SCHEMA_TYPES):
            entity_type = ErrorTarget.DATASTORE
        elif "BaseJobSchema" in error_name:
            entity_type = ErrorTarget.JOB
        elif "CommandSchema" in error_name:
            entity_type = ErrorTarget.COMMAND_JOB
        elif "CommandJobSchema" in error_name:
            entity_type = ErrorTarget.COMMAND_JOB
        elif "SweepSchema" in error_name:
            entity_type = ErrorTarget.SWEEP_JOB
        elif "SweepJobSchema" in error_name:
            entity_type = ErrorTarget.SWEEP_JOB
        elif "AutoMLJobSchema" in error_name:
            entity_type = ErrorTarget.AUTOML
        else:
            raise NotImplementedError()

    return entity_type, details


def format_details_section(
    error: Union[str, SchemaValidationError, ValidationException], details: str, entity_type: str
) -> Tuple[Dict[str, bool], str]:
    """Builds strings for details of the error message template's Details section.

    :param error: The validation error
    :type error: Union[str, SchemaValidationError, ValidationException]
    :param details: The details
    :type details: str
    :param entity_type: The entity type
    :type entity_type: str
    :return: Error type map and formatted message
    :rtype: Tuple[Dict[str, bool], str]
    """

    error_types = {
        ValidationErrorType.INVALID_VALUE: False,
        ValidationErrorType.UNKNOWN_FIELD: False,
        ValidationErrorType.MISSING_FIELD: False,
        ValidationErrorType.FILE_OR_FOLDER_NOT_FOUND: False,
        ValidationErrorType.CANNOT_SERIALIZE: False,
        ValidationErrorType.CANNOT_PARSE: False,
        ValidationErrorType.RESOURCE_NOT_FOUND: False,
    }

    if hasattr(error, "message"):
        error_types[error.error_type] = True
        details += f"\n\n{Fore.RED}(x) {error.message}{Fore.RESET}\n"
    else:
        if (
            entity_type == ErrorTarget.COMMAND_JOB
        ):  # Command Job errors require different parsing because they're using ValidationResult.
            # This separate treatment will be unnecessary once
            # task https://msdata.visualstudio.com/Vienna/_workitems/edit/1925982/ is resolved.
            parsed_error_msg = error[error.find("{") : (error.rfind("}") + 1)]
            error_msg = json.loads(parsed_error_msg).get("errors")[0].get("message")
            error_msg = error_msg[error_msg.find("{") : (error_msg.rfind("}") + 1)]
            error_msg = error_msg[: (error_msg.rfind("}") + 1)]
            error_msg = json.loads(error_msg)
        else:
            parsed_error_msg = error[error.find("{") : (error.rfind("}") + 1)]
            error_msg = json.loads(parsed_error_msg)

        for field, field_error in zip(error_msg.keys(), error_msg.values()):
            try:
                field_error = field_error[0]
            except KeyError:
                error_msg = _find_deepest_dictionary(field_error)
                if entity_type not in [ErrorTarget.PIPELINE, ErrorTarget.COMMAND_JOB]:
                    field = list(error_msg.keys())[0]
                field_error = list(error_msg.values())[0][0]
            field_error_string = str(field_error)

            if not error_types[ValidationErrorType.INVALID_VALUE] and any(
                s in field_error_string for s in ["Not a valid", "is not in set"]
            ):
                error_types[ValidationErrorType.INVALID_VALUE] = True
            if not error_types[ValidationErrorType.UNKNOWN_FIELD] and "Unknown field" in field_error_string:
                error_types[ValidationErrorType.UNKNOWN_FIELD] = True
            if (
                not error_types[ValidationErrorType.MISSING_FIELD]
                and "Missing data for required field" in field_error_string
            ):
                error_types[ValidationErrorType.MISSING_FIELD] = True
            if (
                not error_types[ValidationErrorType.FILE_OR_FOLDER_NOT_FOUND]
                and "No such file or directory" in field_error_string
            ):
                error_types[ValidationErrorType.FILE_OR_FOLDER_NOT_FOUND] = True
            if not error_types[ValidationErrorType.CANNOT_SERIALIZE] and "Cannot dump" in field_error_string:
                error_types[ValidationErrorType.CANNOT_SERIALIZE] = True
            if (
                not error_types[ValidationErrorType.CANNOT_PARSE]
                and "Error while parsing yaml file" in field_error_string
            ):
                error_types[ValidationErrorType.CANNOT_PARSE] = True

            if isinstance(field_error, dict):
                field_error = f"{list(field_error.keys())[0]}:\n    - {list(field_error.values())[0][0]}"

            details += f"{Fore.RED}\n(x) {field}:\n- {field_error}{Fore.RESET}\n"
    return error_types, details


def format_errors_and_resolutions_sections(
    entity_type: str, error_types: Dict[str, bool], cli: bool
) -> Tuple[str, str]:
    """Builds strings for details of the error message template's Errors and Resolutions sections.

    :param entity_type: The entity type
    :type entity_type: str
    :param error_types: A dict that specifies which errors were encountered
    :type error_types: Dict[str, bool]
    :param cli: Whether this was invoked from Azure CLI AzureML extension.
    :type cli: bool
    :return: Error and Resolution messages
    :rtype: Tuple[str, str]
    """

    resolutions = ""
    errors = ""
    count = 1

    if error_types[ValidationErrorType.INVALID_VALUE]:
        errors += f"\n{count}) One or more fields are invalid"
        resolutions += (
            f"\n{count}) Double-check that all specified parameters are of the correct types and formats "
            f"prescribed by the {entity_type} schema."
        )
        count += 1
    if error_types[ValidationErrorType.UNKNOWN_FIELD]:
        errors += f"\n{count}) A least one unrecognized parameter is specified"
        resolutions += f"\n{count}) Remove any parameters not prescribed by the {entity_type} schema."
        count += 1
    if error_types[ValidationErrorType.MISSING_FIELD]:
        errors += f"\n{count}) At least one required parameter is missing"
        resolutions += f"\n{count}) Ensure all parameters required by the {entity_type} schema are specified."
        count += 1
    if error_types[ValidationErrorType.FILE_OR_FOLDER_NOT_FOUND]:
        errors += f"\n{count}) One or more files or folders do not exist.\n"
        resolutions += f"\n{count}) Double-check the directory path you provided and enter the correct path."
        count += 1
    if error_types[ValidationErrorType.CANNOT_SERIALIZE]:
        errors += f"\n{count}) One or more fields cannot be serialized.\n"
        resolutions += f"\n{count}) Double-check that all specified parameters are of the correct types and formats \
        prescribed by the {entity_type} schema."
        count += 1
    if error_types[ValidationErrorType.CANNOT_PARSE]:
        errors += f"\n{count}) YAML file cannot be parsed.\n"
        resolutions += f"\n{count}) Double-check your YAML file for syntax and formatting errors."
        count += 1
    if error_types[ValidationErrorType.RESOURCE_NOT_FOUND]:
        errors += f"\n{count}) Resource was not found.\n"
        resolutions += (
            f"\n{count}) Double-check that the resource has been specified correctly and " "that you have access to it."
        )
        count += 1

    if cli:
        errors = "Error: " + errors
    else:
        errors = Fore.BLACK + errors + Fore.RESET

    return errors, resolutions


def format_create_validation_error(
    error: Union[SchemaValidationError, ValidationException],
    yaml_operation: bool,
    cli: bool = False,
    raw_error: Optional[str] = None,
) -> str:
    """Formats a detailed error message for validation errors.

    :param error: The validation error
    :type error: Union[SchemaValidationError, ValidationException]
    :param yaml_operation: Whether the exception was caused by yaml validation.
    :type yaml_operation: bool
    :param cli: Whether is invoked from Azure CLI AzureML Extension. Defaults to False
    :type cli: bool
    :param raw_error: The raw exception message
    :type raw_error: Optional[str]
    :return: Formatted error message
    :rtype: str
    """
    from azure.ai.ml._schema._datastore import (
        AzureBlobSchema,
        AzureDataLakeGen1Schema,
        AzureDataLakeGen2Schema,
        AzureFileSchema,
    )
    from azure.ai.ml._schema._sweep import SweepJobSchema
    from azure.ai.ml._schema.assets.data import DataSchema
    from azure.ai.ml._schema.assets.environment import EnvironmentSchema
    from azure.ai.ml._schema.assets.model import ModelSchema
    from azure.ai.ml._schema.job import CommandJobSchema
    from azure.ai.ml.entities._util import REF_DOC_ERROR_MESSAGE_MAP

    if raw_error:
        error = raw_error
    entity_type, details = get_entity_type(error)
    error_types, details = format_details_section(error, details, entity_type)
    errors, resolutions = format_errors_and_resolutions_sections(entity_type, error_types, cli)

    if yaml_operation:
        description = YAML_CREATION_ERROR_DESCRIPTION.format(entity_type=entity_type)
        description = Style.BRIGHT + description + Style.RESET_ALL

        if entity_type == ErrorTarget.MODEL:
            schema_type = ModelSchema
        elif entity_type == ErrorTarget.DATA:
            schema_type = DataSchema
        elif entity_type == ErrorTarget.COMMAND_JOB:
            schema_type = CommandJobSchema
        elif entity_type == ErrorTarget.SWEEP_JOB:
            schema_type = SweepJobSchema
        elif entity_type in [ErrorTarget.BLOB_DATASTORE, ErrorTarget.DATASTORE]:
            schema_type = AzureBlobSchema
        elif entity_type == ErrorTarget.GEN1_DATASTORE:
            schema_type = AzureDataLakeGen1Schema
        elif entity_type == ErrorTarget.GEN2_DATASTORE:
            schema_type = AzureDataLakeGen2Schema
        elif entity_type == ErrorTarget.FILE_DATASTORE:
            schema_type = AzureFileSchema
        elif entity_type == ErrorTarget.ENVIRONMENT:
            schema_type = EnvironmentSchema
        else:
            schema_type = ""

        resolutions += " " + REF_DOC_ERROR_MESSAGE_MAP.get(schema_type, "")
    else:
        description = ""

    resolutions += "\n"
    formatted_error = SCHEMA_VALIDATION_ERROR_TEMPLATE.format(
        description=description,
        error_msg=errors,
        parsed_error_details=details,
        resolutions=resolutions,
        text_color=Fore.WHITE,
        link_color=Fore.CYAN,
        reset=Fore.RESET,
    )

    return formatted_error


def log_and_raise_error(error: Exception, debug: bool = False, yaml_operation: bool = False) -> NoReturn:
    init()

    # use an f-string to automatically call str() on error
    if debug:
        module_logger.error(traceback.print_exc())

    if isinstance(error, SchemaValidationError):
        module_logger.debug(traceback.format_exc())
        try:
            formatted_error = format_create_validation_error(error.messages[0], yaml_operation=yaml_operation)
        except NotImplementedError:
            formatted_error = error
    elif isinstance(error, ValidationException):
        module_logger.debug(traceback.format_exc())
        try:
            error_type = error.error_type
            if error_type == ValidationErrorType.GENERIC:
                formatted_error = error
            else:
                formatted_error = format_create_validation_error(error, yaml_operation=yaml_operation)
        except NotImplementedError:
            formatted_error = error
    else:
        raise error

    raise MlException(message=formatted_error, no_personal_data_message=formatted_error)