aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/azure/ai/ml/_utils/_data_utils.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/azure/ai/ml/_utils/_data_utils.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/ml/_utils/_data_utils.py')
-rw-r--r--.venv/lib/python3.12/site-packages/azure/ai/ml/_utils/_data_utils.py82
1 files changed, 82 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_utils/_data_utils.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_utils/_data_utils.py
new file mode 100644
index 00000000..bde91d34
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_utils/_data_utils.py
@@ -0,0 +1,82 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+import io
+import json
+import logging
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Dict, Union
+from urllib.parse import urljoin, urlparse
+
+import yaml
+from jsonschema import Draft7Validator, ValidationError
+from jsonschema.exceptions import best_match
+
+from azure.ai.ml._artifacts._artifact_utilities import get_datastore_info, get_storage_client
+from azure.ai.ml._artifacts._constants import INVALID_MLTABLE_METADATA_SCHEMA_ERROR, INVALID_MLTABLE_METADATA_SCHEMA_MSG
+from azure.ai.ml.constants._common import DefaultOpenEncoding
+from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationErrorType, ValidationException
+from azure.ai.ml.operations._datastore_operations import DatastoreOperations
+
+from ._http_utils import HttpPipeline
+from ._storage_utils import AzureMLDatastorePathUri
+from .utils import load_yaml
+
+module_logger = logging.getLogger(__name__)
+
+
def download_mltable_metadata_schema(mltable_schema_url: str, requests_pipeline: HttpPipeline):
    """Fetch the MLTable metadata schema and return its decoded JSON body.

    :param mltable_schema_url: URL the schema is requested from.
    :param requests_pipeline: HTTP pipeline used to issue the GET request.
    :return: Whatever ``response.json()`` yields for the fetched document.
    """
    return requests_pipeline.get(mltable_schema_url).json()
+
+
def read_local_mltable_metadata_contents(*, path: str) -> Dict:
    """Load the ``MLTable`` file found directly inside a local folder.

    :param path: Folder that contains the ``MLTable`` metadata file.
    :return: The result of ``load_yaml`` for ``<path>/MLTable``.
    """
    mltable_file = Path(path) / "MLTable"
    return load_yaml(str(mltable_file))
+
+
def read_remote_mltable_metadata_contents(
    *,
    base_uri: str,
    datastore_operations: DatastoreOperations,
    requests_pipeline: HttpPipeline,
) -> Union[Dict, None]:
    """Fetch and parse the ``MLTable`` metadata file stored under a remote folder URI.

    :param base_uri: URI of the folder holding the ``MLTable`` file. Only the ``https`` and
        ``azureml`` schemes are handled.
    :param datastore_operations: Used to look up the datastore named in an ``azureml`` URI.
    :param requests_pipeline: HTTP pipeline used to fetch the file for an ``https`` URI.
    :return: The YAML-parsed file contents, or ``None`` when the URI scheme is not handled.
    """
    parsed_uri = urlparse(base_uri)
    scheme = parsed_uri.scheme

    if scheme == "https":
        # urljoin() swaps out the last path segment of its base unless the path ends with "/",
        # so "https://host/data/folder" would otherwise resolve to "https://host/data/MLTable".
        # A base that already ends with "/" is passed through untouched.
        folder_uri = base_uri
        if not parsed_uri.path.endswith("/"):
            folder_uri = parsed_uri._replace(path=f"{parsed_uri.path}/").geturl()
        # NOTE: urljoin() does not carry the base's query string or fragment over to the result.
        response = requests_pipeline.get(urljoin(folder_uri, "MLTable"))
        yaml_file = io.BytesIO(response.content)
        return yaml.safe_load(yaml_file)

    if scheme == "azureml":
        datastore_path_uri = AzureMLDatastorePathUri(base_uri)
        datastore_info = get_datastore_info(datastore_operations, datastore_path_uri.datastore)
        storage_client = get_storage_client(**datastore_info)
        # The download target only needs to live long enough for the file to be parsed.
        with TemporaryDirectory() as tmp_dir:
            starts_with = datastore_path_uri.path.rstrip("/")
            storage_client.download(f"{starts_with}/MLTable", tmp_dir)
            downloaded_mltable_path = Path(tmp_dir, "MLTable")
            with open(downloaded_mltable_path, "r", encoding=DefaultOpenEncoding.READ) as f:
                return yaml.safe_load(f)

    return None
+
+
def validate_mltable_metadata(*, mltable_metadata_dict: Dict, mltable_schema: Dict) -> None:
    """Validate parsed MLTable metadata against a JSON schema.

    Validation uses the Draft 7 validator; when several errors are found, the one selected by
    ``best_match`` is reported.

    :param mltable_metadata_dict: Parsed contents of an MLTable metadata file.
    :param mltable_schema: JSON schema the metadata must satisfy.
    :raises ValidationException: If the metadata does not satisfy the schema.
    """
    validator = Draft7Validator(mltable_schema)
    error: Union[ValidationError, None] = best_match(validator.iter_errors(mltable_metadata_dict))
    if error is None:
        return

    # error.path mixes property names (str) with array indexes (int); str.join() only accepts
    # strings, so every element is converted first. Without this, an error located inside a
    # list item raised TypeError here instead of the intended ValidationException.
    err_path = ".".join(str(part) for part in error.path)
    err_path = f"{err_path}: " if err_path != "" else ""
    msg = INVALID_MLTABLE_METADATA_SCHEMA_ERROR.format(
        jsonSchemaErrorPath=err_path,
        jsonSchemaMessage=error.message,
        invalidMLTableMsg=INVALID_MLTABLE_METADATA_SCHEMA_MSG,
        invalidSchemaSnippet=json.dumps(error.schema, indent=2),
    )
    raise ValidationException(
        message=msg,
        no_personal_data_message=INVALID_MLTABLE_METADATA_SCHEMA_MSG,
        error_type=ValidationErrorType.INVALID_VALUE,
        target=ErrorTarget.DATA,
        error_category=ErrorCategory.USER_ERROR,
    )