| author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
|---|---|---|
| committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
| commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
| tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore | |
| parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
| download | gn-ai-master.tar.gz | |
Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore')
9 files changed, 1149 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/__init__.py
new file mode 100644
index 00000000..fdf8caba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/__init__.py
@@ -0,0 +1,5 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+__path__ = __import__("pkgutil").extend_path(__path__, __name__)
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/_constants.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/_constants.py
new file mode 100644
index 00000000..97a257ab
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/_constants.py
@@ -0,0 +1,8 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# Miscellaneous
+HTTPS = "https"
+HTTP = "http"
+WORKSPACE_BLOB_STORE = "workspaceblobstore"
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/_on_prem.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/_on_prem.py
new file mode 100644
index 00000000..e6c0dc3f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/_on_prem.py
@@ -0,0 +1,121 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=protected-access
+
+from base64 import b64encode
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import Datastore as DatastoreData
+from azure.ai.ml._restclient.v2023_04_01_preview.models import DatastoreType
+from azure.ai.ml._restclient.v2023_04_01_preview.models import HdfsDatastore as RestHdfsDatastore
+from azure.ai.ml._schema._datastore._on_prem import HdfsSchema
+from azure.ai.ml._utils._experimental import experimental
+from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, TYPE
+from azure.ai.ml.entities._datastore.datastore import Datastore
+from azure.ai.ml.entities._datastore.utils import _from_rest_datastore_credentials_preview
+from azure.ai.ml.entities._util import load_from_dict
+
+from ._constants import HTTP
+from ._on_prem_credentials import KerberosKeytabCredentials, KerberosPasswordCredentials
+
+
+@experimental
+class HdfsDatastore(Datastore):
+    """HDFS datastore that is linked to an Azure ML workspace.
+
+    :param name: Name of the datastore.
+    :type name: str
+    :param name_node_address: IP address or DNS hostname of the name node.
+    :type name_node_address: str
+    :param hdfs_server_certificate: The TLS certificate of the HDFS server (optional).
+        Must be a local path on create; it is returned as a base64-encoded string on get.
+    :type hdfs_server_certificate: str
+    :param protocol: Protocol used to connect to the HDFS server, either "http" or "https".
+    :type protocol: str
+    :param description: Description of the resource.
+    :type description: str
+    :param tags: Tag dictionary. Tags can be added, removed, and updated.
+    :type tags: dict[str, str]
+    :param properties: The asset property dictionary.
+    :type properties: dict[str, str]
+    :param credentials: Credentials for the Azure ML workspace to connect to the storage.
+    :type credentials: Union[KerberosKeytabCredentials, KerberosPasswordCredentials]
+    :param kwargs: A dictionary of additional configuration parameters.
+    :type kwargs: dict
+    """
+
+    def __init__(
+        self,
+        *,
+        name: str,
+        name_node_address: str,
+        hdfs_server_certificate: Optional[str] = None,
+        protocol: str = HTTP,
+        description: Optional[str] = None,
+        tags: Optional[Dict] = None,
+        properties: Optional[Dict] = None,
+        credentials: Optional[Union[KerberosKeytabCredentials, KerberosPasswordCredentials]],
+        **kwargs: Any
+    ):
+        kwargs[TYPE] = DatastoreType.HDFS
+        super().__init__(
+            name=name, description=description, tags=tags, properties=properties, credentials=credentials, **kwargs
+        )
+
+        self.hdfs_server_certificate = hdfs_server_certificate
+        self.name_node_address = name_node_address
+        self.protocol = protocol
+
+    def _to_rest_object(self) -> DatastoreData:
+        use_this_cert = None
+        if self.hdfs_server_certificate:
+            with open(self.hdfs_server_certificate, "rb") as f:
+                use_this_cert = b64encode(f.read()).decode("utf-8")
+        hdfs_ds = RestHdfsDatastore(
+            credentials=self.credentials._to_rest_object(),
+            hdfs_server_certificate=use_this_cert,
+            name_node_address=self.name_node_address,
+            protocol=self.protocol,
+            description=self.description,
+            tags=self.tags,
+        )
+        return DatastoreData(properties=hdfs_ds)
+
+    @classmethod
+    def _load_from_dict(cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any) -> "HdfsDatastore":
+        res: HdfsDatastore = load_from_dict(HdfsSchema, data, context, additional_message)
+        return res
+
+    @classmethod
+    def _from_rest_object(cls, datastore_resource: DatastoreData) -> "HdfsDatastore":
+        properties: RestHdfsDatastore = datastore_resource.properties
+        return HdfsDatastore(
+            name=datastore_resource.name,
+            id=datastore_resource.id,
+            credentials=_from_rest_datastore_credentials_preview(properties.credentials),
+            hdfs_server_certificate=properties.hdfs_server_certificate,
+            name_node_address=properties.name_node_address,
+            protocol=properties.protocol,
+            description=properties.description,
+            tags=properties.tags,
+        )
+
+    def __eq__(self, other: Any) -> bool:
+        res: bool = (
+            super().__eq__(other)
+            and self.hdfs_server_certificate == other.hdfs_server_certificate
+            and self.name_node_address == other.name_node_address
+            and self.protocol == other.protocol
+        )
+        return res
+
+    def __ne__(self, other: Any) -> bool:
+        return not self.__eq__(other)
+
+    def _to_dict(self) -> Dict:
+        context = {BASE_PATH_CONTEXT_KEY: Path(".").parent}
+        res: dict = HdfsSchema(context=context).dump(self)
+        return res
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/_on_prem_credentials.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/_on_prem_credentials.py
new file mode 100644
index 00000000..b658851a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/_on_prem_credentials.py
@@ -0,0 +1,128 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+from base64 import b64encode
+from typing import Any, Optional
+
+from azure.ai.ml._restclient.v2023_04_01_preview import models as model_preview
+from azure.ai.ml._utils._experimental import experimental
+from azure.ai.ml.entities._credentials import NoneCredentialConfiguration
+
+
+# TODO: Move classes in this file to azure.ai.ml.entities._credentials
+@experimental
+class BaseKerberosCredentials(NoneCredentialConfiguration):
+    def __init__(self, kerberos_realm: str, kerberos_kdc_address: str, kerberos_principal: str):
+        super().__init__()
+        self.kerberos_realm = kerberos_realm
+        self.kerberos_kdc_address = kerberos_kdc_address
+        self.kerberos_principal = kerberos_principal
+
+
+@experimental
+class KerberosKeytabCredentials(BaseKerberosCredentials):
+    def __init__(
+        self,
+        *,
+        kerberos_realm: str,
+        kerberos_kdc_address: str,
+        kerberos_principal: str,
+        kerberos_keytab: Optional[str],
+        **kwargs: Any,
+    ):
+        super().__init__(
+            kerberos_realm=kerberos_realm,
+            kerberos_kdc_address=kerberos_kdc_address,
+            kerberos_principal=kerberos_principal,
+            **kwargs,
+        )
+        self.type = model_preview.CredentialsType.KERBEROS_KEYTAB
+        self.kerberos_keytab = kerberos_keytab
+
+    def _to_rest_object(self) -> model_preview.KerberosKeytabCredentials:
+        use_this_keytab = None
+        if self.kerberos_keytab:
+            with open(self.kerberos_keytab, "rb") as f:
+                use_this_keytab = b64encode(f.read()).decode("utf-8")
+        secrets = model_preview.KerberosKeytabSecrets(kerberos_keytab=use_this_keytab)
+        return model_preview.KerberosKeytabCredentials(
+            kerberos_kdc_address=self.kerberos_kdc_address,
+            kerberos_principal=self.kerberos_principal,
+            kerberos_realm=self.kerberos_realm,
+            secrets=secrets,
+        )
+
+    @classmethod
+    def _from_rest_object(cls, obj: model_preview.KerberosKeytabCredentials) -> "KerberosKeytabCredentials":
+        return cls(
+            kerberos_kdc_address=obj.kerberos_kdc_address,
+            kerberos_principal=obj.kerberos_principal,
+            kerberos_realm=obj.kerberos_realm,
+            kerberos_keytab=obj.secrets.kerberos_keytab if obj.secrets else None,
+        )
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, KerberosKeytabCredentials):
+            return NotImplemented
+        return (
+            self.kerberos_kdc_address == other.kerberos_kdc_address
+            and self.kerberos_principal == other.kerberos_principal
+            and self.kerberos_realm == other.kerberos_realm
+            and self.kerberos_keytab == other.kerberos_keytab
+        )
+
+    def __ne__(self, other: object) -> bool:
+        return not self.__eq__(other)
+
+
+@experimental
+class KerberosPasswordCredentials(BaseKerberosCredentials):
+    def __init__(
+        self,
+        *,
+        kerberos_realm: str,
+        kerberos_kdc_address: str,
+        kerberos_principal: str,
+        kerberos_password: Optional[str],
+        **kwargs: Any,
+    ):
+        super().__init__(
+            kerberos_realm=kerberos_realm,
+            kerberos_kdc_address=kerberos_kdc_address,
+            kerberos_principal=kerberos_principal,
+            **kwargs,
+        )
+        self.type = model_preview.CredentialsType.KERBEROS_PASSWORD
+        self.kerberos_password = kerberos_password
+
+    def _to_rest_object(self) -> model_preview.KerberosPasswordCredentials:
+        secrets = model_preview.KerberosPasswordSecrets(kerberos_password=self.kerberos_password)
+        return model_preview.KerberosPasswordCredentials(
+            kerberos_kdc_address=self.kerberos_kdc_address,
+            kerberos_principal=self.kerberos_principal,
+            kerberos_realm=self.kerberos_realm,
+            secrets=secrets,
+        )
+
+    @classmethod
+    def _from_rest_object(cls, obj: model_preview.KerberosPasswordCredentials) -> "KerberosPasswordCredentials":
+        return cls(
+            kerberos_kdc_address=obj.kerberos_kdc_address,
+            kerberos_principal=obj.kerberos_principal,
+            kerberos_realm=obj.kerberos_realm,
+            kerberos_password=obj.secrets.kerberos_password if obj.secrets else None,
+        )
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, KerberosPasswordCredentials):
+            return NotImplemented
+        return (
+            self.kerberos_kdc_address == other.kerberos_kdc_address
+            and self.kerberos_principal == other.kerberos_principal
+            and self.kerberos_realm == other.kerberos_realm
+            and self.kerberos_password == other.kerberos_password
+        )
+
+    def __ne__(self, other: object) -> bool:
+        return not self.__eq__(other)
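Taken together, `_on_prem.py` and `_on_prem_credentials.py` define the experimental HDFS preview surface. Below is a minimal construction sketch: the import paths come from the diff above, but the datastore name, name-node address, realm, KDC, and principal are illustrative placeholders, and since the classes are marked `@experimental` these private module paths are not a stable API.

```python
from azure.ai.ml.entities._datastore._on_prem import HdfsDatastore
from azure.ai.ml.entities._datastore._on_prem_credentials import KerberosPasswordCredentials

# Illustrative values only; realm, KDC address, and principal are placeholders.
creds = KerberosPasswordCredentials(
    kerberos_realm="EXAMPLE.COM",
    kerberos_kdc_address="kdc.example.com",
    kerberos_principal="ml-user@EXAMPLE.COM",
    kerberos_password="<password>",
)

store = HdfsDatastore(
    name="my_hdfs_store",
    name_node_address="namenode.example.com",  # IP address or DNS hostname
    protocol="http",  # "http" (default) or "https"
    credentials=creds,
)
```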
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/adls_gen1.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/adls_gen1.py
new file mode 100644
index 00000000..c2610703
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/adls_gen1.py
@@ -0,0 +1,106 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=protected-access
+
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import (
+    AzureDataLakeGen1Datastore as RestAzureDatalakeGen1Datastore,
+)
+from azure.ai.ml._restclient.v2023_04_01_preview.models import Datastore as DatastoreData
+from azure.ai.ml._restclient.v2023_04_01_preview.models import DatastoreType
+from azure.ai.ml._schema._datastore.adls_gen1 import AzureDataLakeGen1Schema
+from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, TYPE
+from azure.ai.ml.entities._credentials import CertificateConfiguration, ServicePrincipalConfiguration
+from azure.ai.ml.entities._datastore.datastore import Datastore
+from azure.ai.ml.entities._datastore.utils import from_rest_datastore_credentials
+from azure.ai.ml.entities._util import load_from_dict
+
+
+class AzureDataLakeGen1Datastore(Datastore):
+    """Azure Data Lake Gen1 datastore that is linked to an Azure ML workspace.
+
+    :param name: Name of the datastore.
+    :type name: str
+    :param store_name: Name of the Azure storage resource.
+    :type store_name: str
+    :param description: Description of the resource.
+    :type description: str
+    :param tags: Tag dictionary. Tags can be added, removed, and updated.
+    :type tags: dict[str, str]
+    :param properties: The asset property dictionary.
+    :type properties: dict[str, str]
+    :param credentials: Credentials for the Azure ML workspace to connect to the storage.
+    :type credentials: Union[ServicePrincipalConfiguration, CertificateConfiguration]
+    :param kwargs: A dictionary of additional configuration parameters.
+    :type kwargs: dict
+    """
+
+    def __init__(
+        self,
+        *,
+        name: str,
+        store_name: str,
+        description: Optional[str] = None,
+        tags: Optional[Dict] = None,
+        properties: Optional[Dict] = None,
+        credentials: Optional[Union[CertificateConfiguration, ServicePrincipalConfiguration]] = None,
+        **kwargs: Any
+    ):
+        kwargs[TYPE] = DatastoreType.AZURE_DATA_LAKE_GEN1
+        super().__init__(
+            name=name, description=description, tags=tags, properties=properties, credentials=credentials, **kwargs
+        )
+
+        self.store_name = store_name
+
+    def _to_rest_object(self) -> DatastoreData:
+        gen1_ds = RestAzureDatalakeGen1Datastore(
+            credentials=self.credentials._to_datastore_rest_object(),
+            store_name=self.store_name,
+            description=self.description,
+            tags=self.tags,
+        )
+        return DatastoreData(properties=gen1_ds)
+
+    @classmethod
+    def _load_from_dict(
+        cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any
+    ) -> "AzureDataLakeGen1Datastore":
+        res: AzureDataLakeGen1Datastore = load_from_dict(
+            AzureDataLakeGen1Schema, data, context, additional_message, **kwargs
+        )
+        return res
+
+    @classmethod
+    def _from_rest_object(cls, datastore_resource: DatastoreData) -> "AzureDataLakeGen1Datastore":
+        properties: RestAzureDatalakeGen1Datastore = datastore_resource.properties
+        return AzureDataLakeGen1Datastore(
+            id=datastore_resource.id,
+            name=datastore_resource.name,
+            store_name=properties.store_name,
+            credentials=from_rest_datastore_credentials(properties.credentials),  # type: ignore[arg-type]
+            description=properties.description,
+            tags=properties.tags,
+        )
+
+    def __eq__(self, other: Any) -> bool:
+        res: bool = (
+            super().__eq__(other)
+            and self.name == other.name
+            and self.type == other.type
+            and self.store_name == other.store_name
+            and self.credentials == other.credentials
+        )
+        return res
+
+    def __ne__(self, other: Any) -> bool:
+        return not self.__eq__(other)
+
+    def _to_dict(self) -> Dict:
+        context = {BASE_PATH_CONTEXT_KEY: Path(".").parent}
+        res: dict = AzureDataLakeGen1Schema(context=context).dump(self)
+        return res
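Construction of the Gen1 store mirrors its docstring. A short sketch, assuming `ServicePrincipalConfiguration` accepts these keyword arguments (the tenant/client/secret values are placeholders):

```python
from azure.ai.ml.entities import AzureDataLakeGen1Datastore, ServicePrincipalConfiguration

gen1 = AzureDataLakeGen1Datastore(
    name="my_adls_gen1",
    store_name="myadlsaccount",  # name of the Azure storage resource
    credentials=ServicePrincipalConfiguration(
        tenant_id="00000000-0000-0000-0000-000000000000",
        client_id="00000000-0000-0000-0000-000000000000",
        client_secret="<client-secret>",
    ),
)
```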
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/azure_storage.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/azure_storage.py
new file mode 100644
index 00000000..0fff1925
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/azure_storage.py
@@ -0,0 +1,337 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=protected-access
+
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+
+from azure.ai.ml._azure_environments import _get_storage_endpoint_from_metadata
+from azure.ai.ml._restclient.v2023_04_01_preview.models import AzureBlobDatastore as RestAzureBlobDatastore
+from azure.ai.ml._restclient.v2023_04_01_preview.models import (
+    AzureDataLakeGen2Datastore as RestAzureDataLakeGen2Datastore,
+)
+from azure.ai.ml._restclient.v2023_04_01_preview.models import AzureFileDatastore as RestAzureFileDatastore
+from azure.ai.ml._restclient.v2023_04_01_preview.models import Datastore as DatastoreData
+from azure.ai.ml._restclient.v2023_04_01_preview.models import DatastoreType
+from azure.ai.ml._schema._datastore import AzureBlobSchema, AzureDataLakeGen2Schema, AzureFileSchema
+from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, TYPE
+from azure.ai.ml.entities._credentials import (
+    AccountKeyConfiguration,
+    CertificateConfiguration,
+    SasTokenConfiguration,
+    ServicePrincipalConfiguration,
+)
+from azure.ai.ml.entities._datastore.datastore import Datastore
+from azure.ai.ml.entities._datastore.utils import from_rest_datastore_credentials
+from azure.ai.ml.entities._util import load_from_dict
+
+from ._constants import HTTPS
+
+
+class AzureFileDatastore(Datastore):
+    """Azure file share that is linked to an Azure ML workspace.
+
+    :param name: Name of the datastore.
+    :type name: str
+    :param account_name: Name of the Azure storage account.
+    :type account_name: str
+    :param file_share_name: Name of the file share.
+    :type file_share_name: str
+    :param description: Description of the resource.
+    :type description: str
+    :param tags: Tag dictionary. Tags can be added, removed, and updated.
+    :type tags: dict[str, str]
+    :param endpoint: Endpoint to use to connect with the Azure storage account.
+    :type endpoint: str
+    :param protocol: Protocol to use to connect with the Azure storage account.
+    :type protocol: str
+    :param properties: The asset property dictionary.
+    :type properties: dict[str, str]
+    :param credentials: Credentials for the Azure ML workspace to connect to the storage. Defaults to None.
+    :type credentials: Union[~azure.ai.ml.entities.AccountKeyConfiguration,
+        ~azure.ai.ml.entities.SasTokenConfiguration]
+    :param kwargs: A dictionary of additional configuration parameters.
+    :type kwargs: dict
+    """
+
+    def __init__(
+        self,
+        *,
+        name: str,
+        account_name: str,
+        file_share_name: str,
+        description: Optional[str] = None,
+        tags: Optional[Dict] = None,
+        endpoint: str = _get_storage_endpoint_from_metadata(),
+        protocol: str = HTTPS,
+        properties: Optional[Dict] = None,
+        credentials: Optional[Union[AccountKeyConfiguration, SasTokenConfiguration]] = None,
+        **kwargs: Any
+    ):
+        kwargs[TYPE] = DatastoreType.AZURE_FILE
+        super().__init__(
+            name=name, description=description, tags=tags, properties=properties, credentials=credentials, **kwargs
+        )
+        self.file_share_name = file_share_name
+        self.account_name = account_name
+        self.endpoint = endpoint
+        self.protocol = protocol
+
+    def _to_rest_object(self) -> DatastoreData:
+        file_ds = RestAzureFileDatastore(
+            account_name=self.account_name,
+            file_share_name=self.file_share_name,
+            credentials=self.credentials._to_datastore_rest_object(),
+            endpoint=self.endpoint,
+            protocol=self.protocol,
+            description=self.description,
+            tags=self.tags,
+        )
+        return DatastoreData(properties=file_ds)
+
+    @classmethod
+    def _load_from_dict(cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any) -> "AzureFileDatastore":
+        res: AzureFileDatastore = load_from_dict(AzureFileSchema, data, context, additional_message)
+        return res
+
+    @classmethod
+    def _from_rest_object(cls, datastore_resource: DatastoreData) -> "AzureFileDatastore":
+        properties: RestAzureFileDatastore = datastore_resource.properties
+        return AzureFileDatastore(
+            name=datastore_resource.name,
+            id=datastore_resource.id,
+            account_name=properties.account_name,
+            credentials=from_rest_datastore_credentials(properties.credentials),  # type: ignore[arg-type]
+            endpoint=properties.endpoint,
+            protocol=properties.protocol,
+            file_share_name=properties.file_share_name,
+            description=properties.description,
+            tags=properties.tags,
+        )
+
+    def __eq__(self, other: Any) -> bool:
+        res: bool = (
+            super().__eq__(other)
+            and self.file_share_name == other.file_share_name
+            and self.account_name == other.account_name
+            and self.endpoint == other.endpoint
+            and self.protocol == other.protocol
+        )
+        return res
+
+    def __ne__(self, other: Any) -> bool:
+        return not self.__eq__(other)
+
+    def _to_dict(self) -> Dict:
+        context = {BASE_PATH_CONTEXT_KEY: Path(".").parent}
+        res: dict = AzureFileSchema(context=context).dump(self)
+        return res
+
+
+class AzureBlobDatastore(Datastore):
+    """Azure Blob storage that is linked to an Azure ML workspace.
+
+    :param name: Name of the datastore.
+    :type name: str
+    :param account_name: Name of the Azure storage account.
+    :type account_name: str
+    :param container_name: Name of the container.
+    :type container_name: str
+    :param description: Description of the resource.
+    :type description: str
+    :param tags: Tag dictionary. Tags can be added, removed, and updated.
+    :type tags: dict[str, str]
+    :param endpoint: Endpoint to use to connect with the Azure storage account.
+    :type endpoint: str
+    :param protocol: Protocol to use to connect with the Azure storage account.
+    :type protocol: str
+    :param properties: The asset property dictionary.
+    :type properties: dict[str, str]
+    :param credentials: Credentials for the Azure ML workspace to connect to the storage.
+    :type credentials: Union[~azure.ai.ml.entities.AccountKeyConfiguration,
+        ~azure.ai.ml.entities.SasTokenConfiguration]
+    :param kwargs: A dictionary of additional configuration parameters.
+    :type kwargs: dict
+    """
+
+    def __init__(
+        self,
+        *,
+        name: str,
+        account_name: str,
+        container_name: str,
+        description: Optional[str] = None,
+        tags: Optional[Dict] = None,
+        endpoint: Optional[str] = None,
+        protocol: str = HTTPS,
+        properties: Optional[Dict] = None,
+        credentials: Optional[Union[AccountKeyConfiguration, SasTokenConfiguration]] = None,
+        **kwargs: Any
+    ):
+        kwargs[TYPE] = DatastoreType.AZURE_BLOB
+        super().__init__(
+            name=name, description=description, tags=tags, properties=properties, credentials=credentials, **kwargs
+        )
+
+        self.container_name = container_name
+        self.account_name = account_name
+        self.endpoint = endpoint if endpoint else _get_storage_endpoint_from_metadata()
+        self.protocol = protocol
+
+    def _to_rest_object(self) -> DatastoreData:
+        blob_ds = RestAzureBlobDatastore(
+            account_name=self.account_name,
+            container_name=self.container_name,
+            credentials=self.credentials._to_datastore_rest_object(),
+            endpoint=self.endpoint,
+            protocol=self.protocol,
+            tags=self.tags,
+            description=self.description,
+        )
+        return DatastoreData(properties=blob_ds)
+
+    @classmethod
+    def _load_from_dict(cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any) -> "AzureBlobDatastore":
+        res: AzureBlobDatastore = load_from_dict(AzureBlobSchema, data, context, additional_message)
+        return res
+
+    @classmethod
+    def _from_rest_object(cls, datastore_resource: DatastoreData) -> "AzureBlobDatastore":
+        properties: RestAzureBlobDatastore = datastore_resource.properties
+        return AzureBlobDatastore(
+            name=datastore_resource.name,
+            id=datastore_resource.id,
+            account_name=properties.account_name,
+            credentials=from_rest_datastore_credentials(properties.credentials),  # type: ignore[arg-type]
+            endpoint=properties.endpoint,
+            protocol=properties.protocol,
+            container_name=properties.container_name,
+            description=properties.description,
+            tags=properties.tags,
+        )
+
+    def __eq__(self, other: Any) -> bool:
+        res: bool = (
+            super().__eq__(other)
+            and self.container_name == other.container_name
+            and self.account_name == other.account_name
+            and self.endpoint == other.endpoint
+            and self.protocol == other.protocol
+        )
+        return res
+
+    def __ne__(self, other: Any) -> bool:
+        return not self.__eq__(other)
+
+    def _to_dict(self) -> Dict:
+        context = {BASE_PATH_CONTEXT_KEY: Path(".").parent}
+        res: dict = AzureBlobSchema(context=context).dump(self)
+        return res
+
+
+class AzureDataLakeGen2Datastore(Datastore):
+    """Azure Data Lake Gen2 datastore that is linked to an Azure ML workspace.
+
+    :param name: Name of the datastore.
+    :type name: str
+    :param account_name: Name of the Azure storage account.
+    :type account_name: str
+    :param filesystem: The name of the Data Lake Gen2 filesystem.
+    :type filesystem: str
+    :param description: Description of the resource.
+    :type description: str
+    :param tags: Tag dictionary. Tags can be added, removed, and updated.
+    :type tags: dict[str, str]
+    :param endpoint: Endpoint to use to connect with the Azure storage account.
+    :type endpoint: str
+    :param protocol: Protocol to use to connect with the Azure storage account.
+    :type protocol: str
+    :param credentials: Credentials for the Azure ML workspace to connect to the storage.
+    :type credentials: Union[
+        ~azure.ai.ml.entities.ServicePrincipalConfiguration,
+        ~azure.ai.ml.entities.CertificateConfiguration
+
+    ]
+    :param properties: The asset property dictionary.
+    :type properties: dict[str, str]
+    :param kwargs: A dictionary of additional configuration parameters.
+    :type kwargs: dict
+    """
+
+    def __init__(
+        self,
+        *,
+        name: str,
+        account_name: str,
+        filesystem: str,
+        description: Optional[str] = None,
+        tags: Optional[Dict] = None,
+        endpoint: str = _get_storage_endpoint_from_metadata(),
+        protocol: str = HTTPS,
+        properties: Optional[Dict] = None,
+        credentials: Optional[Union[ServicePrincipalConfiguration, CertificateConfiguration]] = None,
+        **kwargs: Any
+    ):
+        kwargs[TYPE] = DatastoreType.AZURE_DATA_LAKE_GEN2
+        super().__init__(
+            name=name, description=description, tags=tags, properties=properties, credentials=credentials, **kwargs
+        )
+
+        self.account_name = account_name
+        self.filesystem = filesystem
+        self.endpoint = endpoint
+        self.protocol = protocol
+
+    def _to_rest_object(self) -> DatastoreData:
+        gen2_ds = RestAzureDataLakeGen2Datastore(
+            account_name=self.account_name,
+            filesystem=self.filesystem,
+            credentials=self.credentials._to_datastore_rest_object(),
+            endpoint=self.endpoint,
+            protocol=self.protocol,
+            description=self.description,
+            tags=self.tags,
+        )
+        return DatastoreData(properties=gen2_ds)
+
+    @classmethod
+    def _load_from_dict(
+        cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any
+    ) -> "AzureDataLakeGen2Datastore":
+        res: AzureDataLakeGen2Datastore = load_from_dict(AzureDataLakeGen2Schema, data, context, additional_message)
+        return res
+
+    @classmethod
+    def _from_rest_object(cls, datastore_resource: DatastoreData) -> "AzureDataLakeGen2Datastore":
+        properties: RestAzureDataLakeGen2Datastore = datastore_resource.properties
+        return AzureDataLakeGen2Datastore(
+            name=datastore_resource.name,
+            id=datastore_resource.id,
+            account_name=properties.account_name,
+            credentials=from_rest_datastore_credentials(properties.credentials),  # type: ignore[arg-type]
+            endpoint=properties.endpoint,
+            protocol=properties.protocol,
+            filesystem=properties.filesystem,
+            description=properties.description,
+            tags=properties.tags,
+        )
+
+    def __eq__(self, other: Any) -> bool:
+        res: bool = (
+            super().__eq__(other)
+            and self.filesystem == other.filesystem
+            and self.account_name == other.account_name
+            and self.endpoint == other.endpoint
+            and self.protocol == other.protocol
+        )
+        return res
+
+    def __ne__(self, other: Any) -> bool:
+        return not self.__eq__(other)
+
+    def _to_dict(self) -> Dict:
+        context = {BASE_PATH_CONTEXT_KEY: Path(".").parent}
+        res: dict = AzureDataLakeGen2Schema(context=context).dump(self)
+        return res
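The three storage-backed datastores above share the same construction pattern. A minimal sketch for the Blob case, with placeholder account, container, and key values; `dump()` comes from the `Datastore` base class in `datastore.py` below, and per its docstring it raises if the destination file already exists.

```python
from azure.ai.ml.entities import AccountKeyConfiguration, AzureBlobDatastore

blob = AzureBlobDatastore(
    name="my_blob_store",
    account_name="mystorageaccount",  # placeholder storage account
    container_name="data",
    credentials=AccountKeyConfiguration(account_key="<account-key>"),
)

# Serialize to YAML; raises if "blob_store.yml" already exists.
blob.dump("blob_store.yml")
```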
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/datastore.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/datastore.py
new file mode 100644
index 00000000..bc933cfb
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/datastore.py
@@ -0,0 +1,221 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=protected-access,redefined-builtin,arguments-renamed
+
+from abc import ABC, abstractmethod
+from os import PathLike
+from pathlib import Path
+from typing import IO, Any, AnyStr, Dict, Optional, Union
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import Datastore as DatastoreData
+from azure.ai.ml._restclient.v2023_04_01_preview.models import DatastoreType
+from azure.ai.ml._utils.utils import camel_to_snake, dump_yaml_to_file
+from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, PARAMS_OVERRIDE_KEY, CommonYamlFields
+from azure.ai.ml.entities._credentials import (
+    AccountKeyConfiguration,
+    CertificateConfiguration,
+    NoneCredentialConfiguration,
+    SasTokenConfiguration,
+    ServicePrincipalConfiguration,
+)
+from azure.ai.ml.entities._mixins import RestTranslatableMixin
+from azure.ai.ml.entities._resource import Resource
+from azure.ai.ml.entities._util import find_type_in_override
+from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationErrorType, ValidationException
+
+
+class Datastore(Resource, RestTranslatableMixin, ABC):
+    """Datastore of an Azure ML workspace, abstract class.
+
+    :param name: Name of the datastore.
+    :type name: str
+    :param description: Description of the resource.
+    :type description: str
+    :param credentials: Credentials for the Azure ML workspace to connect to the storage.
+    :type credentials: Optional[Union[
+        ~azure.ai.ml.entities.ServicePrincipalConfiguration,
+        ~azure.ai.ml.entities.CertificateConfiguration,
+        ~azure.ai.ml.entities.NoneCredentialConfiguration,
+        ~azure.ai.ml.entities.AccountKeyConfiguration,
+        ~azure.ai.ml.entities.SasTokenConfiguration
+
+    ]]
+    :param tags: Tag dictionary. Tags can be added, removed, and updated.
+    :type tags: dict[str, str]
+    :param properties: The asset property dictionary.
+    :type properties: dict[str, str]
+    :param kwargs: A dictionary of additional configuration parameters.
+    :type kwargs: dict
+    """
+
+    def __init__(
+        self,
+        credentials: Optional[
+            Union[
+                ServicePrincipalConfiguration,
+                CertificateConfiguration,
+                NoneCredentialConfiguration,
+                AccountKeyConfiguration,
+                SasTokenConfiguration,
+            ]
+        ],
+        name: Optional[str] = None,
+        description: Optional[str] = None,
+        tags: Optional[Dict] = None,
+        properties: Optional[Dict] = None,
+        **kwargs: Any,
+    ):
+        self._type: str = kwargs.pop("type", None)
+        super().__init__(
+            name=name,
+            description=description,
+            tags=tags,
+            properties=properties,
+            **kwargs,
+        )
+
+        self.credentials = NoneCredentialConfiguration() if credentials is None else credentials
+
+    @property
+    def type(self) -> str:
+        return self._type
+
+    def dump(self, dest: Union[str, PathLike, IO[AnyStr]], **kwargs: Any) -> None:
+        """Dump the datastore content into a file in YAML format.
+
+        :param dest: The destination to receive this datastore's content.
+            Must be either a path to a local file or an already-open file stream.
+            If dest is a file path, a new file is created,
+            and an exception is raised if the file exists.
+            If dest is an open file, the file is written to directly,
+            and an exception is raised if the file is not writable.
+        :type dest: Union[PathLike, str, IO[AnyStr]]
+        """
+        yaml_serialized = self._to_dict()
+        dump_yaml_to_file(dest, yaml_serialized, default_flow_style=False, **kwargs)
+
+    @abstractmethod
+    def _to_dict(self) -> Dict:
+        pass
+
+    @classmethod
+    def _load(
+        cls,
+        data: Optional[Dict] = None,
+        yaml_path: Optional[Union[PathLike, str]] = None,
+        params_override: Optional[list] = None,
+        **kwargs: Any,
+    ) -> "Datastore":
+        data = data or {}
+        params_override = params_override or []
+
+        context = {
+            BASE_PATH_CONTEXT_KEY: Path(yaml_path).parent if yaml_path else Path("./"),
+            PARAMS_OVERRIDE_KEY: params_override,
+        }
+
+        from azure.ai.ml.entities import (
+            AzureBlobDatastore,
+            AzureDataLakeGen1Datastore,
+            AzureDataLakeGen2Datastore,
+            AzureFileDatastore,
+            OneLakeDatastore,
+        )
+
+        # from azure.ai.ml.entities._datastore._on_prem import (
+        #     HdfsDatastore
+        # )
+
+        ds_type: Any = None
+        type_in_override = find_type_in_override(params_override)
+        type = type_in_override or data.get(
+            CommonYamlFields.TYPE, DatastoreType.AZURE_BLOB
+        )  # the override takes priority
+
+        # YAML uses snake_case type names, while the service-side constants are CamelCase
+        if type == camel_to_snake(DatastoreType.AZURE_BLOB):
+            ds_type = AzureBlobDatastore
+        elif type == camel_to_snake(DatastoreType.AZURE_FILE):
+            ds_type = AzureFileDatastore
+        elif type == camel_to_snake(DatastoreType.AZURE_DATA_LAKE_GEN1):
+            ds_type = AzureDataLakeGen1Datastore
+        elif type == camel_to_snake(DatastoreType.AZURE_DATA_LAKE_GEN2):
+            ds_type = AzureDataLakeGen2Datastore
+        elif type == camel_to_snake(DatastoreType.ONE_LAKE):
+            ds_type = OneLakeDatastore
+        # disable unless preview release
+        # elif type == camel_to_snake(DatastoreTypePreview.HDFS):
+        #     ds_type = HdfsDatastore
+        else:
+            msg = f"Unsupported datastore type: {type}."
+            raise ValidationException(
+                message=msg,
+                error_type=ValidationErrorType.INVALID_VALUE,
+                target=ErrorTarget.DATASTORE,
+                no_personal_data_message=msg,
+                error_category=ErrorCategory.USER_ERROR,
+            )
+
+        res: Datastore = ds_type._load_from_dict(
+            data=data,
+            context=context,
+            additional_message="If the datastore type is incorrect, change the 'type' property.",
+            **kwargs,
+        )
+        return res
+
+    @classmethod
+    def _from_rest_object(cls, datastore_resource: DatastoreData) -> "Datastore":
+        from azure.ai.ml.entities import (
+            AzureBlobDatastore,
+            AzureDataLakeGen1Datastore,
+            AzureDataLakeGen2Datastore,
+            AzureFileDatastore,
+            OneLakeDatastore,
+        )
+
+        # from azure.ai.ml.entities._datastore._on_prem import (
+        #     HdfsDatastore
+        # )
+
+        datastore_type = datastore_resource.properties.datastore_type
+        if datastore_type == DatastoreType.AZURE_DATA_LAKE_GEN1:
+            res_adl_gen1: Datastore = AzureDataLakeGen1Datastore._from_rest_object(datastore_resource)
+            return res_adl_gen1
+        if datastore_type == DatastoreType.AZURE_DATA_LAKE_GEN2:
+            res_adl_gen2: Datastore = AzureDataLakeGen2Datastore._from_rest_object(datastore_resource)
+            return res_adl_gen2
+        if datastore_type == DatastoreType.AZURE_BLOB:
+            res_abd: Datastore = AzureBlobDatastore._from_rest_object(datastore_resource)
+            return res_abd
+        if datastore_type == DatastoreType.AZURE_FILE:
+            res_afd: Datastore = AzureFileDatastore._from_rest_object(datastore_resource)
+            return res_afd
+        if datastore_type == DatastoreType.ONE_LAKE:
+            res_old: Datastore = OneLakeDatastore._from_rest_object(datastore_resource)
+            return res_old
+        # disable unless preview release
+        # elif datastore_type == DatastoreTypePreview.HDFS:
+        #     return HdfsDatastore._from_rest_object(datastore_resource)
+        msg = f"Unsupported datastore type {datastore_type}"
+        raise ValidationException(
+            message=msg,
+            error_type=ValidationErrorType.INVALID_VALUE,
+            target=ErrorTarget.DATASTORE,
+            no_personal_data_message=msg,
+            error_category=ErrorCategory.SYSTEM_ERROR,
+        )
+
+    @classmethod
+    @abstractmethod
+    def _load_from_dict(cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any) -> "Datastore":
+        pass
+
+    def __eq__(self, other: Any) -> bool:
+        res: bool = self.name == other.name and self.type == other.type and self.credentials == other.credentials
+        return res
+
+    def __ne__(self, other: Any) -> bool:
+        return not self.__eq__(other)
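`Datastore._load` dispatches on the YAML `type` field by snake-casing the service-side constants. A small sketch of that mapping, using only helpers already imported in the file above (note these are private paths and may change):

```python
from azure.ai.ml._restclient.v2023_04_01_preview.models import DatastoreType
from azure.ai.ml._utils.utils import camel_to_snake

# The YAML 'type' values that _load matches, derived from the REST constants.
for t in (
    DatastoreType.AZURE_BLOB,
    DatastoreType.AZURE_FILE,
    DatastoreType.AZURE_DATA_LAKE_GEN1,
    DatastoreType.AZURE_DATA_LAKE_GEN2,
    DatastoreType.ONE_LAKE,
):
    print(camel_to_snake(t))
# -> azure_blob, azure_file, azure_data_lake_gen1, azure_data_lake_gen2, one_lake
```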
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/one_lake.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/one_lake.py
new file mode 100644
index 00000000..9bc06d92
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/one_lake.py
@@ -0,0 +1,153 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=protected-access
+
+from abc import ABC
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import Datastore as DatastoreData
+from azure.ai.ml._restclient.v2023_04_01_preview.models import DatastoreType
+from azure.ai.ml._restclient.v2023_04_01_preview.models import LakeHouseArtifact as RestLakeHouseArtifact
+from azure.ai.ml._restclient.v2023_04_01_preview.models import NoneDatastoreCredentials as RestNoneDatastoreCredentials
+from azure.ai.ml._restclient.v2023_04_01_preview.models import OneLakeDatastore as RestOneLakeDatastore
+from azure.ai.ml._schema._datastore.one_lake import OneLakeSchema
+from azure.ai.ml._utils._experimental import experimental
+from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, TYPE
+from azure.ai.ml.entities._credentials import NoneCredentialConfiguration, ServicePrincipalConfiguration
+from azure.ai.ml.entities._datastore.datastore import Datastore
+from azure.ai.ml.entities._datastore.utils import from_rest_datastore_credentials
+from azure.ai.ml.entities._mixins import DictMixin, RestTranslatableMixin
+from azure.ai.ml.entities._util import load_from_dict
+
+
+@experimental
+class OneLakeArtifact(RestTranslatableMixin, DictMixin, ABC):
+    """OneLake artifact (data source) backing the OneLake workspace.
+
+    :param name: OneLake artifact name/GUID. ex) 01234567-abcd-1234-5678-012345678901
+    :type name: str
+    :param type: OneLake artifact type. Only LakeHouse artifacts are currently supported.
+    :type type: str
+    """
+
+    def __init__(self, *, name: str, type: Optional[str] = None):
+        super().__init__()
+        self.name = name
+        self.type = type
+
+
+@experimental
+class LakeHouseArtifact(OneLakeArtifact):
+    """LakeHouse artifact type for OneLake.
+
+    :param name: OneLake LakeHouse artifact name/GUID. ex) 01234567-abcd-1234-5678-012345678901
+    :type name: str
+    """
+
+    def __init__(self, *, name: str):
+        super(LakeHouseArtifact, self).__init__(name=name, type="lake_house")
+
+    def _to_datastore_rest_object(self) -> RestLakeHouseArtifact:
+        return RestLakeHouseArtifact(artifact_name=self.name)
+
+
+@experimental
+class OneLakeDatastore(Datastore):
+    """OneLake datastore that is linked to an Azure ML workspace.
+
+    :param name: Name of the datastore.
+    :type name: str
+    :param artifact: OneLake artifact. Only LakeHouse artifacts are currently supported.
+    :type artifact: ~azure.ai.ml.entities.OneLakeArtifact
+    :param one_lake_workspace_name: OneLake workspace name/GUID. ex) 01234567-abcd-1234-5678-012345678901
+    :type one_lake_workspace_name: str
+    :param endpoint: OneLake endpoint to use for the datastore. ex) https://onelake.dfs.fabric.microsoft.com
+    :type endpoint: str
+    :param description: Description of the resource.
+    :type description: str
+    :param tags: Tag dictionary. Tags can be added, removed, and updated.
+    :type tags: dict[str, str]
+    :param properties: The asset property dictionary.
+    :type properties: dict[str, str]
+    :param credentials: Credentials to use to authenticate against OneLake.
+    :type credentials: Union[
+        ~azure.ai.ml.entities.ServicePrincipalConfiguration, ~azure.ai.ml.entities.NoneCredentialConfiguration]
+    :param kwargs: A dictionary of additional configuration parameters.
+    :type kwargs: dict
+    """
+
+    def __init__(
+        self,
+        *,
+        name: str,
+        artifact: OneLakeArtifact,
+        one_lake_workspace_name: str,
+        endpoint: Optional[str] = None,
+        description: Optional[str] = None,
+        tags: Optional[Dict] = None,
+        properties: Optional[Dict] = None,
+        credentials: Optional[Union[NoneCredentialConfiguration, ServicePrincipalConfiguration]] = None,
+        **kwargs: Any
+    ):
+        kwargs[TYPE] = DatastoreType.ONE_LAKE
+        super().__init__(
+            name=name, description=description, tags=tags, properties=properties, credentials=credentials, **kwargs
+        )
+        self.artifact = artifact
+        self.one_lake_workspace_name = one_lake_workspace_name
+        self.endpoint = endpoint
+
+    def _to_rest_object(self) -> DatastoreData:
+        one_lake_ds = RestOneLakeDatastore(
+            credentials=(
+                RestNoneDatastoreCredentials()
+                if self.credentials is None
+                else self.credentials._to_datastore_rest_object()
+            ),
+            artifact=RestLakeHouseArtifact(artifact_name=self.artifact["name"]),
+            one_lake_workspace_name=self.one_lake_workspace_name,
+            endpoint=self.endpoint,
+            description=self.description,
+            tags=self.tags,
+        )
+        return DatastoreData(properties=one_lake_ds)
+
+    @classmethod
+    def _load_from_dict(cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any) -> "OneLakeDatastore":
+        res: OneLakeDatastore = load_from_dict(OneLakeSchema, data, context, additional_message, **kwargs)
+        return res
+
+    @classmethod
+    def _from_rest_object(cls, datastore_resource: DatastoreData) -> "OneLakeDatastore":
+        properties: RestOneLakeDatastore = datastore_resource.properties
+        return OneLakeDatastore(
+            name=datastore_resource.name,
+            id=datastore_resource.id,
+            artifact=LakeHouseArtifact(name=properties.artifact.artifact_name),
+            one_lake_workspace_name=properties.one_lake_workspace_name,
+            endpoint=properties.endpoint,
+            credentials=from_rest_datastore_credentials(properties.credentials),  # type: ignore[arg-type]
+            description=properties.description,
+            tags=properties.tags,
+        )
+
+    def __eq__(self, other: Any) -> bool:
+        res: bool = (
+            super().__eq__(other)
+            and self.one_lake_workspace_name == other.one_lake_workspace_name
+            and self.artifact.type == other.artifact["type"]
+            and self.artifact.name == other.artifact["name"]
+            and self.endpoint == other.endpoint
+        )
+        return res
+
+    def __ne__(self, other: Any) -> bool:
+        return not self.__eq__(other)
+
+    def _to_dict(self) -> Dict:
+        context = {BASE_PATH_CONTEXT_KEY: Path(".").parent}
+        res: dict = OneLakeSchema(context=context).dump(self)
+        return res
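A construction sketch for the OneLake entity above. The GUIDs and endpoint are placeholders, and `LakeHouseArtifact` is imported from the module path shown in this diff since its public export is not visible here; with no credentials given, the base class falls back to `NoneCredentialConfiguration`.

```python
from azure.ai.ml.entities._datastore.one_lake import LakeHouseArtifact, OneLakeDatastore

onelake = OneLakeDatastore(
    name="my_onelake_store",
    artifact=LakeHouseArtifact(name="01234567-abcd-1234-5678-012345678901"),
    one_lake_workspace_name="01234567-abcd-1234-5678-012345678901",
    endpoint="https://onelake.dfs.fabric.microsoft.com",
    # credentials omitted -> NoneCredentialConfiguration via the Datastore base class
)
```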
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/utils.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/utils.py
new file mode 100644
index 00000000..538f9590
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_datastore/utils.py
@@ -0,0 +1,70 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=protected-access
+
+from typing import Any, Optional, Union, cast
+
+from azure.ai.ml._restclient.v2023_04_01_preview import models
+from azure.ai.ml._restclient.v2024_07_01_preview import models as models2024
+from azure.ai.ml.entities._credentials import (
+    AccountKeyConfiguration,
+    CertificateConfiguration,
+    NoneCredentialConfiguration,
+    SasTokenConfiguration,
+    ServicePrincipalConfiguration,
+)
+from azure.ai.ml.entities._datastore._on_prem_credentials import KerberosKeytabCredentials, KerberosPasswordCredentials
+
+
+def from_rest_datastore_credentials(
+    rest_credentials: models.DatastoreCredentials,
+) -> Union[
+    AccountKeyConfiguration,
+    SasTokenConfiguration,
+    ServicePrincipalConfiguration,
+    CertificateConfiguration,
+    NoneCredentialConfiguration,
+]:
+    config_class: Any = NoneCredentialConfiguration
+
+    if isinstance(rest_credentials, (models.AccountKeyDatastoreCredentials, models2024.AccountKeyDatastoreCredentials)):
+        # Key-based accounts no longer use the account key itself; the service may return SAS secrets instead.
+        # https://github.com/Azure/azure-sdk-for-python/pull/35716
+        if isinstance(rest_credentials.secrets, models2024.SasDatastoreSecrets):
+            config_class = SasTokenConfiguration
+        else:
+            config_class = AccountKeyConfiguration
+    elif isinstance(rest_credentials, (models.SasDatastoreCredentials, models2024.SasDatastoreCredentials)):
+        config_class = SasTokenConfiguration
+    elif isinstance(
+        rest_credentials, (models.ServicePrincipalDatastoreCredentials, models2024.ServicePrincipalDatastoreCredentials)
+    ):
+        config_class = ServicePrincipalConfiguration
+    elif isinstance(
+        rest_credentials, (models.CertificateDatastoreCredentials, models2024.CertificateDatastoreCredentials)
+    ):
+        config_class = CertificateConfiguration
+
+    return cast(
+        Union[
+            AccountKeyConfiguration,
+            SasTokenConfiguration,
+            ServicePrincipalConfiguration,
+            CertificateConfiguration,
+            NoneCredentialConfiguration,
+        ],
+        config_class._from_datastore_rest_object(rest_credentials),
+    )
+
+
+def _from_rest_datastore_credentials_preview(
+    rest_credentials: models.DatastoreCredentials,
+) -> Optional[Union[KerberosKeytabCredentials, KerberosPasswordCredentials]]:
+    if isinstance(rest_credentials, models.KerberosKeytabCredentials):
+        return KerberosKeytabCredentials._from_rest_object(rest_credentials)
+    if isinstance(rest_credentials, models.KerberosPasswordCredentials):
+        return KerberosPasswordCredentials._from_rest_object(rest_credentials)
+
+    return None
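To close the loop on `utils.py`: `from_rest_datastore_credentials` maps REST-layer credential models onto the client-side configuration classes. A round-trip sketch, assuming the generated REST models accept these keyword arguments (the token value is a placeholder):

```python
from azure.ai.ml._restclient.v2023_04_01_preview import models
from azure.ai.ml.entities._datastore.utils import from_rest_datastore_credentials

# A REST-layer credentials object, as it would come back from the service.
rest_creds = models.SasDatastoreCredentials(
    secrets=models.SasDatastoreSecrets(sas_token="<sas-token>"),
)
config = from_rest_datastore_credentials(rest_creds)
print(type(config).__name__)  # SasTokenConfiguration
```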
