| author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
|---|---|---|
| committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
| commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
| tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio | |
| parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio')
11 files changed, 4626 insertions, 0 deletions
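The diff below vendors the asynchronous Azure Data Lake Storage client package (`azure.storage.filedatalake.aio`) into the project's `.venv`. As a rough orientation before the file-by-file listing, here is a minimal usage sketch; it assumes a valid connection string, an existing file system named `my-fs`, and hypothetical directory/file paths. The client and method names (`DataLakeServiceClient`, `create_directory`, `get_file_client`, `upload_data`, `get_file_properties`) come from the `__init__.py` exports and docstrings in the vendored files, not from this repository's own code.

```python
# Hypothetical usage sketch for the async Data Lake clients added in this diff.
# Assumes a valid connection string and an existing file system named "my-fs";
# directory and file names are placeholders for illustration only.
import asyncio

from azure.storage.filedatalake.aio import DataLakeServiceClient


async def main():
    service = DataLakeServiceClient.from_connection_string("<connection-string>")
    async with service:
        # Get a client for an existing file system, then a directory under it.
        fs = service.get_file_system_client("my-fs")
        directory = fs.get_directory_client("raw/2025")
        await directory.create_directory()

        # Create a file in the directory and upload a small payload.
        file = directory.get_file_client("data.csv")
        await file.upload_data(b"col1,col2\n1,2\n", overwrite=True)

        # Read back the file's properties (metadata, size, etc.).
        props = await file.get_file_properties()
        print(props.name, props.size)


asyncio.run(main())
```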
diff --git a/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/__init__.py b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/__init__.py new file mode 100644 index 00000000..c24dde8d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/__init__.py @@ -0,0 +1,24 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- + +from ._download_async import StorageStreamDownloader +from .._shared.policies_async import ExponentialRetry, LinearRetry +from ._data_lake_file_client_async import DataLakeFileClient +from ._data_lake_directory_client_async import DataLakeDirectoryClient +from ._file_system_client_async import FileSystemClient +from ._data_lake_service_client_async import DataLakeServiceClient +from ._data_lake_lease_async import DataLakeLeaseClient + +__all__ = [ + 'DataLakeServiceClient', + 'FileSystemClient', + 'DataLakeDirectoryClient', + 'DataLakeFileClient', + 'DataLakeLeaseClient', + 'ExponentialRetry', + 'LinearRetry', + 'StorageStreamDownloader' +] diff --git a/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_data_lake_directory_client_async.py b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_data_lake_directory_client_async.py new file mode 100644 index 00000000..578f896e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_data_lake_directory_client_async.py @@ -0,0 +1,721 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- +# pylint: disable=invalid-overridden-method, docstring-keyword-should-match-keyword-only + +import functools +from typing import ( + Any, Dict, Optional, Union, + TYPE_CHECKING +) + +try: + from urllib.parse import quote, unquote +except ImportError: + from urllib2 import quote, unquote # type: ignore + +from azure.core.async_paging import AsyncItemPaged +from azure.core.pipeline import AsyncPipeline +from azure.core.tracing.decorator import distributed_trace +from azure.core.tracing.decorator_async import distributed_trace_async +from .._data_lake_directory_client import DataLakeDirectoryClient as DataLakeDirectoryClientBase +from .._deserialize import deserialize_dir_properties +from .._models import DirectoryProperties, FileProperties +from .._shared.base_client_async import AsyncTransportWrapper +from ._data_lake_file_client_async import DataLakeFileClient +from ._list_paths_helper import PathPropertiesPaged +from ._path_client_async import PathClient + +if TYPE_CHECKING: + from azure.core.credentials import AzureNamedKeyCredential, AzureSasCredential + from azure.core.credentials_async import AsyncTokenCredential + from datetime import datetime + from .._models import PathProperties + + +class DataLakeDirectoryClient(PathClient, DataLakeDirectoryClientBase): + """A client to interact with the DataLake directory, even if the directory may not yet exist. 
+ + For operations relating to a specific subdirectory or file under the directory, a directory client or file client + can be retrieved using the :func:`~get_sub_directory_client` or :func:`~get_file_client` functions. + + :ivar str url: + The full endpoint URL to the file system, including SAS token if used. + :ivar str primary_endpoint: + The full primary endpoint URL. + :ivar str primary_hostname: + The hostname of the primary endpoint. + :param str account_url: + The URI to the storage account. + :param file_system_name: + The file system for the directory or files. + :type file_system_name: str + :param directory_name: + The whole path of the directory. eg. {directory under file system}/{directory to interact with} + :type directory_name: str + :param credential: + The credentials with which to authenticate. This is optional if the + account URL already has a SAS token. The value can be a SAS token string, + an instance of a AzureSasCredential or AzureNamedKeyCredential from azure.core.credentials, + an account shared access key, or an instance of a TokenCredentials class from azure.identity. + If the resource URI already contains a SAS token, this will be ignored in favor of an explicit credential + - except in the case of AzureSasCredential, where the conflicting SAS tokens will raise a ValueError. + If using an instance of AzureNamedKeyCredential, "name" should be the storage account name, and "key" + should be the storage account key. + :type credential: + ~azure.core.credentials.AzureNamedKeyCredential or + ~azure.core.credentials.AzureSasCredential or + ~azure.core.credentials_async.AsyncTokenCredential or + str or dict[str, str] or None + :keyword str api_version: + The Storage API version to use for requests. Default value is the most recent service version that is + compatible with the current SDK. Setting to an older version may result in reduced feature compatibility. + :keyword str audience: The audience to use when requesting tokens for Azure Active Directory + authentication. Only has an effect when credential is of type TokenCredential. The value could be + https://storage.azure.com/ (default) or https://<account>.blob.core.windows.net. + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_instantiate_client_async.py + :start-after: [START instantiate_directory_client_from_conn_str] + :end-before: [END instantiate_directory_client_from_conn_str] + :language: python + :dedent: 4 + :caption: Creating the DataLakeServiceClient from connection string. + """ + + def __init__( + self, account_url: str, + file_system_name: str, + directory_name: str, + credential: Optional[Union[str, Dict[str, str], "AzureNamedKeyCredential", "AzureSasCredential", "AsyncTokenCredential"]] = None, # pylint: disable=line-too-long + **kwargs: Any + ) -> None: + super(DataLakeDirectoryClient, self).__init__(account_url, file_system_name, directory_name, # pylint: disable=specify-parameter-names-in-call + credential=credential, **kwargs) + + @distributed_trace_async + async def create_directory(self, metadata=None, # type: Optional[Dict[str, str]] + **kwargs): + # type: (...) -> Dict[str, Union[str, datetime]] + """ + Create a new directory. + + :param metadata: + Name-value pairs associated with the directory as metadata. + :type metadata: dict(str, str) + :keyword ~azure.storage.filedatalake.ContentSettings content_settings: + ContentSettings object used to set path properties. + :keyword lease: + Required if the directory has an active lease. 
Value can be a DataLakeLeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword str umask: + Optional and only valid if Hierarchical Namespace is enabled for the account. + When creating a file or directory and the parent folder does not have a default ACL, + the umask restricts the permissions of the file or directory to be created. + The resulting permission is given by p & ^u, where p is the permission and u is the umask. + For example, if p is 0777 and u is 0057, then the resulting permission is 0720. + The default permission is 0777 for a directory and 0666 for a file. The default umask is 0027. + The umask must be specified in 4-digit octal notation (e.g. 0766). + :keyword str owner: + The owner of the file or directory. + :keyword str group: + The owning group of the file or directory. + :keyword str acl: + Sets POSIX access control rights on files and directories. The value is a + comma-separated list of access control entries. Each access control entry (ACE) consists of a + scope, a type, a user or group identifier, and permissions in the format + "[scope:][type]:[id]:[permissions]". + :keyword str lease_id: + Proposed lease ID, in a GUID string format. The DataLake service returns + 400 (Invalid request) if the proposed lease ID is not in the correct format. + :keyword int lease_duration: + Specifies the duration of the lease, in seconds, or negative one + (-1) for a lease that never expires. A non-infinite lease can be + between 15 and 60 seconds. A lease duration cannot be changed + using renew or change. + :keyword str permissions: + Optional and only valid if Hierarchical Namespace + is enabled for the account. Sets POSIX access permissions for the file + owner, the file owning group, and others. Each class may be granted + read, write, or execute permission. The sticky bit is also supported. + Both symbolic (rwxrw-rw-) and 4-digit octal notation (e.g. 0766) are + supported. + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword ~azure.storage.filedatalake.CustomerProvidedEncryptionKey cpk: + Encrypts the data on the service-side with the given key. + Use of customer-provided keys must be done over HTTPS. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. 
To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :return: A dictionary of response headers. + :rtype: dict[str, str] or dict[str, ~datetime.datetime] + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_directory_async.py + :start-after: [START create_directory] + :end-before: [END create_directory] + :language: python + :dedent: 8 + :caption: Create directory. + """ + return await self._create('directory', metadata=metadata, **kwargs) + + @distributed_trace_async + async def exists(self, **kwargs): + # type: (**Any) -> bool + """ + Returns True if a directory exists and returns False otherwise. + + :kwarg int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: True if a directory exists, False otherwise. + :rtype: bool + """ + return await self._exists(**kwargs) + + @distributed_trace_async + async def delete_directory(self, **kwargs): + # type: (...) -> None + """ + Marks the specified directory for deletion. + + :keyword lease: + Required if the directory has an active lease. Value can be a LeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: None. + :rtype: None + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_directory_async.py + :start-after: [START delete_directory] + :end-before: [END delete_directory] + :language: python + :dedent: 4 + :caption: Delete directory. 
+ """ + return await self._delete(recursive=True, **kwargs) + + @distributed_trace_async + async def get_directory_properties(self, **kwargs): + # type: (**Any) -> DirectoryProperties + """Returns all user-defined metadata, standard HTTP properties, and + system properties for the directory. It does not return the content of the directory. + + :keyword lease: + Required if the directory or file has an active lease. Value can be a DataLakeLeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword ~azure.storage.filedatalake.CustomerProvidedEncryptionKey cpk: + Decrypts the data on the service-side with the given key. + Use of customer-provided keys must be done over HTTPS. + Required if the directory was created with a customer-provided key. + :keyword bool upn: + If True, the user identity values returned in the x-ms-owner, x-ms-group, + and x-ms-acl response headers will be transformed from Azure Active Directory Object IDs to User + Principal Names in the owner, group, and acl fields of + :class:`~azure.storage.filedatalake.DirectoryProperties`. If False, the values will be returned + as Azure Active Directory Object IDs. The default value is False. Note that group and application + Object IDs are not translate because they do not have unique friendly names. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: + Information including user-defined metadata, standard HTTP properties, + and system properties for the file or directory. + :rtype: DirectoryProperties + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_directory_async.py + :start-after: [START get_directory_properties] + :end-before: [END get_directory_properties] + :language: python + :dedent: 4 + :caption: Getting the properties for a file/directory. 
+ """ + upn = kwargs.pop('upn', None) + if upn: + headers = kwargs.pop('headers', {}) + headers['x-ms-upn'] = str(upn) + kwargs['headers'] = headers + return await self._get_path_properties(cls=deserialize_dir_properties, **kwargs) + + @distributed_trace_async + async def rename_directory(self, new_name, # type: str + **kwargs): + # type: (...) -> DataLakeDirectoryClient + """ + Rename the source directory. + + :param str new_name: + the new directory name the user want to rename to. + The value must have the following format: "{filesystem}/{directory}/{subdirectory}". + :keyword source_lease: + A lease ID for the source path. If specified, + the source path must have an active lease and the lease ID must + match. + :paramtype source_lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword lease: + Required if the file/directory has an active lease. Value can be a LeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword ~datetime.datetime source_if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime source_if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str source_etag: + The source ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions source_match_condition: + The source match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. 
To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: DataLakeDirectoryClient containing the renamed directory. + :rtype: ~azure.storage.filedatalake.aio.DataLakeDirectoryClient + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_directory_async.py + :start-after: [START rename_directory] + :end-before: [END rename_directory] + :language: python + :dedent: 4 + :caption: Rename the source directory. + """ + new_file_system, new_path, new_dir_sas = self._parse_rename_path(new_name) + + new_directory_client = DataLakeDirectoryClient( + f"{self.scheme}://{self.primary_hostname}", new_file_system, directory_name=new_path, + credential=self._raw_credential or new_dir_sas, + _hosts=self._hosts, _configuration=self._config, _pipeline=self._pipeline) + await new_directory_client._rename_path( # pylint: disable=protected-access + f'/{quote(unquote(self.file_system_name))}/{quote(unquote(self.path_name))}{self._query_str}', **kwargs) + return new_directory_client + + @distributed_trace_async + async def create_sub_directory(self, sub_directory, # type: Union[DirectoryProperties, str] + metadata=None, # type: Optional[Dict[str, str]] + **kwargs): + # type: (...) -> DataLakeDirectoryClient + """ + Create a subdirectory and return the subdirectory client to be interacted with. + + :param sub_directory: + The directory with which to interact. This can either be the name of the directory, + or an instance of DirectoryProperties. + :type sub_directory: str or ~azure.storage.filedatalake.DirectoryProperties + :param metadata: + Name-value pairs associated with the file as metadata. + :type metadata: dict(str, str) + :keyword ~azure.storage.filedatalake.ContentSettings content_settings: + ContentSettings object used to set path properties. + :keyword lease: + Required if the file has an active lease. Value can be a DataLakeLeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword str umask: + Optional and only valid if Hierarchical Namespace is enabled for the account. + When creating a file or directory and the parent folder does not have a default ACL, + the umask restricts the permissions of the file or directory to be created. + The resulting permission is given by p & ^u, where p is the permission and u is the umask. + For example, if p is 0777 and u is 0057, then the resulting permission is 0720. + The default permission is 0777 for a directory and 0666 for a file. The default umask is 0027. + The umask must be specified in 4-digit octal notation (e.g. 0766). + :keyword str owner: + The owner of the file or directory. + :keyword str group: + The owning group of the file or directory. + :keyword str acl: + Sets POSIX access control rights on files and directories. The value is a + comma-separated list of access control entries. Each access control entry (ACE) consists of a + scope, a type, a user or group identifier, and permissions in the format + "[scope:][type]:[id]:[permissions]". + :keyword str lease_id: + Proposed lease ID, in a GUID string format. The DataLake service returns + 400 (Invalid request) if the proposed lease ID is not in the correct format. + :keyword int lease_duration: + Specifies the duration of the lease, in seconds, or negative one + (-1) for a lease that never expires. 
A non-infinite lease can be + between 15 and 60 seconds. A lease duration cannot be changed + using renew or change. + :keyword str permissions: + Optional and only valid if Hierarchical Namespace + is enabled for the account. Sets POSIX access permissions for the file + owner, the file owning group, and others. Each class may be granted + read, write, or execute permission. The sticky bit is also supported. + Both symbolic (rwxrw-rw-) and 4-digit octal notation (e.g. 0766) are + supported. + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword ~azure.storage.filedatalake.CustomerProvidedEncryptionKey cpk: + Encrypts the data on the service-side with the given key. + Use of customer-provided keys must be done over HTTPS. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: DataLakeDirectoryClient for the subdirectory. + :rtype: ~azure.storage.filedatalake.aio.DataLakeDirectoryClient + """ + subdir = self.get_sub_directory_client(sub_directory) + await subdir.create_directory(metadata=metadata, **kwargs) + return subdir + + @distributed_trace_async + async def delete_sub_directory(self, sub_directory, # type: Union[DirectoryProperties, str] + **kwargs): + # type: (...) -> DataLakeDirectoryClient + """ + Marks the specified subdirectory for deletion. + + :param sub_directory: + The directory with which to interact. This can either be the name of the directory, + or an instance of DirectoryProperties. + :type sub_directory: str or ~azure.storage.filedatalake.DirectoryProperties + :keyword lease: + Required if the directory has an active lease. Value can be a LeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. 
+ :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: DataLakeDirectoryClient for the subdirectory. + :rtype: ~azure.storage.filedatalake.aio.DataLakeDirectoryClient + """ + subdir = self.get_sub_directory_client(sub_directory) + await subdir.delete_directory(**kwargs) + return subdir + + @distributed_trace_async + async def create_file(self, file, # type: Union[FileProperties, str] + **kwargs): + # type: (...) -> DataLakeFileClient + """ + Create a new file and return the file client to be interacted with. + + :param file: + The file with which to interact. This can either be the name of the file, + or an instance of FileProperties. + :type file: str or ~azure.storage.filedatalake.FileProperties + :keyword ~azure.storage.filedatalake.ContentSettings content_settings: + ContentSettings object used to set path properties. + :keyword metadata: + Name-value pairs associated with the file as metadata. + :type metadata: dict(str, str) + :keyword lease: + Required if the file has an active lease. Value can be a DataLakeLeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword str umask: + Optional and only valid if Hierarchical Namespace is enabled for the account. + When creating a file or directory and the parent folder does not have a default ACL, + the umask restricts the permissions of the file or directory to be created. + The resulting permission is given by p & ^u, where p is the permission and u is the umask. + For example, if p is 0777 and u is 0057, then the resulting permission is 0720. + The default permission is 0777 for a directory and 0666 for a file. The default umask is 0027. + The umask must be specified in 4-digit octal notation (e.g. 0766). + :keyword str owner: + The owner of the file or directory. + :keyword str group: + The owning group of the file or directory. + :keyword str acl: + Sets POSIX access control rights on files and directories. The value is a + comma-separated list of access control entries. Each access control entry (ACE) consists of a + scope, a type, a user or group identifier, and permissions in the format + "[scope:][type]:[id]:[permissions]". + :keyword str lease_id: + Proposed lease ID, in a GUID string format. The DataLake service returns + 400 (Invalid request) if the proposed lease ID is not in the correct format. 
+ :keyword int lease_duration: + Specifies the duration of the lease, in seconds, or negative one + (-1) for a lease that never expires. A non-infinite lease can be + between 15 and 60 seconds. A lease duration cannot be changed + using renew or change. + :keyword expires_on: + The time to set the file to expiry. + If the type of expires_on is an int, expiration time will be set + as the number of milliseconds elapsed from creation time. + If the type of expires_on is datetime, expiration time will be set + absolute to the time provided. If no time zone info is provided, this + will be interpreted as UTC. + :paramtype expires_on: datetime or int + :keyword str permissions: + Optional and only valid if Hierarchical Namespace + is enabled for the account. Sets POSIX access permissions for the file + owner, the file owning group, and others. Each class may be granted + read, write, or execute permission. The sticky bit is also supported. + Both symbolic (rwxrw-rw-) and 4-digit octal notation (e.g. 0766) are + supported. + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword ~azure.storage.filedatalake.CustomerProvidedEncryptionKey cpk: + Encrypts the data on the service-side with the given key. + Use of customer-provided keys must be done over HTTPS. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: DataLakeFileClient with the new file. + :rtype: ~azure.storage.filedatalake.aio.DataLakeDirectoryClient + """ + file_client = self.get_file_client(file) + await file_client.create_file(**kwargs) + return file_client + + @distributed_trace + def get_paths( + self, *, + recursive: bool = True, + max_results: Optional[int] = None, + upn: Optional[bool] = None, + timeout: Optional[int] = None, + **kwargs: Any + ) -> AsyncItemPaged["PathProperties"]: + """Returns an async generator to list the paths under specified file system and directory. + The generator will lazily follow the continuation tokens returned by the service. + + :keyword bool recursive: Set True for recursive, False for iterative. The default value is True. 
+ :keyword Optional[int] max_results: An optional value that specifies the maximum + number of items to return per page. If omitted or greater than 5,000, the + response will include up to 5,000 items per page. + :keyword Optional[bool] upn: + If True, the user identity values returned in the x-ms-owner, x-ms-group, + and x-ms-acl response headers will be transformed from Azure Active Directory Object IDs to User + Principal Names in the owner, group, and acl fields of + :class:`~azure.storage.filedatalake.PathProperties`. If False, the values will be returned + as Azure Active Directory Object IDs. The default value is None. Note that group and application + Object IDs are not translate because they do not have unique friendly names. + :keyword Optional[int] timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. The default value is None. + :returns: An iterable (auto-paging) response of PathProperties. + :rtype: ~azure.core.paging.AsyncItemPaged[~azure.storage.filedatalake.PathProperties] + """ + timeout = kwargs.pop('timeout', None) + hostname = self._hosts[self._location_mode] + url = f"{self.scheme}://{hostname}/{quote(self.file_system_name)}" + client = self._build_generated_client(url) + command = functools.partial( + client.file_system.list_paths, + path=self.path_name, + timeout=timeout, + **kwargs + ) + return AsyncItemPaged( + command, recursive, path=self.path_name, max_results=max_results, + upn=upn, page_iterator_class=PathPropertiesPaged, **kwargs) + + def get_file_client(self, file # type: Union[FileProperties, str] + ): + # type: (...) -> DataLakeFileClient + """Get a client to interact with the specified file. + + The file need not already exist. + + :param file: + The file with which to interact. This can either be the name of the file, + or an instance of FileProperties. eg. directory/subdirectory/file + :type file: str or ~azure.storage.filedatalake.FileProperties + :returns: A DataLakeFileClient. + :rtype: ~azure.storage.filedatalake.aio.DataLakeFileClient + """ + try: + file_path = file.get('name') + except AttributeError: + file_path = self.path_name + '/' + str(file) + + _pipeline = AsyncPipeline( + transport=AsyncTransportWrapper(self._pipeline._transport), # pylint: disable = protected-access + policies=self._pipeline._impl_policies # pylint: disable = protected-access + ) + return DataLakeFileClient( + self.url, self.file_system_name, file_path=file_path, credential=self._raw_credential, + api_version=self.api_version, + _hosts=self._hosts, _configuration=self._config, _pipeline=_pipeline) + + def get_sub_directory_client(self, sub_directory # type: Union[DirectoryProperties, str] + ): + # type: (...) -> DataLakeDirectoryClient + """Get a client to interact with the specified subdirectory of the current directory. + + The sub subdirectory need not already exist. + + :param sub_directory: + The directory with which to interact. This can either be the name of the directory, + or an instance of DirectoryProperties. + :type sub_directory: str or ~azure.storage.filedatalake.DirectoryProperties + :returns: A DataLakeDirectoryClient. 
+ :rtype: ~azure.storage.filedatalake.aio.DataLakeDirectoryClient + """ + try: + subdir_path = sub_directory.get('name') + except AttributeError: + subdir_path = self.path_name + '/' + str(sub_directory) + + _pipeline = AsyncPipeline( + transport=AsyncTransportWrapper(self._pipeline._transport), # pylint: disable = protected-access + policies=self._pipeline._impl_policies # pylint: disable = protected-access + ) + return DataLakeDirectoryClient( + self.url, self.file_system_name, directory_name=subdir_path, credential=self._raw_credential, + api_version=self.api_version, + _hosts=self._hosts, _configuration=self._config, _pipeline=_pipeline) diff --git a/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_data_lake_file_client_async.py b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_data_lake_file_client_async.py new file mode 100644 index 00000000..9b00b0b6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_data_lake_file_client_async.py @@ -0,0 +1,735 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- +# pylint: disable=invalid-overridden-method, docstring-keyword-should-match-keyword-only + +from datetime import datetime +from typing import ( + Any, AnyStr, AsyncIterable, Dict, IO, Iterable, Optional, Union, + TYPE_CHECKING) +from urllib.parse import quote, unquote + +from azure.core.exceptions import HttpResponseError +from azure.core.tracing.decorator_async import distributed_trace_async +from ._download_async import StorageStreamDownloader +from ._path_client_async import PathClient +from .._data_lake_file_client import DataLakeFileClient as DataLakeFileClientBase +from .._serialize import convert_datetime_to_rfc1123 +from .._deserialize import process_storage_error, deserialize_file_properties +from .._models import FileProperties +from ..aio._upload_helper import upload_datalake_file + +if TYPE_CHECKING: + from azure.core.credentials import AzureNamedKeyCredential, AzureSasCredential + from azure.core.credentials_async import AsyncTokenCredential + from .._models import ContentSettings + + +class DataLakeFileClient(PathClient, DataLakeFileClientBase): + """A client to interact with the DataLake file, even if the file may not yet exist. + + :ivar str url: + The full endpoint URL to the file system, including SAS token if used. + :ivar str primary_endpoint: + The full primary endpoint URL. + :ivar str primary_hostname: + The hostname of the primary endpoint. + :param str account_url: + The URI to the storage account. + :param file_system_name: + The file system for the directory or files. + :type file_system_name: str + :param file_path: + The whole file path, so that to interact with a specific file. + eg. "{directory}/{subdirectory}/{file}" + :type file_path: str + :param credential: + The credentials with which to authenticate. This is optional if the + account URL already has a SAS token. The value can be a SAS token string, + an instance of a AzureSasCredential or AzureNamedKeyCredential from azure.core.credentials, + an account shared access key, or an instance of a TokenCredentials class from azure.identity. 
+ If the resource URI already contains a SAS token, this will be ignored in favor of an explicit credential + - except in the case of AzureSasCredential, where the conflicting SAS tokens will raise a ValueError. + If using an instance of AzureNamedKeyCredential, "name" should be the storage account name, and "key" + should be the storage account key. + :type credential: + ~azure.core.credentials.AzureNamedKeyCredential or + ~azure.core.credentials.AzureSasCredential or + ~azure.core.credentials_async.AsyncTokenCredential or + str or dict[str, str] or None + :keyword str api_version: + The Storage API version to use for requests. Default value is the most recent service version that is + compatible with the current SDK. Setting to an older version may result in reduced feature compatibility. + :keyword str audience: The audience to use when requesting tokens for Azure Active Directory + authentication. Only has an effect when credential is of type TokenCredential. The value could be + https://storage.azure.com/ (default) or https://<account>.blob.core.windows.net. + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_instantiate_client_async.py + :start-after: [START instantiate_file_client_from_conn_str] + :end-before: [END instantiate_file_client_from_conn_str] + :language: python + :dedent: 4 + :caption: Creating the DataLakeServiceClient from connection string. + """ + + def __init__( + self, account_url: str, + file_system_name: str, + file_path: str, + credential: Optional[Union[str, Dict[str, str], "AzureNamedKeyCredential", "AzureSasCredential", "AsyncTokenCredential"]] = None, # pylint: disable=line-too-long + **kwargs: Any + ) -> None: + super(DataLakeFileClient, self).__init__(account_url, file_system_name, path_name=file_path, + credential=credential, **kwargs) + + @distributed_trace_async + async def create_file(self, content_settings=None, # type: Optional[ContentSettings] + metadata=None, # type: Optional[Dict[str, str]] + **kwargs): + # type: (...) -> Dict[str, Union[str, datetime]] + """ + Create a new file. + + :param ~azure.storage.filedatalake.ContentSettings content_settings: + ContentSettings object used to set path properties. + :param metadata: + Name-value pairs associated with the file as metadata. + :type metadata: Optional[dict[str, str]] + :keyword lease: + Required if the file has an active lease. Value can be a DataLakeLeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword str umask: + Optional and only valid if Hierarchical Namespace is enabled for the account. + When creating a file or directory and the parent folder does not have a default ACL, + the umask restricts the permissions of the file or directory to be created. + The resulting permission is given by p & ^u, where p is the permission and u is the umask. + For example, if p is 0777 and u is 0057, then the resulting permission is 0720. + The default permission is 0777 for a directory and 0666 for a file. The default umask is 0027. + The umask must be specified in 4-digit octal notation (e.g. 0766). + :keyword str owner: + The owner of the file or directory. + :keyword str group: + The owning group of the file or directory. + :keyword str acl: + Sets POSIX access control rights on files and directories. The value is a + comma-separated list of access control entries. 
Each access control entry (ACE) consists of a + scope, a type, a user or group identifier, and permissions in the format + "[scope:][type]:[id]:[permissions]". + :keyword str lease_id: + Proposed lease ID, in a GUID string format. The DataLake service returns + 400 (Invalid request) if the proposed lease ID is not in the correct format. + :keyword int lease_duration: + Specifies the duration of the lease, in seconds, or negative one + (-1) for a lease that never expires. A non-infinite lease can be + between 15 and 60 seconds. A lease duration cannot be changed + using renew or change. + :keyword expires_on: + The time to set the file to expiry. + If the type of expires_on is an int, expiration time will be set + as the number of milliseconds elapsed from creation time. + If the type of expires_on is datetime, expiration time will be set + absolute to the time provided. If no time zone info is provided, this + will be interpreted as UTC. + :paramtype expires_on: datetime or int + :keyword str permissions: + Optional and only valid if Hierarchical Namespace + is enabled for the account. Sets POSIX access permissions for the file + owner, the file owning group, and others. Each class may be granted + read, write, or execute permission. The sticky bit is also supported. + Both symbolic (rwxrw-rw-) and 4-digit octal notation (e.g. 0766) are + supported. + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword ~azure.storage.filedatalake.CustomerProvidedEncryptionKey cpk: + Encrypts the data on the service-side with the given key. + Use of customer-provided keys must be done over HTTPS. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :keyword str encryption_context: + Specifies the encryption context to set on the file. + :returns: response dict (Etag and last modified). + :rtype: dict[str, str] or dict[~datetime.datetime] + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_upload_download_async.py + :start-after: [START create_file] + :end-before: [END create_file] + :language: python + :dedent: 4 + :caption: Create file. 
+ """ + return await self._create('file', content_settings=content_settings, metadata=metadata, **kwargs) + + @distributed_trace_async + async def exists(self, **kwargs): + # type: (**Any) -> bool + """ + Returns True if a file exists and returns False otherwise. + + :kwarg int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: True if a file exists, False otherwise. + :rtype: bool + """ + return await self._exists(**kwargs) + + @distributed_trace_async + async def delete_file(self, **kwargs): + # type: (...) -> None + """ + Marks the specified file for deletion. + + :keyword lease: + Required if the file has an active lease. Value can be a LeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: None. + :rtype: None + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_upload_download_async.py + :start-after: [START delete_file] + :end-before: [END delete_file] + :language: python + :dedent: 4 + :caption: Delete file. + """ + return await self._delete(**kwargs) + + @distributed_trace_async + async def get_file_properties(self, **kwargs): + # type: (**Any) -> FileProperties + """Returns all user-defined metadata, standard HTTP properties, and + system properties for the file. It does not return the content of the file. + + :keyword lease: + Required if the directory or file has an active lease. Value can be a DataLakeLeaseClient object + or the lease ID as a string. 
+ :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword ~azure.storage.filedatalake.CustomerProvidedEncryptionKey cpk: + Decrypts the data on the service-side with the given key. + Use of customer-provided keys must be done over HTTPS. + Required if the file was created with a customer-provided key. + :keyword bool upn: + If True, the user identity values returned in the x-ms-owner, x-ms-group, + and x-ms-acl response headers will be transformed from Azure Active Directory Object IDs to User + Principal Names in the owner, group, and acl fields of + :class:`~azure.storage.filedatalake.FileProperties`. If False, the values will be returned + as Azure Active Directory Object IDs. The default value is False. Note that group and application + Object IDs are not translate because they do not have unique friendly names. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: All user-defined metadata, standard HTTP properties, and system properties for the file. + :rtype: ~azure.storage.filedatalake.FileProperties + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_upload_download_async.py + :start-after: [START get_file_properties] + :end-before: [END get_file_properties] + :language: python + :dedent: 4 + :caption: Getting the properties for a file. + """ + return await self._get_path_properties(cls=deserialize_file_properties, **kwargs) + + @distributed_trace_async + async def set_file_expiry(self, expiry_options, # type: str + expires_on=None, # type: Optional[Union[datetime, int]] + **kwargs): + # type: (...) -> None + """Sets the time a file will expire and be deleted. + + :param str expiry_options: + Required. Indicates mode of the expiry time. + Possible values include: 'NeverExpire', 'RelativeToCreation', 'RelativeToNow', 'Absolute' + :param datetime or int expires_on: + The time to set the file to expiry. 
+ When expiry_options is RelativeTo*, expires_on should be an int in milliseconds + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :rtype: None + """ + if isinstance(expires_on, datetime): + expires_on = convert_datetime_to_rfc1123(expires_on) + elif expires_on is not None: + expires_on = str(expires_on) + await self._datalake_client_for_blob_operation.path.set_expiry(expiry_options, expires_on=expires_on, + **kwargs) + + @distributed_trace_async + async def upload_data( + self, data: Union[bytes, str, Iterable[AnyStr], AsyncIterable[AnyStr], IO[AnyStr]], + length: Optional[int] = None, + overwrite: Optional[bool] = False, + **kwargs + ) -> Dict[str, Any]: + """ + Upload data to a file. + + :param data: Content to be uploaded to file + :type data: bytes, str, Iterable[AnyStr], AsyncIterable[AnyStr], or IO[AnyStr] + :param int length: Size of the data in bytes. + :param bool overwrite: to overwrite an existing file or not. + :keyword ~azure.storage.filedatalake.ContentSettings content_settings: + ContentSettings object used to set path properties. + :keyword metadata: + Name-value pairs associated with the blob as metadata. + :paramtype metadata: dict[str, str] or None + :keyword ~azure.storage.filedatalake.DataLakeLeaseClient or str lease: + Required if the blob has an active lease. Value can be a DataLakeLeaseClient object + or the lease ID as a string. + :keyword str umask: Optional and only valid if Hierarchical Namespace is enabled for the account. + When creating a file or directory and the parent folder does not have a default ACL, + the umask restricts the permissions of the file or directory to be created. + The resulting permission is given by p & ^u, where p is the permission and u is the umask. + For example, if p is 0777 and u is 0057, then the resulting permission is 0720. + The default permission is 0777 for a directory and 0666 for a file. The default umask is 0027. + The umask must be specified in 4-digit octal notation (e.g. 0766). + :keyword str permissions: Optional and only valid if Hierarchical Namespace + is enabled for the account. Sets POSIX access permissions for the file + owner, the file owning group, and others. Each class may be granted + read, write, or execute permission. The sticky bit is also supported. + Both symbolic (rwxrw-rw-) and 4-digit octal notation (e.g. 0766) are + supported. + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. 
+ :keyword bool validate_content: + If true, calculates an MD5 hash for each chunk of the file. The storage + service checks the hash of the content that has arrived with the hash + that was sent. This is primarily valuable for detecting bitflips on + the wire if using http instead of https, as https (the default), will + already validate. Note that this MD5 hash is not stored with the + blob. Also note that if enabled, the memory-efficient upload algorithm + will not be used because computing the MD5 hash requires buffering + entire blocks, and doing so defeats the purpose of the memory-efficient algorithm. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword ~azure.storage.filedatalake.CustomerProvidedEncryptionKey cpk: + Encrypts the data on the service-side with the given key. + Use of customer-provided keys must be done over HTTPS. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. This method may make multiple calls to the service and + the timeout will apply to each call individually. + :keyword int max_concurrency: + Maximum number of parallel connections to use when transferring the file in chunks. + This option does not affect the underlying connection pool, and may + require a separate configuration of the connection pool. + :keyword int chunk_size: + The maximum chunk size for uploading a file in chunks. + Defaults to 100*1024*1024, or 100MB. + :keyword str encryption_context: + Specifies the encryption context to set on the file. + :return: response dict (Etag and last modified). + :rtype: dict[str, Any] + """ + options = self._upload_options( + data, + length=length, + overwrite=overwrite, + **kwargs) + return await upload_datalake_file(**options) + + @distributed_trace_async + async def append_data(self, data, # type: Union[bytes, str, Iterable[AnyStr], IO[AnyStr]] + offset, # type: int + length=None, # type: Optional[int] + **kwargs): + # type: (...) -> Dict[str, Union[str, datetime, int]] + """Append data to the file. + + :param data: Content to be appended to file + :type data: bytes, str, Iterable[AnyStr], or IO[AnyStr] + :param int offset: start position of the data to be appended to. + :param length: Size of the data in bytes. + :type length: int or None + :keyword bool flush: + If true, will commit the data after it is appended. + :keyword bool validate_content: + If true, calculates an MD5 hash of the block content. The storage + service checks the hash of the content that has arrived + with the hash that was sent. This is primarily valuable for detecting + bitflips on the wire if using http instead of https as https (the default) + will already validate. Note that this MD5 hash is not stored with the + file. + :keyword lease_action: + Used to perform lease operations along with appending data. + + "acquire" - Acquire a lease. + "auto-renew" - Re-new an existing lease. 
+ "release" - Release the lease once the operation is complete. Requires `flush=True`. + "acquire-release" - Acquire a lease and release it once the operations is complete. Requires `flush=True`. + :paramtype lease_action: Literal["acquire", "auto-renew", "release", "acquire-release"] + :keyword int lease_duration: + Valid if `lease_action` is set to "acquire" or "acquire-release". + + Specifies the duration of the lease, in seconds, or negative one + (-1) for a lease that never expires. A non-infinite lease can be + between 15 and 60 seconds. A lease duration cannot be changed + using renew or change. Default is -1 (infinite lease). + :keyword lease: + Required if the file has an active lease or if `lease_action` is set to "acquire" or "acquire-release". + If the file has an existing lease, this will be used to access the file. If acquiring a new lease, + this will be used as the new lease id. + Value can be a DataLakeLeaseClient object or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.DataLakeLeaseClient or str + :keyword ~azure.storage.filedatalake.CustomerProvidedEncryptionKey cpk: + Encrypts the data on the service-side with the given key. + Use of customer-provided keys must be done over HTTPS. + :returns: dict of the response header. + :rtype: dict[str, str], dict[str, ~datetime.datetime], or dict[str, int] + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_upload_download_async.py + :start-after: [START append_data] + :end-before: [END append_data] + :language: python + :dedent: 4 + :caption: Append data to the file. + """ + options = self._append_data_options( + data=data, + offset=offset, + scheme=self.scheme, + length=length, + **kwargs) + try: + return await self._client.path.append_data(**options) + except HttpResponseError as error: + process_storage_error(error) + + @distributed_trace_async + async def flush_data(self, offset, # type: int + retain_uncommitted_data=False, # type: Optional[bool] + **kwargs): + # type: (...) -> Dict[str, Union[str, datetime]] + """ Commit the previous appended data. + + :param int offset: offset is equal to the length of the file after commit the + previous appended data. + :param bool retain_uncommitted_data: Valid only for flush operations. If + "true", uncommitted data is retained after the flush operation + completes; otherwise, the uncommitted data is deleted after the flush + operation. The default is false. Data at offsets less than the + specified position are written to the file when flush succeeds, but + this optional parameter allows data after the flush position to be + retained for a future flush operation. + :keyword ~azure.storage.filedatalake.ContentSettings content_settings: + ContentSettings object used to set path properties. + :keyword bool close: Azure Storage Events allow applications to receive + notifications when files change. When Azure Storage Events are + enabled, a file changed event is raised. This event has a property + indicating whether this is the final change to distinguish the + difference between an intermediate flush to a file stream and the + final close of a file stream. The close query parameter is valid only + when the action is "flush" and change notifications are enabled. If + the value of close is "true" and the flush operation completes + successfully, the service raises a file change notification with a + property indicating that this is the final update (the file stream has + been closed). 
If "false" a change notification is raised indicating + the file has changed. The default is false. This query parameter is + set to true by the Hadoop ABFS driver to indicate that the file stream + has been closed." + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword lease_action: + Used to perform lease operations along with appending data. + + "acquire" - Acquire a lease. + "auto-renew" - Re-new an existing lease. + "release" - Release the lease once the operation is complete. + "acquire-release" - Acquire a lease and release it once the operations is complete. + :paramtype lease_action: Literal["acquire", "auto-renew", "release", "acquire-release"] + :keyword int lease_duration: + Valid if `lease_action` is set to "acquire" or "acquire-release". + + Specifies the duration of the lease, in seconds, or negative one + (-1) for a lease that never expires. A non-infinite lease can be + between 15 and 60 seconds. A lease duration cannot be changed + using renew or change. Default is -1 (infinite lease). + :keyword lease: + Required if the file has an active lease or if `lease_action` is set to "acquire" or "acquire-release". + If the file has an existing lease, this will be used to access the file. If acquiring a new lease, + this will be used as the new lease id. + Value can be a DataLakeLeaseClient object or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.DataLakeLeaseClient or str + :keyword ~azure.storage.filedatalake.CustomerProvidedEncryptionKey cpk: + Encrypts the data on the service-side with the given key. + Use of customer-provided keys must be done over HTTPS. + :returns: response header in dict. + :rtype: dict[str, str] or dict[str, ~datetime.datetime] + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_file_system_async.py + :start-after: [START upload_file_to_file_system] + :end-before: [END upload_file_to_file_system] + :language: python + :dedent: 12 + :caption: Commit the previous appended data. + """ + options = self._flush_data_options( + offset, + self.scheme, + retain_uncommitted_data=retain_uncommitted_data, **kwargs) + try: + return await self._client.path.flush_data(**options) + except HttpResponseError as error: + process_storage_error(error) + + @distributed_trace_async + async def download_file(self, offset=None, length=None, **kwargs): + # type: (Optional[int], Optional[int], Any) -> StorageStreamDownloader + """Downloads a file to the StorageStreamDownloader. 
The readall() method must + be used to read all the content, or readinto() must be used to download the file into + a stream. Using chunks() returns an async iterator which allows the user to iterate over the content in chunks. + + :param int offset: + Start of byte range to use for downloading a section of the file. + Must be set if length is provided. + :param int length: + Number of bytes to read from the stream. This is optional, but + should be supplied for optimal performance. + :keyword lease: + If specified, download only succeeds if the file's lease is active + and matches this ID. Required if the file has an active lease. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword ~azure.storage.filedatalake.CustomerProvidedEncryptionKey cpk: + Decrypts the data on the service-side with the given key. + Use of customer-provided keys must be done over HTTPS. + Required if the file was created with a Customer-Provided Key. + :keyword int max_concurrency: + Maximum number of parallel connections to use when transferring the file in chunks. + This option does not affect the underlying connection pool, and may + require a separate configuration of the connection pool. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. This method may make multiple calls to the service and + the timeout will apply to each call individually. + :returns: A streaming object (StorageStreamDownloader) + :rtype: ~azure.storage.filedatalake.aio.StorageStreamDownloader + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_upload_download_async.py + :start-after: [START read_file] + :end-before: [END read_file] + :language: python + :dedent: 4 + :caption: Return the downloaded data. + """ + downloader = await self._blob_client.download_blob(offset=offset, length=length, **kwargs) + return StorageStreamDownloader(downloader) + + @distributed_trace_async + async def rename_file(self, new_name, **kwargs): + # type: (str, **Any) -> DataLakeFileClient + """ + Rename the source file. 
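# Editor's note: a short hedged sketch of reading a file back with download_file();
# the file client is a placeholder, created as in the earlier write_file sketch.
async def read_file(file_client) -> bytes:
    downloader = await file_client.download_file()
    # readall() buffers the whole file in memory; chunked reads are shown further below.
    return await downloader.readall()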
+ + :param str new_name: the new file name the user want to rename to. + The value must have the following format: "{filesystem}/{directory}/{subdirectory}/{file}". + :keyword ~azure.storage.filedatalake.ContentSettings content_settings: + ContentSettings object used to set path properties. + :keyword source_lease: A lease ID for the source path. If specified, + the source path must have an active lease and the lease ID must + match. + :paramtype source_lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword lease: + Required if the file/directory has an active lease. Value can be a LeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :type permissions: str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword ~datetime.datetime source_if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime source_if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str source_etag: + The source ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions source_match_condition: + The source match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :return: the renamed file client + :rtype: DataLakeFileClient + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/datalake_samples_upload_download_async.py + :start-after: [START rename_file] + :end-before: [END rename_file] + :language: python + :dedent: 4 + :caption: Rename the source file. + """ + new_file_system, new_path, new_file_sas = self._parse_rename_path(new_name) + + new_file_client = DataLakeFileClient( + f"{self.scheme}://{self.primary_hostname}", new_file_system, file_path=new_path, + credential=self._raw_credential or new_file_sas, + _hosts=self._hosts, _configuration=self._config, _pipeline=self._pipeline, + _location_mode=self._location_mode) + await new_file_client._rename_path( # pylint: disable=protected-access + f'/{quote(unquote(self.file_system_name))}/{quote(unquote(self.path_name))}{self._query_str}', **kwargs) + return new_file_client diff --git a/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_data_lake_lease_async.py b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_data_lake_lease_async.py new file mode 100644 index 00000000..0dae4306 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_data_lake_lease_async.py @@ -0,0 +1,269 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- +# pylint: disable=invalid-overridden-method, docstring-keyword-should-match-keyword-only + +from typing import ( + Union, Optional, Any, + TypeVar, TYPE_CHECKING +) +from azure.core.tracing.decorator_async import distributed_trace_async +from azure.storage.blob.aio import BlobLeaseClient +from .._data_lake_lease import DataLakeLeaseClient as DataLakeLeaseClientBase + + +if TYPE_CHECKING: + FileSystemClient = TypeVar("FileSystemClient") + DataLakeDirectoryClient = TypeVar("DataLakeDirectoryClient") + DataLakeFileClient = TypeVar("DataLakeFileClient") + + +class DataLakeLeaseClient(DataLakeLeaseClientBase): # pylint: disable=client-accepts-api-version-keyword + """Creates a new DataLakeLeaseClient. + + This client provides lease operations on a FileSystemClient, DataLakeDirectoryClient or DataLakeFileClient. + + :ivar str id: + The ID of the lease currently being maintained. This will be `None` if no + lease has yet been acquired. + :ivar str etag: + The ETag of the lease currently being maintained. This will be `None` if no + lease has yet been acquired or modified. + :ivar ~datetime.datetime last_modified: + The last modified timestamp of the lease currently being maintained. + This will be `None` if no lease has yet been acquired or modified. + + :param client: + The client of the file system, directory, or file to lease. + :type client: ~azure.storage.filedatalake.aio.FileSystemClient or + ~azure.storage.filedatalake.aio.DataLakeDirectoryClient or ~azure.storage.filedatalake.aio.DataLakeFileClient + :param str lease_id: + A string representing the lease ID of an existing lease. This value does not + need to be specified in order to acquire a new lease, or break one. 
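# Editor's note: a hedged sketch of lease usage. As the class docstring above notes, the
# client may be a FileSystemClient, DataLakeDirectoryClient or DataLakeFileClient; the
# write guarded by the lease is only illustrative.
from azure.storage.filedatalake.aio import DataLakeLeaseClient

async def guarded_write(file_client) -> None:
    lease = DataLakeLeaseClient(file_client)
    await lease.acquire(lease_duration=15)   # 15-60 seconds, or -1 for an infinite lease
    try:
        await file_client.upload_data(b"guarded write", overwrite=True, lease=lease)
    finally:
        await lease.release()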
+ """ + def __init__( + self, client, lease_id=None + ): # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs + # type: (Union[FileSystemClient, DataLakeDirectoryClient, DataLakeFileClient], Optional[str]) -> None + super(DataLakeLeaseClient, self).__init__(client, lease_id) + + if hasattr(client, '_blob_client'): + _client = client._blob_client # type: ignore + elif hasattr(client, '_container_client'): + _client = client._container_client # type: ignore + else: + raise TypeError("Lease must use any of FileSystemClient DataLakeDirectoryClient, or DataLakeFileClient.") + + self._blob_lease_client = BlobLeaseClient(_client, lease_id=lease_id) + + def __enter__(self): + raise TypeError("Async lease must use 'async with'.") + + def __exit__(self, *args): + self.release() + + async def __aenter__(self): + return self + + async def __aexit__(self, *args): + await self.release() + + @distributed_trace_async + async def acquire(self, lease_duration=-1, **kwargs): + # type: (int, Optional[int], **Any) -> None + """Requests a new lease. + + If the file/file system does not have an active lease, the DataLake service creates a + lease on the file/file system and returns a new lease ID. + + :param int lease_duration: + Specifies the duration of the lease, in seconds, or negative one + (-1) for a lease that never expires. A non-infinite lease can be + between 15 and 60 seconds. A lease duration cannot be changed + using renew or change. Default is -1 (infinite lease). + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :rtype: None + """ + await self._blob_lease_client.acquire(lease_duration=lease_duration, **kwargs) + self._update_lease_client_attributes() + + @distributed_trace_async + async def renew(self, **kwargs): + # type: (Any) -> None + """Renews the lease. + + The lease can be renewed if the lease ID specified in the + lease client matches that associated with the file system or file. 
Note that + the lease may be renewed even if it has expired as long as the file system + or file has not been leased again since the expiration of that lease. When you + renew a lease, the lease duration clock resets. + + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :return: None + """ + await self._blob_lease_client.renew(**kwargs) + self._update_lease_client_attributes() + + @distributed_trace_async + async def release(self, **kwargs): + # type: (Any) -> None + """Release the lease. + + The lease may be released if the client lease id specified matches + that associated with the file system or file. Releasing the lease allows another client + to immediately acquire the lease for the file system or file as soon as the release is complete. + + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. 
+ This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :return: None + """ + await self._blob_lease_client.release(**kwargs) + self._update_lease_client_attributes() + + @distributed_trace_async + async def change(self, proposed_lease_id, **kwargs): + # type: (str, Any) -> None + """Change the lease ID of an active lease. + + :param str proposed_lease_id: + Proposed lease ID, in a GUID string format. The DataLake service returns 400 + (Invalid request) if the proposed lease ID is not in the correct format. + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :return: None + """ + await self._blob_lease_client.change(proposed_lease_id=proposed_lease_id, **kwargs) + self._update_lease_client_attributes() + + @distributed_trace_async + async def break_lease(self, lease_break_period=None, **kwargs): + # type: (Optional[int], Any) -> int + """Break the lease, if the file system or file has an active lease. + + Once a lease is broken, it cannot be renewed. Any authorized request can break the lease; + the request is not required to specify a matching lease ID. When a lease + is broken, the lease break period is allowed to elapse, during which time + no lease operation except break and release can be performed on the file system or file. + When a lease is successfully broken, the response indicates the interval + in seconds until a new lease can be acquired. + + :param int lease_break_period: + This is the proposed duration of seconds that the lease + should continue before it is broken, between 0 and 60 seconds. This + break period is only used if it is shorter than the time remaining + on the lease. If longer, the time remaining on the lease is used. + A new lease will not be available before the break period has + expired, but the lease may be held for longer than the break + period. 
If this header does not appear with a break + operation, a fixed-duration lease breaks after the remaining lease + period elapses, and an infinite lease breaks immediately. + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :return: Approximate time remaining in the lease period, in seconds. + :rtype: int + """ + await self._blob_lease_client.break_lease(lease_break_period=lease_break_period, **kwargs) diff --git a/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_data_lake_service_client_async.py b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_data_lake_service_client_async.py new file mode 100644 index 00000000..093aad92 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_data_lake_service_client_async.py @@ -0,0 +1,570 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. 
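# Editor's note: a hedged sketch of rotating and then force-breaking a lease with the
# change()/break_lease() calls documented above; the lease object is assumed to have
# been acquired as in the earlier guarded_write sketch.
import uuid

async def rotate_then_break(lease) -> None:
    await lease.change(proposed_lease_id=str(uuid.uuid4()))  # swap to a fresh lease ID
    await lease.break_lease(lease_break_period=10)  # others may acquire after ~10 seconds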
+# -------------------------------------------------------------------------- +# pylint: disable=invalid-overridden-method, docstring-keyword-should-match-keyword-only + +from typing import Any, Dict, Optional, Union, TYPE_CHECKING + +from azure.core.paging import ItemPaged +from azure.core.pipeline import AsyncPipeline +from azure.core.tracing.decorator import distributed_trace +from azure.core.tracing.decorator_async import distributed_trace_async + +from azure.storage.blob.aio import BlobServiceClient +from .._serialize import get_api_version +from .._generated.aio import AzureDataLakeStorageRESTAPI +from .._deserialize import get_datalake_service_properties +from .._shared.base_client_async import AsyncTransportWrapper, AsyncStorageAccountHostsMixin +from ._file_system_client_async import FileSystemClient +from .._data_lake_service_client import DataLakeServiceClient as DataLakeServiceClientBase +from .._shared.policies_async import ExponentialRetry +from ._data_lake_directory_client_async import DataLakeDirectoryClient +from ._data_lake_file_client_async import DataLakeFileClient +from ._models import FileSystemPropertiesPaged +from .._models import UserDelegationKey, LocationMode + +if TYPE_CHECKING: + from azure.core.credentials import AzureNamedKeyCredential, AzureSasCredential + from azure.core.credentials_async import AsyncTokenCredential + + +class DataLakeServiceClient(AsyncStorageAccountHostsMixin, DataLakeServiceClientBase): + """A client to interact with the DataLake Service at the account level. + + This client provides operations to retrieve and configure the account properties + as well as list, create and delete file systems within the account. + For operations relating to a specific file system, directory or file, clients for those entities + can also be retrieved using the `get_client` functions. + + :ivar str url: + The full endpoint URL to the datalake service endpoint. + :ivar str primary_endpoint: + The full primary endpoint URL. + :ivar str primary_hostname: + The hostname of the primary endpoint. + :param str account_url: + The URL to the DataLake storage account. Any other entities included + in the URL path (e.g. file system or file) will be discarded. This URL can be optionally + authenticated with a SAS token. + :param credential: + The credentials with which to authenticate. This is optional if the + account URL already has a SAS token. The value can be a SAS token string, + an instance of a AzureSasCredential or AzureNamedKeyCredential from azure.core.credentials, + an account shared access key, or an instance of a TokenCredentials class from azure.identity. + If the resource URI already contains a SAS token, this will be ignored in favor of an explicit credential + - except in the case of AzureSasCredential, where the conflicting SAS tokens will raise a ValueError. + If using an instance of AzureNamedKeyCredential, "name" should be the storage account name, and "key" + should be the storage account key. + :type credential: + ~azure.core.credentials.AzureNamedKeyCredential or + ~azure.core.credentials.AzureSasCredential or + ~azure.core.credentials_async.AsyncTokenCredential or + str or dict[str, str] or None + :keyword str api_version: + The Storage API version to use for requests. Default value is the most recent service version that is + compatible with the current SDK. Setting to an older version may result in reduced feature compatibility. + :keyword str audience: The audience to use when requesting tokens for Azure Active Directory + authentication. 
Only has an effect when credential is of type TokenCredential. The value could be + https://storage.azure.com/ (default) or https://<account>.blob.core.windows.net. + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_service_async.py + :start-after: [START create_datalake_service_client] + :end-before: [END create_datalake_service_client] + :language: python + :dedent: 4 + :caption: Creating the DataLakeServiceClient from connection string. + + .. literalinclude:: ../samples/datalake_samples_service_async.py + :start-after: [START create_datalake_service_client_oauth] + :end-before: [END create_datalake_service_client_oauth] + :language: python + :dedent: 4 + :caption: Creating the DataLakeServiceClient with Azure Identity credentials. + """ + + def __init__( + self, account_url: str, + credential: Optional[Union[str, Dict[str, str], "AzureNamedKeyCredential", "AzureSasCredential", "AsyncTokenCredential"]] = None, # pylint: disable=line-too-long + **kwargs: Any + ) -> None: + kwargs['retry_policy'] = kwargs.get('retry_policy') or ExponentialRetry(**kwargs) + super(DataLakeServiceClient, self).__init__( + account_url, + credential=credential, + **kwargs + ) + self._blob_service_client = BlobServiceClient(self._blob_account_url, credential, **kwargs) + self._blob_service_client._hosts[LocationMode.SECONDARY] = "" + self._client = AzureDataLakeStorageRESTAPI(self.url, base_url=self.url, pipeline=self._pipeline) + self._client._config.version = get_api_version(kwargs) + self._loop = kwargs.get('loop', None) + + async def __aenter__(self): + await self._blob_service_client.__aenter__() + return self + + async def __aexit__(self, *args): + await self._blob_service_client.close() + await super(DataLakeServiceClient, self).__aexit__(*args) + + async def close(self): + # type: () -> None + """ This method is to close the sockets opened by the client. + It need not be used when using with a context manager. + """ + await self.__aexit__() + + @distributed_trace_async + async def get_user_delegation_key(self, key_start_time, # type: datetime + key_expiry_time, # type: datetime + **kwargs # type: Any + ): + # type: (...) -> UserDelegationKey + """ + Obtain a user delegation key for the purpose of signing SAS tokens. + A token credential must be present on the service object for this request to succeed. + + :param ~datetime.datetime key_start_time: + A DateTime value. Indicates when the key becomes valid. + :param ~datetime.datetime key_expiry_time: + A DateTime value. Indicates when the key stops being valid. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :return: The user delegation key. + :rtype: ~azure.storage.filedatalake.UserDelegationKey + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_service_async.py + :start-after: [START get_user_delegation_key] + :end-before: [END get_user_delegation_key] + :language: python + :dedent: 8 + :caption: Get user delegation key from datalake service client. 
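# Editor's note: a hedged construction sketch. The account URL is a parameter, and an
# AAD token credential (for example from azure.identity.aio) is assumed, since user
# delegation keys require token-based authentication.
from datetime import datetime, timedelta, timezone
from azure.storage.filedatalake.aio import DataLakeServiceClient

async def fetch_delegation_key(account_url: str, credential):
    async with DataLakeServiceClient(account_url, credential=credential) as service:
        start = datetime.now(timezone.utc)
        # Keys are valid for a bounded window; one hour here is an arbitrary choice.
        return await service.get_user_delegation_key(start, start + timedelta(hours=1))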
+ """ + delegation_key = await self._blob_service_client.get_user_delegation_key( + key_start_time=key_start_time, + key_expiry_time=key_expiry_time, + **kwargs) + return UserDelegationKey._from_generated(delegation_key) # pylint: disable=protected-access + + @distributed_trace + def list_file_systems(self, name_starts_with=None, # type: Optional[str] + include_metadata=None, # type: Optional[bool] + **kwargs): + # type: (...) -> ItemPaged[FileSystemProperties] + """Returns a generator to list the file systems under the specified account. + + The generator will lazily follow the continuation tokens returned by + the service and stop when all file systems have been returned. + + :param str name_starts_with: + Filters the results to return only file systems whose names + begin with the specified prefix. + :param bool include_metadata: + Specifies that file system metadata be returned in the response. + The default value is `False`. + :keyword int results_per_page: + The maximum number of file system names to retrieve per API + call. If the request does not specify the server will return up to 5,000 items per page. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :keyword bool include_deleted: + Specifies that deleted file systems to be returned in the response. This is for file system restore enabled + account. The default value is `False`. + .. versionadded:: 12.3.0 + :keyword bool include_system: + Flag specifying that system filesystems should be included. + .. versionadded:: 12.6.0 + :returns: An iterable (auto-paging) of FileSystemProperties. + :rtype: ~azure.core.paging.ItemPaged[~azure.storage.filedatalake.FileSystemProperties] + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_service_async.py + :start-after: [START list_file_systems] + :end-before: [END list_file_systems] + :language: python + :dedent: 8 + :caption: Listing the file systems in the datalake service. + """ + item_paged = self._blob_service_client.list_containers(name_starts_with=name_starts_with, + include_metadata=include_metadata, + **kwargs) + item_paged._page_iterator_class = FileSystemPropertiesPaged # pylint: disable=protected-access + return item_paged + + @distributed_trace_async + async def create_file_system(self, file_system, # type: Union[FileSystemProperties, str] + metadata=None, # type: Optional[Dict[str, str]] + public_access=None, # type: Optional[PublicAccess] + **kwargs): + # type: (...) -> FileSystemClient + """Creates a new file system under the specified account. + + If the file system with the same name already exists, a ResourceExistsError will + be raised. This method returns a client with which to interact with the newly + created file system. + + :param str file_system: + The name of the file system to create. + :param metadata: + A dict with name-value pairs to associate with the + file system as metadata. Example: `{'Category':'test'}` + :type metadata: dict(str, str) + :param public_access: + Possible values include: file system, file. 
+ :type public_access: ~azure.storage.filedatalake.PublicAccess + :keyword encryption_scope_options: + Specifies the default encryption scope to set on the file system and use for + all future writes. + + .. versionadded:: 12.9.0 + + :paramtype encryption_scope_options: dict or ~azure.storage.filedatalake.EncryptionScopeOptions + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: FileSystemClient under the specified account. + :rtype: ~azure.storage.filedatalake.FileSystemClient + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_service_async.py + :start-after: [START create_file_system_from_service_client] + :end-before: [END create_file_system_from_service_client] + :language: python + :dedent: 8 + :caption: Creating a file system in the datalake service. + """ + file_system_client = self.get_file_system_client(file_system) + await file_system_client.create_file_system(metadata=metadata, public_access=public_access, **kwargs) + return file_system_client + + async def _rename_file_system(self, name, new_name, **kwargs): + # type: (str, str, **Any) -> FileSystemClient + """Renames a filesystem. + + Operation is successful only if the source filesystem exists. + + :param str name: + The name of the filesystem to rename. + :param str new_name: + The new filesystem name the user wants to rename to. + :keyword lease: + Specify this to perform only if the lease ID given + matches the active lease ID of the source filesystem. + :paramtype lease: ~azure.storage.filedatalake.DataLakeLeaseClient or str + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: FileSystemClient with the newly specified name. + :rtype: ~azure.storage.filedatalake.FileSystemClient + """ + await self._blob_service_client._rename_container(name, new_name, **kwargs) # pylint: disable=protected-access + renamed_file_system = self.get_file_system_client(new_name) + return renamed_file_system + + @distributed_trace_async + async def undelete_file_system(self, name, deleted_version, **kwargs): + # type: (str, str, **Any) -> FileSystemClient + """Restores soft-deleted filesystem. + + Operation will only be successful if used within the specified number of days + set in the delete retention policy. + + .. versionadded:: 12.3.0 + This operation was introduced in API version '2019-12-12'. + + :param str name: + Specifies the name of the deleted filesystem to restore. + :param str deleted_version: + Specifies the version of the deleted filesystem to restore. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. 
+ This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: The FileSystemClient of the restored soft-deleted filesystem. + :rtype: ~azure.storage.filedatalake.FileSystemClient + """ + new_name = kwargs.pop('new_name', None) + await self._blob_service_client.undelete_container(name, deleted_version, new_name=new_name, **kwargs) + file_system = self.get_file_system_client(new_name or name) + return file_system + + @distributed_trace_async + async def delete_file_system(self, file_system, # type: Union[FileSystemProperties, str] + **kwargs): + # type: (...) -> FileSystemClient + """Marks the specified file system for deletion. + + The file system and any files contained within it are later deleted during garbage collection. + If the file system is not found, a ResourceNotFoundError will be raised. + + :param file_system: + The file system to delete. This can either be the name of the file system, + or an instance of FileSystemProperties. + :type file_system: str or ~azure.storage.filedatalake.FileSystemProperties + :keyword lease: + If specified, delete_file_system only succeeds if the + file system's lease is active and matches this ID. + Required if the file system has an active lease. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: FileSystemClient after marking the specified file system for deletion. + :rtype: ~azure.storage.filedatalake.aio.FileSystemClient + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_service_async.py + :start-after: [START delete_file_system_from_service_client] + :end-before: [END delete_file_system_from_service_client] + :language: python + :dedent: 8 + :caption: Deleting a file system in the datalake service. 
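# Editor's note: a hedged sketch chaining the file-system operations documented above;
# the service client is assumed to exist already and "archive" is a placeholder name.
async def file_system_lifecycle(service) -> None:
    await service.create_file_system("archive", metadata={"Category": "test"})
    # list_file_systems pages lazily; the aio client yields FileSystemProperties.
    async for fs in service.list_file_systems(name_starts_with="arch", include_metadata=True):
        print(fs.name, fs.metadata)
    await service.delete_file_system("archive")  # accepts a name or FileSystemProperties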
+ """ + file_system_client = self.get_file_system_client(file_system) + await file_system_client.delete_file_system(**kwargs) + return file_system_client + + def get_file_system_client(self, file_system # type: Union[FileSystemProperties, str] + ): + # type: (...) -> FileSystemClient + """Get a client to interact with the specified file system. + + The file system need not already exist. + + :param file_system: + The file system. This can either be the name of the file system, + or an instance of FileSystemProperties. + :type file_system: str or ~azure.storage.filedatalake.FileSystemProperties + :returns: A FileSystemClient. + :rtype: ~azure.storage.filedatalake.aio.FileSystemClient + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_file_system_async.py + :start-after: [START create_file_system_client_from_service] + :end-before: [END create_file_system_client_from_service] + :language: python + :dedent: 8 + :caption: Getting the file system client to interact with a specific file system. + """ + try: + file_system_name = file_system.name + except AttributeError: + file_system_name = file_system + + _pipeline = AsyncPipeline( + transport=AsyncTransportWrapper(self._pipeline._transport), # pylint: disable = protected-access + policies=self._pipeline._impl_policies # pylint: disable = protected-access + ) + return FileSystemClient(self.url, file_system_name, credential=self._raw_credential, + api_version=self.api_version, + _configuration=self._config, + _pipeline=_pipeline, _hosts=self._hosts) + + def get_directory_client(self, file_system, # type: Union[FileSystemProperties, str] + directory # type: Union[DirectoryProperties, str] + ): + # type: (...) -> DataLakeDirectoryClient + """Get a client to interact with the specified directory. + + The directory need not already exist. + + :param file_system: + The file system that the directory is in. This can either be the name of the file system, + or an instance of FileSystemProperties. + :type file_system: str or ~azure.storage.filedatalake.FileSystemProperties + :param directory: + The directory with which to interact. This can either be the name of the directory, + or an instance of DirectoryProperties. + :type directory: str or ~azure.storage.filedatalake.DirectoryProperties + :returns: A DataLakeDirectoryClient. + :rtype: ~azure.storage.filedatalake.aio.DataLakeDirectoryClient + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_service_async.py + :start-after: [START get_directory_client_from_service_client] + :end-before: [END get_directory_client_from_service_client] + :language: python + :dedent: 8 + :caption: Getting the directory client to interact with a specific directory. + """ + try: + file_system_name = file_system.name + except AttributeError: + file_system_name = file_system + try: + directory_name = directory.name + except AttributeError: + directory_name = directory + + _pipeline = AsyncPipeline( + transport=AsyncTransportWrapper(self._pipeline._transport), # pylint: disable = protected-access + policies=self._pipeline._impl_policies # pylint: disable = protected-access + ) + return DataLakeDirectoryClient(self.url, file_system_name, directory_name=directory_name, + credential=self._raw_credential, + api_version=self.api_version, + _configuration=self._config, _pipeline=_pipeline, + _hosts=self._hosts) + + def get_file_client(self, file_system, # type: Union[FileSystemProperties, str] + file_path # type: Union[FileProperties, str] + ): + # type: (...) 
-> DataLakeFileClient + """Get a client to interact with the specified file. + + The file need not already exist. + + :param file_system: + The file system that the file is in. This can either be the name of the file system, + or an instance of FileSystemProperties. + :type file_system: str or ~azure.storage.filedatalake.FileSystemProperties + :param file_path: + The file with which to interact. This can either be the full path of the file(from the root directory), + or an instance of FileProperties. eg. directory/subdirectory/file + :type file_path: str or ~azure.storage.filedatalake.FileProperties + :returns: A DataLakeFileClient. + :rtype: ~azure.storage.filedatalake.aio.DataLakeFileClient + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_service_async.py + :start-after: [START get_file_client_from_service_client] + :end-before: [END get_file_client_from_service_client] + :language: python + :dedent: 8 + :caption: Getting the file client to interact with a specific file. + """ + try: + file_system_name = file_system.name + except AttributeError: + file_system_name = file_system + try: + file_path = file_path.name + except AttributeError: + pass + + _pipeline = AsyncPipeline( + transport=AsyncTransportWrapper(self._pipeline._transport), # pylint: disable = protected-access + policies=self._pipeline._impl_policies # pylint: disable = protected-access + ) + return DataLakeFileClient( + self.url, file_system_name, file_path=file_path, credential=self._raw_credential, + api_version=self.api_version, + _hosts=self._hosts, _configuration=self._config, _pipeline=_pipeline) + + @distributed_trace_async + async def set_service_properties(self, **kwargs): + # type: (**Any) -> None + """Sets the properties of a storage account's Datalake service, including + Azure Storage Analytics. + + If an element (e.g. analytics_logging) is left as None, the + existing settings on the service for that functionality are preserved. + + .. versionadded:: 12.4.0 + This operation was introduced in API version '2020-06-12'. + + :keyword analytics_logging: + Groups the Azure Analytics Logging settings. + :type analytics_logging: ~azure.storage.filedatalake.AnalyticsLogging + :keyword hour_metrics: + The hour metrics settings provide a summary of request + statistics grouped by API in hourly aggregates. + :type hour_metrics: ~azure.storage.filedatalake.Metrics + :keyword minute_metrics: + The minute metrics settings provide request statistics + for each minute. + :type minute_metrics: ~azure.storage.filedatalake.Metrics + :keyword cors: + You can include up to five CorsRule elements in the + list. If an empty list is specified, all CORS rules will be deleted, + and CORS will be disabled for the service. + :type cors: list[~azure.storage.filedatalake.CorsRule] + :keyword str target_version: + Indicates the default version to use for requests if an incoming + request's version is not specified. + :keyword delete_retention_policy: + The delete retention policy specifies whether to retain deleted files/directories. + It also specifies the number of days and versions of file/directory to keep. + :type delete_retention_policy: ~azure.storage.filedatalake.RetentionPolicy + :keyword static_website: + Specifies whether the static website feature is enabled, + and if yes, indicates the index document and 404 error document to use. + :type static_website: ~azure.storage.filedatalake.StaticWebsite + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. 
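# Editor's note: a hedged sketch of the get_*_client helpers implemented above; names
# are placeholders and no network call is made until an operation is awaited on a client.
def build_clients(service):
    fs_client = service.get_file_system_client("my-filesystem")
    dir_client = service.get_directory_client("my-filesystem", "raw/2024")
    file_client = service.get_file_client("my-filesystem", "raw/2024/events.json")
    return fs_client, dir_client, file_client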
For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + """ + await self._blob_service_client.set_service_properties(**kwargs) + + @distributed_trace_async + async def get_service_properties(self, **kwargs): + # type: (**Any) -> Dict[str, Any] + """Gets the properties of a storage account's datalake service, including + Azure Storage Analytics. + + .. versionadded:: 12.4.0 + This operation was introduced in API version '2020-06-12'. + + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: An object containing datalake service properties such as + analytics logging, hour/minute metrics, cors rules, etc. + :rtype: dict[str, Any] + """ + props = await self._blob_service_client.get_service_properties(**kwargs) + return get_datalake_service_properties(props) diff --git a/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_download_async.py b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_download_async.py new file mode 100644 index 00000000..e22a9df9 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_download_async.py @@ -0,0 +1,82 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- +from typing import AsyncIterator, IO, Optional + +from .._deserialize import from_blob_properties + + +class StorageStreamDownloader(object): + """A streaming object to download from Azure Storage. + + :ivar str name: + The name of the file being downloaded. + :ivar ~azure.storage.filedatalake.FileProperties properties: + The properties of the file being downloaded. If only a range of the data is being + downloaded, this will be reflected in the properties. + :ivar int size: + The size of the total data in the stream. This will be the byte range if specified, + otherwise the total size of the file. + """ + + def __init__(self, downloader): + self._downloader = downloader + self.name = self._downloader.name + + # Parse additional Datalake-only properties + encryption_context = self._downloader._response.response.headers.get('x-ms-encryption-context') + acl = self._downloader._response.response.headers.get('x-ms-acl') + + self.properties = from_blob_properties( + self._downloader.properties, + encryption_context=encryption_context, + acl=acl) + self.size = self._downloader.size + + def __len__(self): + return self.size + + def chunks(self) -> AsyncIterator[bytes]: + """Iterate over chunks in the download stream. + + :returns: An async iterator over the chunks in the download stream. 
+ :rtype: AsyncIterator[bytes] + """ + return self._downloader.chunks() + + async def read(self, size: Optional[int] = -1) -> bytes: + """ + Read up to size bytes from the stream and return them. If size + is unspecified or is -1, all bytes will be read. + + :param Optional[int] size: + The number of bytes to download from the stream. Leave unspecified + or set to -1 to download all bytes. + :returns: + The requested data as bytes. If the return value is empty, there is no more data to read. + :rtype: bytes + """ + return await self._downloader.read(size) + + async def readall(self) -> bytes: + """Download the contents of this file. + + This operation is blocking until all data is downloaded. + :returns: The contents of the file. + :rtype: bytes + """ + return await self._downloader.readall() + + async def readinto(self, stream: IO[bytes]) -> int: + """Download the contents of this file to a stream. + + :param IO[bytes] stream: + The stream to download to. This can be an open file-handle, + or any writable stream. The stream must be seekable if the download + uses more than one parallel connection. + :returns: The number of bytes read. + :rtype: int + """ + return await self._downloader.readinto(stream) diff --git a/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_file_system_client_async.py b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_file_system_client_async.py new file mode 100644 index 00000000..9c3122a1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_file_system_client_async.py @@ -0,0 +1,1004 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. 
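A minimal sketch of consuming the StorageStreamDownloader described above from the async file client; the account URL, credential, file system, and file path are placeholders:

    import asyncio

    from azure.storage.filedatalake.aio import DataLakeFileClient

    async def main():
        # Placeholder endpoint and credential; a SAS token or account key string works here.
        file_client = DataLakeFileClient(
            account_url="https://<account-name>.dfs.core.windows.net",
            file_system_name="my-filesystem",
            file_path="my-dir/my-file.txt",
            credential="<sas-token-or-account-key>")
        async with file_client:
            downloader = await file_client.download_file()  # StorageStreamDownloader
            data = await downloader.readall()               # whole file in memory
            print(len(data))
            # Alternatively, stream the same file chunk by chunk:
            downloader = await file_client.download_file()
            async for chunk in downloader.chunks():
                print(len(chunk))

    asyncio.run(main())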
+# -------------------------------------------------------------------------- +# pylint: disable=invalid-overridden-method, too-many-lines, docstring-keyword-should-match-keyword-only + +import functools +from typing import ( # pylint: disable=unused-import + Union, Optional, Any, Dict, List, Tuple, + TYPE_CHECKING +) + +from azure.core.exceptions import HttpResponseError +from azure.core.tracing.decorator import distributed_trace + +from azure.core.pipeline import AsyncPipeline +from azure.core.async_paging import AsyncItemPaged + +from azure.core.tracing.decorator_async import distributed_trace_async +from azure.storage.blob.aio import ContainerClient +from .._serialize import get_api_version +from .._deserialize import process_storage_error, is_file_path +from .._generated.models import ListBlobsIncludeItem + +from ._data_lake_file_client_async import DataLakeFileClient +from ._data_lake_directory_client_async import DataLakeDirectoryClient +from ._data_lake_lease_async import DataLakeLeaseClient +from .._file_system_client import FileSystemClient as FileSystemClientBase +from .._generated.aio import AzureDataLakeStorageRESTAPI +from .._shared.base_client_async import AsyncTransportWrapper, AsyncStorageAccountHostsMixin +from .._shared.policies_async import ExponentialRetry +from .._models import FileSystemProperties, PublicAccess, DirectoryProperties, FileProperties, DeletedPathProperties +from ._list_paths_helper import DeletedPathPropertiesPaged, PathPropertiesPaged + + +if TYPE_CHECKING: + from azure.core.credentials import AzureNamedKeyCredential, AzureSasCredential + from azure.core.credentials_async import AsyncTokenCredential + from datetime import datetime + from .._models import PathProperties + + +class FileSystemClient(AsyncStorageAccountHostsMixin, FileSystemClientBase): + """A client to interact with a specific file system, even if that file system + may not yet exist. + + For operations relating to a specific directory or file within this file system, a directory client or file client + can be retrieved using the :func:`~get_directory_client` or :func:`~get_file_client` functions. + + :ivar str url: + The full endpoint URL to the file system, including SAS token if used. + :ivar str primary_endpoint: + The full primary endpoint URL. + :ivar str primary_hostname: + The hostname of the primary endpoint. + :param str account_url: + The URI to the storage account. + :param file_system_name: + The file system for the directory or files. + :type file_system_name: str + :param credential: + The credentials with which to authenticate. This is optional if the + account URL already has a SAS token. The value can be a SAS token string, + an instance of a AzureSasCredential or AzureNamedKeyCredential from azure.core.credentials, + an account shared access key, or an instance of a TokenCredentials class from azure.identity. + If the resource URI already contains a SAS token, this will be ignored in favor of an explicit credential + - except in the case of AzureSasCredential, where the conflicting SAS tokens will raise a ValueError. + If using an instance of AzureNamedKeyCredential, "name" should be the storage account name, and "key" + should be the storage account key. + :type credential: + ~azure.core.credentials.AzureNamedKeyCredential or + ~azure.core.credentials.AzureSasCredential or + ~azure.core.credentials_async.AsyncTokenCredential or + str or dict[str, str] or None + :keyword str api_version: + The Storage API version to use for requests. 
Default value is the most recent service version that is + compatible with the current SDK. Setting to an older version may result in reduced feature compatibility. + :keyword str audience: The audience to use when requesting tokens for Azure Active Directory + authentication. Only has an effect when credential is of type TokenCredential. The value could be + https://storage.azure.com/ (default) or https://<account>.blob.core.windows.net. + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_file_system_async.py + :start-after: [START create_file_system_client_from_service] + :end-before: [END create_file_system_client_from_service] + :language: python + :dedent: 8 + :caption: Get a FileSystemClient from an existing DataLakeServiceClient. + """ + + def __init__( + self, account_url: str, + file_system_name: str, + credential: Optional[Union[str, Dict[str, str], "AzureNamedKeyCredential", "AzureSasCredential", "AsyncTokenCredential"]] = None, # pylint: disable=line-too-long + **kwargs: Any + ) -> None: + kwargs['retry_policy'] = kwargs.get('retry_policy') or ExponentialRetry(**kwargs) + super(FileSystemClient, self).__init__( + account_url, + file_system_name=file_system_name, + credential=credential, + **kwargs) + # to override the class field _container_client sync version + kwargs.pop('_hosts', None) + self._container_client = ContainerClient(self._blob_account_url, self.file_system_name, + credential=credential, + _hosts=self._container_client._hosts, + **kwargs) # type: ignore + self._client = AzureDataLakeStorageRESTAPI(self.url, base_url=self.url, + file_system=self.file_system_name, pipeline=self._pipeline) + self._datalake_client_for_blob_operation = AzureDataLakeStorageRESTAPI(self._container_client.url, + base_url=self._container_client.url, + file_system=self.file_system_name, + pipeline=self._pipeline) + api_version = get_api_version(kwargs) + self._client._config.version = api_version + self._datalake_client_for_blob_operation._config.version = api_version + + self._loop = kwargs.get('loop', None) + + async def __aexit__(self, *args): + await self._container_client.close() + await self._datalake_client_for_blob_operation.close() + await super(FileSystemClient, self).__aexit__(*args) + + async def close(self): + # type: () -> None + """ This method is to close the sockets opened by the client. + It need not be used when using with a context manager. + """ + await self.__aexit__() + + @distributed_trace_async + async def acquire_lease( + self, lease_duration=-1, # type: int + lease_id=None, # type: Optional[str] + **kwargs + ): + # type: (...) -> DataLakeLeaseClient + """ + Requests a new lease. If the file system does not have an active lease, + the DataLake service creates a lease on the file system and returns a new + lease ID. + + :param int lease_duration: + Specifies the duration of the lease, in seconds, or negative one + (-1) for a lease that never expires. A non-infinite lease can be + between 15 and 60 seconds. A lease duration cannot be changed + using renew or change. Default is -1 (infinite lease). + :param str lease_id: + Proposed lease ID, in a GUID string format. The DataLake service returns + 400 (Invalid request) if the proposed lease ID is not in the correct format. + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. 
+ Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: A DataLakeLeaseClient object, that can be run in a context manager. + :rtype: ~azure.storage.filedatalake.aio.DataLakeLeaseClient + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_file_system_async.py + :start-after: [START acquire_lease_on_file_system] + :end-before: [END acquire_lease_on_file_system] + :language: python + :dedent: 12 + :caption: Acquiring a lease on the file_system. + """ + lease = DataLakeLeaseClient(self, lease_id=lease_id) + await lease.acquire(lease_duration=lease_duration, **kwargs) + return lease + + @distributed_trace_async + async def create_file_system(self, metadata=None, # type: Optional[Dict[str, str]] + public_access=None, # type: Optional[PublicAccess] + **kwargs): + # type: (...) -> Dict[str, Union[str, datetime]] + """Creates a new file system under the specified account. + + If the file system with the same name already exists, a ResourceExistsError will + be raised. This method returns a client with which to interact with the newly + created file system. + + :param metadata: + A dict with name-value pairs to associate with the + file system as metadata. Example: `{'Category':'test'}` + :type metadata: dict(str, str) + :param public_access: + To specify whether data in the file system may be accessed publicly and the level of access. + :type public_access: ~azure.storage.filedatalake.PublicAccess + :keyword encryption_scope_options: + Specifies the default encryption scope to set on the file system and use for + all future writes. + + .. versionadded:: 12.9.0 + + :paramtype encryption_scope_options: dict or ~azure.storage.filedatalake.EncryptionScopeOptions + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: A dictionary of response headers. + :rtype: dict[str, Union[str, datetime]] + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/datalake_samples_file_system_async.py + :start-after: [START create_file_system] + :end-before: [END create_file_system] + :language: python + :dedent: 16 + :caption: Creating a file system in the datalake service. + """ + encryption_scope_options = kwargs.pop('encryption_scope_options', None) + return await self._container_client.create_container(metadata=metadata, + public_access=public_access, + container_encryption_scope=encryption_scope_options, + **kwargs) + + @distributed_trace_async + async def exists(self, **kwargs): + # type: (**Any) -> bool + """ + Returns True if a file system exists and returns False otherwise. + + :kwarg int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: True if a file system exists, False otherwise. + :rtype: bool + """ + return await self._container_client.exists(**kwargs) + + @distributed_trace_async + async def _rename_file_system(self, new_name, **kwargs): + # type: (str, **Any) -> FileSystemClient + """Renames a filesystem. + + Operation is successful only if the source filesystem exists. + + :param str new_name: + The new filesystem name the user wants to rename to. + :keyword lease: + Specify this to perform only if the lease ID given + matches the active lease ID of the source filesystem. + :paramtype lease: ~azure.storage.filedatalake.DataLakeLeaseClient or str + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: FileSystemClient with renamed properties. + :rtype: ~azure.storage.filedatalake.FileSystemClient + """ + await self._container_client._rename_container(new_name, **kwargs) # pylint: disable=protected-access + renamed_file_system = FileSystemClient( + f"{self.scheme}://{self.primary_hostname}", file_system_name=new_name, + credential=self._raw_credential, api_version=self.api_version, _configuration=self._config, + _pipeline=self._pipeline, _location_mode=self._location_mode, _hosts=self._hosts) + return renamed_file_system + + @distributed_trace_async + async def delete_file_system(self, **kwargs): + # type: (Any) -> None + """Marks the specified file system for deletion. + + The file system and any files contained within it are later deleted during garbage collection. + If the file system is not found, a ResourceNotFoundError will be raised. + + :keyword lease: + If specified, delete_file_system only succeeds if the + file system's lease is active and matches this ID. + Required if the file system has an active lease. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. 
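A minimal sketch of creating a file system and checking that it exists with the async FileSystemClient, assuming placeholder account details:

    import asyncio

    from azure.storage.filedatalake.aio import FileSystemClient

    async def main():
        fs_client = FileSystemClient(
            account_url="https://<account-name>.dfs.core.windows.net",
            file_system_name="my-filesystem",
            credential="<sas-token-or-account-key>")
        async with fs_client:
            await fs_client.create_file_system(metadata={"category": "test"})
            print(await fs_client.exists())  # True once the file system is created

    asyncio.run(main())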
+ If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :rtype: None + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_file_system_async.py + :start-after: [START delete_file_system] + :end-before: [END delete_file_system] + :language: python + :dedent: 16 + :caption: Deleting a file system in the datalake service. + """ + await self._container_client.delete_container(**kwargs) + + @distributed_trace_async + async def get_file_system_properties(self, **kwargs): + # type: (Any) -> FileSystemProperties + """Returns all user-defined metadata and system properties for the specified + file system. The data returned does not include the file system's list of paths. + + :keyword lease: + If specified, get_file_system_properties only succeeds if the + file system's lease is active and matches this ID. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :return: Properties for the specified file system within a file system object. + :rtype: ~azure.storage.filedatalake.FileSystemProperties + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_file_system_async.py + :start-after: [START get_file_system_properties] + :end-before: [END get_file_system_properties] + :language: python + :dedent: 16 + :caption: Getting properties on the file system. + """ + container_properties = await self._container_client.get_container_properties(**kwargs) + return FileSystemProperties._convert_from_container_props(container_properties) # pylint: disable=protected-access + + @distributed_trace_async + async def set_file_system_metadata( # type: ignore + self, metadata, # type: Dict[str, str] + **kwargs + ): + # type: (...) 
-> Dict[str, Union[str, datetime]] + """Sets one or more user-defined name-value pairs for the specified + file system. Each call to this operation replaces all existing metadata + attached to the file system. To remove all metadata from the file system, + call this operation with no metadata dict. + + :param metadata: + A dict containing name-value pairs to associate with the file system as + metadata. Example: {'category':'test'} + :type metadata: dict[str, str] + :keyword lease: + If specified, set_file_system_metadata only succeeds if the + file system's lease is active and matches this ID. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: file system-updated property dict (Etag and last modified). + :rtype: dict[str, str] or dict[str, ~datetime.datetime] + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_file_system_async.py + :start-after: [START set_file_system_metadata] + :end-before: [END set_file_system_metadata] + :language: python + :dedent: 16 + :caption: Setting metadata on the container. + """ + return await self._container_client.set_container_metadata(metadata=metadata, **kwargs) + + @distributed_trace_async + async def set_file_system_access_policy( + self, signed_identifiers, # type: Dict[str, AccessPolicy] + public_access=None, # type: Optional[Union[str, PublicAccess]] + **kwargs + ): # type: (...) -> Dict[str, Union[str, datetime]] + """Sets the permissions for the specified file system or stored access + policies that may be used with Shared Access Signatures. The permissions + indicate whether files in a file system may be accessed publicly. + + :param signed_identifiers: + A dictionary of access policies to associate with the file system. The + dictionary may contain up to 5 elements. An empty dictionary + will clear the access policies set on the service. 
+ :type signed_identifiers: dict[str, ~azure.storage.filedatalake.AccessPolicy] + :param ~azure.storage.filedatalake.PublicAccess public_access: + To specify whether data in the file system may be accessed publicly and the level of access. + :keyword lease: + Required if the file system has an active lease. Value can be a DataLakeLeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A datetime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified date/time. + :keyword ~datetime.datetime if_unmodified_since: + A datetime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: filesystem-updated property dict (Etag and last modified). + :rtype: dict[str, str or ~datetime.datetime] + """ + return await self._container_client.set_container_access_policy(signed_identifiers, + public_access=public_access, **kwargs) + + @distributed_trace_async + async def get_file_system_access_policy(self, **kwargs): + # type: (Any) -> Dict[str, Any] + """Gets the permissions for the specified file system. + The permissions indicate whether file system data may be accessed publicly. + + :keyword lease: + If specified, get_file_system_access_policy only succeeds if the + file system's lease is active and matches this ID. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: Access policy information in a dict. 
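A minimal sketch of setting and then reading back a stored access policy, assuming placeholder account details; AccessPolicy and FileSystemSasPermissions come from the parent azure.storage.filedatalake package:

    import asyncio
    from datetime import datetime, timedelta, timezone

    from azure.storage.filedatalake import AccessPolicy, FileSystemSasPermissions
    from azure.storage.filedatalake.aio import FileSystemClient

    async def main():
        fs_client = FileSystemClient(
            account_url="https://<account-name>.dfs.core.windows.net",
            file_system_name="my-filesystem",
            credential="<account-key>")
        async with fs_client:
            # One read-only stored access policy, valid for an hour.
            policy = AccessPolicy(
                permission=FileSystemSasPermissions(read=True),
                expiry=datetime.now(timezone.utc) + timedelta(hours=1))
            await fs_client.set_file_system_access_policy({"read-only": policy})
            current = await fs_client.get_file_system_access_policy()
            print(current["public_access"], current["signed_identifiers"])

    asyncio.run(main())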
+        :rtype: dict[str, Any]
+        """
+        access_policy = await self._container_client.get_container_access_policy(**kwargs)
+        return {
+            'public_access': PublicAccess._from_generated(access_policy['public_access']),  # pylint: disable=protected-access
+            'signed_identifiers': access_policy['signed_identifiers']
+        }
+
+    @distributed_trace
+    def get_paths(
+        self, path: Optional[str] = None,
+        recursive: Optional[bool] = True,
+        max_results: Optional[int] = None,
+        **kwargs: Any
+    ) -> AsyncItemPaged["PathProperties"]:
+        """Returns a generator to list the paths (files or directories) under the specified file system.
+        The generator will lazily follow the continuation tokens returned by
+        the service.
+
+        :param str path:
+            Filters the results to return only paths under the specified path.
+        :param Optional[bool] recursive:
+            Set to True to list paths recursively, or False to list only the paths
+            directly under the specified path. Default is True.
+        :param int max_results:
+            An optional value that specifies the maximum
+            number of items to return per page. If omitted or greater than 5,000, the
+            response will include up to 5,000 items per page.
+        :keyword bool upn:
+            If True, the user identity values returned in the x-ms-owner, x-ms-group,
+            and x-ms-acl response headers will be transformed from Azure Active Directory Object IDs to User
+            Principal Names in the owner, group, and acl fields of
+            :class:`~azure.storage.filedatalake.PathProperties`. If False, the values will be returned
+            as Azure Active Directory Object IDs. The default value is False. Note that group and application
+            Object IDs are not translated because they do not have unique friendly names.
+        :keyword int timeout:
+            Sets the server-side timeout for the operation in seconds. For more details see
+            https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations.
+            This value is not tracked or validated on the client. To configure client-side network timeouts
+            see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake
+            #other-client--per-operation-configuration>`_.
+        :returns: An iterable (auto-paging) response of PathProperties.
+        :rtype: ~azure.core.paging.AsyncItemPaged[~azure.storage.filedatalake.PathProperties]
+
+        .. admonition:: Example:
+
+            .. literalinclude:: ../samples/datalake_samples_file_system_async.py
+                :start-after: [START get_paths_in_file_system]
+                :end-before: [END get_paths_in_file_system]
+                :language: python
+                :dedent: 12
+                :caption: List the paths in the file system.
+        """
+        timeout = kwargs.pop('timeout', None)
+        command = functools.partial(
+            self._client.file_system.list_paths,
+            path=path,
+            timeout=timeout,
+            **kwargs)
+        return AsyncItemPaged(
+            command, recursive, path=path, max_results=max_results,
+            page_iterator_class=PathPropertiesPaged, **kwargs)
+
+    @distributed_trace_async
+    async def create_directory(self, directory,  # type: Union[DirectoryProperties, str]
+                               metadata=None,  # type: Optional[Dict[str, str]]
+                               **kwargs):
+        # type: (...) -> DataLakeDirectoryClient
+        """
+        Create a new directory.
+
+        :param directory:
+            The directory with which to interact. This can either be the name of the directory,
+            or an instance of DirectoryProperties.
+        :type directory: str or ~azure.storage.filedatalake.DirectoryProperties
+        :param metadata:
+            Name-value pairs associated with the directory as metadata.
+        :type metadata: dict(str, str)
+        :keyword ~azure.storage.filedatalake.ContentSettings content_settings:
+            ContentSettings object used to set path properties.
+ :keyword lease: + Required if the file has an active lease. Value can be a DataLakeLeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword str umask: + Optional and only valid if Hierarchical Namespace is enabled for the account. + When creating a file or directory and the parent folder does not have a default ACL, + the umask restricts the permissions of the file or directory to be created. + The resulting permission is given by p & ^u, where p is the permission and u is the umask. + For example, if p is 0777 and u is 0057, then the resulting permission is 0720. + The default permission is 0777 for a directory and 0666 for a file. The default umask is 0027. + The umask must be specified in 4-digit octal notation (e.g. 0766). + :keyword str owner: + The owner of the file or directory. + :keyword str group: + The owning group of the file or directory. + :keyword str acl: + Sets POSIX access control rights on files and directories. The value is a + comma-separated list of access control entries. Each access control entry (ACE) consists of a + scope, a type, a user or group identifier, and permissions in the format + "[scope:][type]:[id]:[permissions]". + :keyword str lease_id: + Proposed lease ID, in a GUID string format. The DataLake service returns + 400 (Invalid request) if the proposed lease ID is not in the correct format. + :keyword int lease_duration: + Specifies the duration of the lease, in seconds, or negative one + (-1) for a lease that never expires. A non-infinite lease can be + between 15 and 60 seconds. A lease duration cannot be changed + using renew or change. + :keyword str permissions: + Optional and only valid if Hierarchical Namespace + is enabled for the account. Sets POSIX access permissions for the file + owner, the file owning group, and others. Each class may be granted + read, write, or execute permission. The sticky bit is also supported. + Both symbolic (rwxrw-rw-) and 4-digit octal notation (e.g. 0766) are + supported. + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. 
To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: DataLakeDirectoryClient with new directory and metadata. + :rtype: ~azure.storage.file.datalake.aio.DataLakeDirectoryClient + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_file_system_async.py + :start-after: [START create_directory_from_file_system] + :end-before: [END create_directory_from_file_system] + :language: python + :dedent: 12 + :caption: Create directory in the file system. + """ + directory_client = self.get_directory_client(directory) + await directory_client.create_directory(metadata=metadata, **kwargs) + return directory_client + + @distributed_trace_async + async def delete_directory(self, directory, # type: Union[DirectoryProperties, str] + **kwargs): + # type: (...) -> DataLakeDirectoryClient + """ + Marks the specified path for deletion. + + :param directory: + The directory with which to interact. This can either be the name of the directory, + or an instance of DirectoryProperties. + :type directory: str or ~azure.storage.filedatalake.DirectoryProperties + :keyword lease: + Required if the file has an active lease. Value can be a LeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: DataLakeDirectoryClient after deleting specified directory. + :rtype: ~azure.storage.file.datalake.aio.DataLakeDirectoryClient + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_file_system_async.py + :start-after: [START delete_directory_from_file_system] + :end-before: [END delete_directory_from_file_system] + :language: python + :dedent: 12 + :caption: Delete directory in the file system. 
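A minimal sketch of creating and then removing a directory and a file through the async FileSystemClient methods documented here, assuming placeholder account details:

    import asyncio

    from azure.storage.filedatalake.aio import FileSystemClient

    async def main():
        fs_client = FileSystemClient(
            account_url="https://<account-name>.dfs.core.windows.net",
            file_system_name="my-filesystem",
            credential="<sas-token-or-account-key>")
        async with fs_client:
            await fs_client.create_directory("my-dir")
            file_client = await fs_client.create_file("my-dir/data.txt")
            await file_client.upload_data(b"hello", overwrite=True)
            await fs_client.delete_file("my-dir/data.txt")
            await fs_client.delete_directory("my-dir")

    asyncio.run(main())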
+ """ + directory_client = self.get_directory_client(directory) + await directory_client.delete_directory(**kwargs) + return directory_client + + @distributed_trace_async + async def create_file(self, file, # type: Union[FileProperties, str] + **kwargs): + # type: (...) -> DataLakeFileClient + """ + Create file + + :param file: + The file with which to interact. This can either be the name of the file, + or an instance of FileProperties. + :type file: str or ~azure.storage.filedatalake.FileProperties + :keyword ~azure.storage.filedatalake.ContentSettings content_settings: + ContentSettings object used to set path properties. + :keyword metadata: + Name-value pairs associated with the file as metadata. + :paramtype metadata: dict[str, str] + :keyword lease: + Required if the file has an active lease. Value can be a DataLakeLeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword str umask: + Optional and only valid if Hierarchical Namespace is enabled for the account. + When creating a file or directory and the parent folder does not have a default ACL, + the umask restricts the permissions of the file or directory to be created. + The resulting permission is given by p & ^u, where p is the permission and u is the umask. + For example, if p is 0777 and u is 0057, then the resulting permission is 0720. + The default permission is 0777 for a directory and 0666 for a file. The default umask is 0027. + The umask must be specified in 4-digit octal notation (e.g. 0766). + :keyword str owner: + The owner of the file or directory. + :keyword str group: + The owning group of the file or directory. + :keyword str acl: + Sets POSIX access control rights on files and directories. The value is a + comma-separated list of access control entries. Each access control entry (ACE) consists of a + scope, a type, a user or group identifier, and permissions in the format + "[scope:][type]:[id]:[permissions]". + :keyword str lease_id: + Proposed lease ID, in a GUID string format. The DataLake service returns + 400 (Invalid request) if the proposed lease ID is not in the correct format. + :keyword int lease_duration: + Specifies the duration of the lease, in seconds, or negative one + (-1) for a lease that never expires. A non-infinite lease can be + between 15 and 60 seconds. A lease duration cannot be changed + using renew or change. + :keyword expires_on: + The time to set the file to expiry. + If the type of expires_on is an int, expiration time will be set + as the number of milliseconds elapsed from creation time. + If the type of expires_on is datetime, expiration time will be set + absolute to the time provided. If no time zone info is provided, this + will be interpreted as UTC. + :paramtype expires_on: datetime or int + :keyword str permissions: + Optional and only valid if Hierarchical Namespace + is enabled for the account. Sets POSIX access permissions for the file + owner, the file owning group, and others. Each class may be granted + read, write, or execute permission. The sticky bit is also supported. + Both symbolic (rwxrw-rw-) and 4-digit octal notation (e.g. 0766) are + supported. + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. 
+ Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: DataLakeFileClient with new file created. + :rtype: ~azure.storage.file.datalake.aio.DataLakeFileClient + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_file_system_async.py + :start-after: [START create_file_from_file_system] + :end-before: [END create_file_from_file_system] + :language: python + :dedent: 12 + :caption: Create file in the file system. + """ + file_client = self.get_file_client(file) + await file_client.create_file(**kwargs) + return file_client + + @distributed_trace_async + async def delete_file(self, file, # type: Union[FileProperties, str] + **kwargs): + # type: (...) -> DataLakeFileClient + """ + Marks the specified file for deletion. + + :param file: + The file with which to interact. This can either be the name of the file, + or an instance of FileProperties. + :type file: str or ~azure.storage.filedatalake.FileProperties + :keyword lease: + Required if the file has an active lease. Value can be a LeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. 
For more details see
+            https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations.
+            This value is not tracked or validated on the client. To configure client-side network timeouts
+            see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake
+            #other-client--per-operation-configuration>`_.
+        :return: DataLakeFileClient after deleting the specified file.
+        :rtype: ~azure.storage.filedatalake.aio.DataLakeFileClient
+
+        .. admonition:: Example:
+
+            .. literalinclude:: ../samples/datalake_samples_file_system_async.py
+                :start-after: [START delete_file_from_file_system]
+                :end-before: [END delete_file_from_file_system]
+                :language: python
+                :dedent: 12
+                :caption: Delete file in the file system.
+        """
+        file_client = self.get_file_client(file)
+        await file_client.delete_file(**kwargs)
+        return file_client
+
+    @distributed_trace_async
+    async def _undelete_path(self, deleted_path_name, deletion_id, **kwargs):
+        # type: (str, str, **Any) -> Union[DataLakeDirectoryClient, DataLakeFileClient]
+        """Restores a soft-deleted path.
+
+        The operation will only be successful if used within the specified number of days
+        set in the delete retention policy.
+
+        .. versionadded:: 12.4.0
+            This operation was introduced in API version '2020-06-12'.
+
+        :param str deleted_path_name:
+            Specifies the name of the deleted path to restore.
+        :param str deletion_id:
+            Specifies the version of the deleted path to restore.
+        :keyword int timeout:
+            Sets the server-side timeout for the operation in seconds. For more details see
+            https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations.
+            This value is not tracked or validated on the client. To configure client-side network timeouts
+            see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake
+            #other-client--per-operation-configuration>`_.
+        :returns: The DataLake client for the restored soft-deleted path.
+        :rtype: ~azure.storage.filedatalake.aio.DataLakeDirectoryClient
+            or ~azure.storage.filedatalake.aio.DataLakeFileClient
+        """
+        _, url, undelete_source = self._undelete_path_options(deleted_path_name, deletion_id)
+
+        pipeline = AsyncPipeline(
+            transport=AsyncTransportWrapper(self._pipeline._transport),  # pylint: disable = protected-access
+            policies=self._pipeline._impl_policies  # pylint: disable = protected-access
+        )
+        path_client = AzureDataLakeStorageRESTAPI(
+            url, filesystem=self.file_system_name, path=deleted_path_name, pipeline=pipeline)
+        try:
+            is_file = await path_client.path.undelete(undelete_source=undelete_source, cls=is_file_path, **kwargs)
+            if is_file:
+                return self.get_file_client(deleted_path_name)
+            return self.get_directory_client(deleted_path_name)
+        except HttpResponseError as error:
+            process_storage_error(error)
+
+    def _get_root_directory_client(self):
+        # type: () -> DataLakeDirectoryClient
+        """Get a client to interact with the root directory.
+
+        :returns: A DataLakeDirectoryClient.
+        :rtype: ~azure.storage.filedatalake.aio.DataLakeDirectoryClient
+        """
+        return self.get_directory_client('/')
+
+    def get_directory_client(self, directory  # type: Union[DirectoryProperties, str]
+                             ):
+        # type: (...) -> DataLakeDirectoryClient
+        """Get a client to interact with the specified directory.
+
+        The directory need not already exist.
+
+        :param directory:
+            The directory with which to interact. This can either be the name of the directory,
+            or an instance of DirectoryProperties.
+ :type directory: str or ~azure.storage.filedatalake.DirectoryProperties + :returns: A DataLakeDirectoryClient. + :rtype: ~azure.storage.filedatalake.aio.DataLakeDirectoryClient + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_file_system_async.py + :start-after: [START get_directory_client_from_file_system] + :end-before: [END get_directory_client_from_file_system] + :language: python + :dedent: 12 + :caption: Getting the directory client to interact with a specific directory. + """ + try: + directory_name = directory.get('name') + except AttributeError: + directory_name = str(directory) + _pipeline = AsyncPipeline( + transport=AsyncTransportWrapper(self._pipeline._transport), # pylint: disable = protected-access + policies=self._pipeline._impl_policies # pylint: disable = protected-access + ) + return DataLakeDirectoryClient(self.url, self.file_system_name, directory_name=directory_name, + credential=self._raw_credential, + api_version=self.api_version, + _configuration=self._config, _pipeline=_pipeline, + _hosts=self._hosts, + loop=self._loop) + + def get_file_client(self, file_path # type: Union[FileProperties, str] + ): + # type: (...) -> DataLakeFileClient + """Get a client to interact with the specified file. + + The file need not already exist. + + :param file_path: + The file with which to interact. This can either be the path of the file(from root directory), + or an instance of FileProperties. eg. directory/subdirectory/file + :type file_path: str or ~azure.storage.filedatalake.FileProperties + :returns: A DataLakeFileClient. + :rtype: ~azure.storage.filedatalake.aio.DataLakeFileClient + + .. admonition:: Example: + + .. literalinclude:: ../samples/datalake_samples_file_system_async.py + :start-after: [START get_file_client_from_file_system] + :end-before: [END get_file_client_from_file_system] + :language: python + :dedent: 12 + :caption: Getting the file client to interact with a specific file. + """ + try: + file_path = file_path.get('name') + except AttributeError: + file_path = str(file_path) + _pipeline = AsyncPipeline( + transport=AsyncTransportWrapper(self._pipeline._transport), # pylint: disable = protected-access + policies=self._pipeline._impl_policies # pylint: disable = protected-access + ) + return DataLakeFileClient( + self.url, self.file_system_name, file_path=file_path, credential=self._raw_credential, + api_version=self.api_version, + _hosts=self._hosts, _configuration=self._config, _pipeline=_pipeline, loop=self._loop) + + @distributed_trace + def list_deleted_paths(self, **kwargs): + # type: (Any) -> AsyncItemPaged[DeletedPathProperties] + """Returns a generator to list the deleted (file or directory) paths under the specified file system. + The generator will lazily follow the continuation tokens returned by + the service. + + .. versionadded:: 12.4.0 + This operation was introduced in API version '2020-06-12'. + + :keyword str path_prefix: + Filters the results to return only paths under the specified path. + :keyword int results_per_page: + An optional value that specifies the maximum number of items to return per page. + If omitted or greater than 5,000, the response will include up to 5,000 items per page. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. 
To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: An iterable (auto-paging) response of DeletedPathProperties. + :rtype: + ~azure.core.paging.AsyncItemPaged[~azure.storage.filedatalake.DeletedPathProperties] + """ + path_prefix = kwargs.pop('path_prefix', None) + timeout = kwargs.pop('timeout', None) + results_per_page = kwargs.pop('results_per_page', None) + command = functools.partial( + self._datalake_client_for_blob_operation.file_system.list_blob_hierarchy_segment, + showonly=ListBlobsIncludeItem.deleted, + timeout=timeout, + **kwargs) + return AsyncItemPaged( + command, prefix=path_prefix, page_iterator_class=DeletedPathPropertiesPaged, + results_per_page=results_per_page, **kwargs) diff --git a/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_list_paths_helper.py b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_list_paths_helper.py new file mode 100644 index 00000000..4d802635 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_list_paths_helper.py @@ -0,0 +1,176 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- +# pylint: disable=too-few-public-methods +from azure.core.exceptions import HttpResponseError +from azure.core.async_paging import AsyncPageIterator + +from .._deserialize import process_storage_error, get_deleted_path_properties_from_generated_code, \ + return_headers_and_deserialized_path_list +from .._generated.models import BlobItemInternal, BlobPrefix as GenBlobPrefix + +from .._shared.models import DictMixin +from .._shared.response_handlers import return_context_and_deserialized +from .._generated.models import Path +from .._models import PathProperties + + +class DeletedPathPropertiesPaged(AsyncPageIterator): + """An Iterable of deleted path properties. + + :ivar str service_endpoint: The service URL. + :ivar str prefix: A path name prefix being used to filter the list. + :ivar str marker: The continuation token of the current page of results. + :ivar int results_per_page: The maximum number of results retrieved per API call. + :ivar str continuation_token: The continuation token to retrieve the next page of results. + :ivar str location_mode: The location mode being used to list results. The available + options include "primary" and "secondary". + :ivar current_page: The current page of listed results. + :vartype current_page: list(~azure.storage.filedatalake.DeletedPathProperties) + :ivar str container: The container that the paths are listed from. + :ivar str delimiter: A delimiting character used for hierarchy listing. + + :param callable command: Function to retrieve the next page of items. 
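A minimal sketch of iterating the async pagers returned by get_paths() and list_deleted_paths(), assuming placeholder account details (list_deleted_paths also assumes soft delete is enabled for the account):

    import asyncio

    from azure.storage.filedatalake.aio import FileSystemClient

    async def main():
        fs_client = FileSystemClient(
            account_url="https://<account-name>.dfs.core.windows.net",
            file_system_name="my-filesystem",
            credential="<sas-token-or-account-key>")
        async with fs_client:
            async for path in fs_client.get_paths(path="my-dir", recursive=True):
                print(path.name, path.is_directory)
            async for deleted in fs_client.list_deleted_paths():
                print(deleted.name, deleted.deletion_id)

    asyncio.run(main())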
+ """ + def __init__( + self, command, + container=None, + prefix=None, + results_per_page=None, + continuation_token=None, + delimiter=None, + location_mode=None): + super(DeletedPathPropertiesPaged, self).__init__( + get_next=self._get_next_cb, + extract_data=self._extract_data_cb, + continuation_token=continuation_token or "" + ) + self._command = command + self.service_endpoint = None + self.prefix = prefix + self.marker = None + self.results_per_page = results_per_page + self.container = container + self.delimiter = delimiter + self.current_page = None + self.location_mode = location_mode + + async def _get_next_cb(self, continuation_token): + try: + return await self._command( + prefix=self.prefix, + marker=continuation_token or None, + max_results=self.results_per_page, + cls=return_context_and_deserialized, + use_location=self.location_mode) + except HttpResponseError as error: + process_storage_error(error) + + async def _extract_data_cb(self, get_next_return): + self.location_mode, self._response = get_next_return + self.service_endpoint = self._response.service_endpoint + self.prefix = self._response.prefix + self.marker = self._response.marker + self.results_per_page = self._response.max_results + self.container = self._response.container_name + self.current_page = self._response.segment.blob_prefixes + self._response.segment.blob_items + self.current_page = [self._build_item(item) for item in self.current_page] + self.delimiter = self._response.delimiter + + return self._response.next_marker or None, self.current_page + + def _build_item(self, item): + if isinstance(item, BlobItemInternal): + file_props = get_deleted_path_properties_from_generated_code(item) + file_props.file_system = self.container + return file_props + if isinstance(item, GenBlobPrefix): + return DirectoryPrefix( + container=self.container, + prefix=item.name, + results_per_page=self.results_per_page, + location_mode=self.location_mode) + return item + + +class DirectoryPrefix(DictMixin): + """Directory prefix. + + :ivar str name: Name of the deleted directory. + :ivar int results_per_page: The maximum number of results retrieved per API call. + :ivar str location_mode: The location mode being used to list results. The available + options include "primary" and "secondary". + :ivar str file_system: The file system that the deleted paths are listed from. + :ivar str delimiter: A delimiting character used for hierarchy listing. + """ + def __init__(self, **kwargs): + self.name = kwargs.get('prefix') + self.results_per_page = kwargs.get('results_per_page') + self.file_system = kwargs.get('container') + self.delimiter = kwargs.get('delimiter') + self.location_mode = kwargs.get('location_mode') + + +class PathPropertiesPaged(AsyncPageIterator): + """An Iterable of Path properties. + + :ivar str path: Filters the results to return only paths under the specified path. + :ivar int results_per_page: The maximum number of results retrieved per API call. + :ivar str continuation_token: The continuation token to retrieve the next page of results. + :ivar list(~azure.storage.filedatalake.PathProperties) current_page: The current page of listed results. + + :param callable command: Function to retrieve the next page of items. + :param str path: Filters the results to return only paths under the specified path. + :param int max_results: The maximum number of paths to retrieve per + call. + :param str continuation_token: An opaque continuation token. 
+ """ + + def __init__( + self, command, + recursive, + path=None, + max_results=None, + continuation_token=None, + upn=None): + super(PathPropertiesPaged, self).__init__( + get_next=self._get_next_cb, + extract_data=self._extract_data_cb, + continuation_token=continuation_token or "" + ) + self._command = command + self.recursive = recursive + self.results_per_page = max_results + self.path = path + self.upn = upn + self.current_page = None + self.path_list = None + + async def _get_next_cb(self, continuation_token): + try: + return await self._command( + self.recursive, + continuation=continuation_token or None, + path=self.path, + max_results=self.results_per_page, + upn=self.upn, + cls=return_headers_and_deserialized_path_list) + except HttpResponseError as error: + process_storage_error(error) + + async def _extract_data_cb(self, get_next_return): + self.path_list, self._response = get_next_return + self.current_page = [self._build_item(item) for item in self.path_list] + + return self._response['continuation'] or None, self.current_page + + @staticmethod + def _build_item(item): + if isinstance(item, PathProperties): + return item + if isinstance(item, Path): + path = PathProperties._from_generated(item) # pylint: disable=protected-access + return path + return item diff --git a/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_models.py b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_models.py new file mode 100644 index 00000000..923cbb61 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_models.py @@ -0,0 +1,40 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- +# pylint: disable=too-few-public-methods +from azure.storage.blob.aio._models import ContainerPropertiesPaged +from .._models import FileSystemProperties + + +class FileSystemPropertiesPaged(ContainerPropertiesPaged): + """An Iterable of File System properties. + + :ivar str service_endpoint: The service URL. + :ivar str prefix: A file system name prefix being used to filter the list. + :ivar str marker: The continuation token of the current page of results. + :ivar int results_per_page: The maximum number of results retrieved per API call. + :ivar str continuation_token: The continuation token to retrieve the next page of results. + :ivar str location_mode: The location mode being used to list results. The available + options include "primary" and "secondary". + :ivar current_page: The current page of listed results. + :vartype current_page: list(~azure.storage.filedatalake.FileSystemProperties) + + :param callable command: Function to retrieve the next page of items. + :param str prefix: Filters the results to return only file systems whose names + begin with the specified prefix. + :param int results_per_page: The maximum number of file system names to retrieve per + call. + :param str continuation_token: An opaque continuation token. 
+ """ + + def __init__(self, *args, **kwargs): + super(FileSystemPropertiesPaged, self).__init__( + *args, + **kwargs + ) + + @staticmethod + def _build_item(item): + return FileSystemProperties._from_generated(item) # pylint: disable=protected-access diff --git a/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_path_client_async.py b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_path_client_async.py new file mode 100644 index 00000000..774f687d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_path_client_async.py @@ -0,0 +1,901 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- +# pylint: disable=invalid-overridden-method, docstring-keyword-should-match-keyword-only + +from datetime import datetime +from typing import ( + Any, Dict, Optional, Union, + TYPE_CHECKING +) + +from azure.core.exceptions import AzureError, HttpResponseError +from azure.core.tracing.decorator_async import distributed_trace_async +from azure.storage.blob.aio import BlobClient +from .._serialize import get_api_version, compare_api_versions +from .._shared.base_client_async import AsyncStorageAccountHostsMixin +from .._path_client import PathClient as PathClientBase +from .._models import DirectoryProperties, AccessControlChangeResult, AccessControlChangeFailure, \ + AccessControlChangeCounters, AccessControlChanges +from .._generated.aio import AzureDataLakeStorageRESTAPI +from ._data_lake_lease_async import DataLakeLeaseClient +from .._deserialize import process_storage_error +from .._shared.policies_async import ExponentialRetry + +if TYPE_CHECKING: + from azure.core.credentials import AzureNamedKeyCredential, AzureSasCredential + from azure.core.credentials_async import AsyncTokenCredential + from .._models import ContentSettings, FileProperties + + +class PathClient(AsyncStorageAccountHostsMixin, PathClientBase): + """A base client for interacting with a DataLake file/directory, even if the file/directory may not + yet exist. + + :param str account_url: + The URI to the storage account. + :param str file_system_name: + The file system for the directory or files. + :param str file_path: + The whole file path, so that to interact with a specific file. + eg. "{directory}/{subdirectory}/{file}" + :param credential: + The credentials with which to authenticate. This is optional if the + account URL already has a SAS token. The value can be a SAS token string, + an instance of a AzureSasCredential or AzureNamedKeyCredential from azure.core.credentials, + an account shared access key, or an instance of a TokenCredentials class from azure.identity. + If the resource URI already contains a SAS token, this will be ignored in favor of an explicit credential + - except in the case of AzureSasCredential, where the conflicting SAS tokens will raise a ValueError. + If using an instance of AzureNamedKeyCredential, "name" should be the storage account name, and "key" + should be the storage account key. + :type credential: + ~azure.core.credentials.AzureNamedKeyCredential or + ~azure.core.credentials.AzureSasCredential or + ~azure.core.credentials_async.AsyncTokenCredential or + str or dict[str, str] or None + :keyword str api_version: + The Storage API version to use for requests. 
Default value is the most recent service version that is + compatible with the current SDK. Setting to an older version may result in reduced feature compatibility. + :keyword str audience: The audience to use when requesting tokens for Azure Active Directory + authentication. Only has an effect when credential is of type TokenCredential. The value could be + https://storage.azure.com/ (default) or https://<account>.blob.core.windows.net. + """ + def __init__( + self, account_url: str, + file_system_name: str, + path_name: str, + credential: Optional[Union[str, Dict[str, str], "AzureNamedKeyCredential", "AzureSasCredential", "AsyncTokenCredential"]] = None, # pylint: disable=line-too-long + **kwargs: Any + ) -> None: + kwargs['retry_policy'] = kwargs.get('retry_policy') or ExponentialRetry(**kwargs) + + super(PathClient, self).__init__(account_url, # pylint: disable=specify-parameter-names-in-call + file_system_name, path_name, + credential=credential, + **kwargs) # type: ignore + + kwargs.pop('_hosts', None) + + self._blob_client = BlobClient(account_url=self._blob_account_url, container_name=self.file_system_name, + blob_name=self.path_name, + credential=credential, + _hosts=self._blob_client._hosts, + **kwargs) + self._api_version = get_api_version(kwargs) + self._client = self._build_generated_client(self.url) + self._datalake_client_for_blob_operation = self._build_generated_client(self._blob_client.url) + self._loop = kwargs.get('loop', None) + + def _build_generated_client(self, url: str) -> AzureDataLakeStorageRESTAPI: + client = AzureDataLakeStorageRESTAPI( + url, + base_url=url, + file_system=self.file_system_name, + path=self.path_name, + pipeline=self._pipeline + ) + client._config.version = self._api_version # pylint: disable=protected-access + return client + + async def __aexit__(self, *args): + await self._blob_client.close() + await self._datalake_client_for_blob_operation.close() + await super(PathClient, self).__aexit__(*args) + + async def close(self): + # type: () -> None + """ This method is to close the sockets opened by the client. + It need not be used when using with a context manager. + """ + await self.__aexit__() + + async def _create(self, resource_type, content_settings=None, metadata=None, **kwargs): + # type: (...) -> Dict[str, Union[str, datetime]] + """ + Create directory or file + + :param resource_type: + Required for Create File and Create Directory. + The value must be "file" or "directory". Possible values include: + 'directory', 'file' + :type resource_type: str + :param ~azure.storage.filedatalake.ContentSettings content_settings: + ContentSettings object used to set path properties. + :param metadata: + Name-value pairs associated with the file/directory as metadata. + :type metadata: dict(str, str) + :keyword lease: + Required if the file/directory has an active lease. Value can be a DataLakeLeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword str umask: + Optional and only valid if Hierarchical Namespace is enabled for the account. + When creating a file or directory and the parent folder does not have a default ACL, + the umask restricts the permissions of the file or directory to be created. + The resulting permission is given by p & ^u, where p is the permission and u is the umask. + For example, if p is 0777 and u is 0057, then the resulting permission is 0720. + The default permission is 0777 for a directory and 0666 for a file. The default umask is 0027. 
+ The umask must be specified in 4-digit octal notation (e.g. 0766). + :keyword str owner: + The owner of the file or directory. + :keyword str group: + The owning group of the file or directory. + :keyword str acl: + Sets POSIX access control rights on files and directories. The value is a + comma-separated list of access control entries. Each access control entry (ACE) consists of a + scope, a type, a user or group identifier, and permissions in the format + "[scope:][type]:[id]:[permissions]". + :keyword str lease_id: + Proposed lease ID, in a GUID string format. The DataLake service returns + 400 (Invalid request) if the proposed lease ID is not in the correct format. + :keyword int lease_duration: + Specifies the duration of the lease, in seconds, or negative one + (-1) for a lease that never expires. A non-infinite lease can be + between 15 and 60 seconds. A lease duration cannot be changed + using renew or change. + :keyword expires_on: + The time to set the file to expiry. + If the type of expires_on is an int, expiration time will be set + as the number of milliseconds elapsed from creation time. + If the type of expires_on is datetime, expiration time will be set + absolute to the time provided. If no time zone info is provided, this + will be interpreted as UTC. + :paramtype expires_on: datetime or int + :keyword permissions: + Optional and only valid if Hierarchical Namespace + is enabled for the account. Sets POSIX access permissions for the file + owner, the file owning group, and others. Each class may be granted + read, write, or execute permission. The sticky bit is also supported. + Both symbolic (rwxrw-rw-) and 4-digit octal notation (e.g. 0766) are + supported. + :type permissions: str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword ~azure.storage.filedatalake.CustomerProvidedEncryptionKey cpk: + Encrypts the data on the service-side with the given key. + Use of customer-provided keys must be done over HTTPS. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :return: A dictionary of response headers. 
+ :keyword str encryption_context: + Specifies the encryption context to set on the file. + :rtype: dict[str, Union[str, datetime]] + """ + lease_id = kwargs.get('lease_id', None) + lease_duration = kwargs.get('lease_duration', None) + if lease_id and not lease_duration: + raise ValueError("Please specify a lease_id and a lease_duration.") + if lease_duration and not lease_id: + raise ValueError("Please specify a lease_id and a lease_duration.") + options = self._create_path_options( + resource_type, + content_settings=content_settings, + metadata=metadata, + **kwargs) + try: + return await self._client.path.create(**options) + except HttpResponseError as error: + process_storage_error(error) + + async def _delete(self, **kwargs): + # type: (**Any) -> Dict[Union[datetime, str]] + """ + Marks the specified path for deletion. + + :keyword lease: + Required if the file/directory has an active lease. Value can be a LeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: A dictionary containing information about the deleted path. 
+ :rtype: dict[str, Any] + """ + # Perform paginated delete only if using OAuth, deleting a directory, and api version is 2023-08-03 or later + # The pagination is only for ACL checks, the final request remains the atomic delete operation + paginated = None + if (compare_api_versions(self.api_version, '2023-08-03') >= 0 and + hasattr(self.credential, 'get_token') and + kwargs.get('recursive')): # Directory delete will always specify recursive + paginated = True + + options = self._delete_path_options(paginated, **kwargs) + try: + response_headers = await self._client.path.delete(**options) + # Loop until continuation token is None for paginated delete + while response_headers['continuation']: + response_headers = await self._client.path.delete( + continuation=response_headers['continuation'], + **options) + + return response_headers + except HttpResponseError as error: + process_storage_error(error) + + @distributed_trace_async + async def set_access_control(self, owner=None, # type: Optional[str] + group=None, # type: Optional[str] + permissions=None, # type: Optional[str] + acl=None, # type: Optional[str] + **kwargs): + # type: (...) -> Dict[str, Union[str, datetime]] + """ + Set the owner, group, permissions, or access control list for a path. + + :param owner: + Optional. The owner of the file or directory. + :type owner: str + :param group: + Optional. The owning group of the file or directory. + :type group: str + :param permissions: + Optional and only valid if Hierarchical Namespace + is enabled for the account. Sets POSIX access permissions for the file + owner, the file owning group, and others. Each class may be granted + read, write, or execute permission. The sticky bit is also supported. + Both symbolic (rwxrw-rw-) and 4-digit octal notation (e.g. 0766) are + supported. + permissions and acl are mutually exclusive. + :type permissions: str + :param acl: + Sets POSIX access control rights on files and directories. + The value is a comma-separated list of access control entries. Each + access control entry (ACE) consists of a scope, a type, a user or + group identifier, and permissions in the format + "[scope:][type]:[id]:[permissions]". + permissions and acl are mutually exclusive. + :type acl: str + :keyword lease: + Required if the file/directory has an active lease. Value can be a LeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. 
+ :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: dict containing access control options after setting modifications (Etag and last modified). + :rtype: dict[str, str] or dict[str, ~datetime.datetime] + """ + options = self._set_access_control_options(owner=owner, group=group, permissions=permissions, acl=acl, **kwargs) + try: + return await self._client.path.set_access_control(**options) + except HttpResponseError as error: + process_storage_error(error) + + @distributed_trace_async + async def get_access_control(self, upn=None, # type: Optional[bool] + **kwargs): + # type: (...) -> Dict[str, Any] + """ + Get the owner, group, permissions, or access control list for a path. + + :param upn: + Optional. Valid only when Hierarchical Namespace is + enabled for the account. If "true", the user identity values returned + in the x-ms-owner, x-ms-group, and x-ms-acl response headers will be + transformed from Azure Active Directory Object IDs to User Principal + Names. If "false", the values will be returned as Azure Active + Directory Object IDs. The default value is false. Note that group and + application Object IDs are not translated because they do not have + unique friendly names. + :type upn: bool + :keyword lease: + Required if the file/directory has an active lease. Value can be a LeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: response dict containing access control options (Etag and last modified). 
+ :rtype: dict[str, str] or dict[str, ~datetime.datetime]
+ """
+ options = self._get_access_control_options(upn=upn, **kwargs)
+ try:
+ return await self._client.path.get_properties(**options)
+ except HttpResponseError as error:
+ process_storage_error(error)
+
+ @distributed_trace_async
+ async def set_access_control_recursive(self, acl, **kwargs):
+ # type: (str, **Any) -> AccessControlChangeResult
+ """
+ Sets the Access Control on a path and sub-paths.
+
+ :param acl:
+ Sets POSIX access control rights on files and directories.
+ The value is a comma-separated list of access control entries. Each
+ access control entry (ACE) consists of a scope, a type, a user or
+ group identifier, and permissions in the format
+ "[scope:][type]:[id]:[permissions]".
+ :type acl: str
+ :keyword func(~azure.storage.filedatalake.AccessControlChanges) progress_hook:
+ Callback where the caller can track progress of the operation
+ as well as collect paths that failed to change Access Control.
+ :keyword str continuation_token:
+ Optional continuation token that can be used to resume a previously stopped operation.
+ :keyword int batch_size:
+ Optional. If the data set size exceeds the batch size then the operation will be split into multiple
+ requests so that progress can be tracked. Batch size should be between 1 and 2000.
+ The default when unspecified is 2000.
+ :keyword int max_batches:
+ Optional. Defines the maximum number of batches that a single change Access Control operation can execute.
+ If the maximum is reached before all sub-paths are processed,
+ then the continuation token can be used to resume the operation.
+ An empty value indicates that the maximum number of batches is unbounded and the operation continues to the end.
+ :keyword bool continue_on_failure:
+ If set to False, the operation will terminate quickly on encountering user errors (4XX).
+ If True, the operation will ignore user errors and proceed with the operation on other sub-entities of
+ the directory.
+ A continuation token will only be returned when continue_on_failure is True in case of user errors.
+ If not set, the default value is False.
+ :keyword int timeout:
+ Sets the server-side timeout for the operation in seconds. For more details see
+ https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations.
+ This value is not tracked or validated on the client. To configure client-side network timeouts
+ see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake
+ #other-client--per-operation-configuration>`_.
+ :return: A summary of the recursive operations, including the count of successes and failures,
+ as well as a continuation token in case the operation was terminated prematurely.
+ :rtype: ~azure.storage.filedatalake.AccessControlChangeResult
+ :raises ~azure.core.exceptions.AzureError:
+ The user can restart the operation using the continuation_token field of AzureError if the token is available.
+ """ + if not acl: + raise ValueError("The Access Control List must be set for this operation") + + progress_hook = kwargs.pop('progress_hook', None) + max_batches = kwargs.pop('max_batches', None) + options = self._set_access_control_recursive_options(mode='set', acl=acl, **kwargs) + return await self._set_access_control_internal(options=options, progress_hook=progress_hook, + max_batches=max_batches) + + @distributed_trace_async + async def update_access_control_recursive(self, acl, **kwargs): + # type: (str, **Any) -> AccessControlChangeResult + """ + Modifies the Access Control on a path and sub-paths. + + :param acl: + Modifies POSIX access control rights on files and directories. + The value is a comma-separated list of access control entries. Each + access control entry (ACE) consists of a scope, a type, a user or + group identifier, and permissions in the format + "[scope:][type]:[id]:[permissions]". + :type acl: str + :keyword func(~azure.storage.filedatalake.AccessControlChanges) progress_hook: + Callback where the caller can track progress of the operation + as well as collect paths that failed to change Access Control. + :keyword str continuation_token: + Optional continuation token that can be used to resume previously stopped operation. + :keyword int batch_size: + Optional. If data set size exceeds batch size then operation will be split into multiple + requests so that progress can be tracked. Batch size should be between 1 and 2000. + The default when unspecified is 2000. + :keyword int max_batches: + Optional. Defines maximum number of batches that single, + change Access Control operation can execute. + If maximum is reached before all sub-paths are processed, + then continuation token can be used to resume operation. + Empty value indicates that maximum number of batches in unbound and operation continues till end. + :keyword bool continue_on_failure: + If set to False, the operation will terminate quickly on encountering user errors (4XX). + If True, the operation will ignore user errors and proceed with the operation on other sub-entities of + the directory. + Continuation token will only be returned when continue_on_failure is True in case of user errors. + If not set the default value is False for this. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :return: A summary of the recursive operations, including the count of successes and failures, + as well as a continuation token in case the operation was terminated prematurely. + :rtype: :~azure.storage.filedatalake.AccessControlChangeResult` + :raises ~azure.core.exceptions.AzureError: + User can restart the operation using continuation_token field of AzureError if the token is available. 
+ """ + if not acl: + raise ValueError("The Access Control List must be set for this operation") + + progress_hook = kwargs.pop('progress_hook', None) + max_batches = kwargs.pop('max_batches', None) + options = self._set_access_control_recursive_options(mode='modify', acl=acl, **kwargs) + return await self._set_access_control_internal(options=options, progress_hook=progress_hook, + max_batches=max_batches) + + @distributed_trace_async + async def remove_access_control_recursive(self, + acl, + **kwargs): + # type: (str, **Any) -> AccessControlChangeResult + """ + Removes the Access Control on a path and sub-paths. + + :param acl: + Removes POSIX access control rights on files and directories. + The value is a comma-separated list of access control entries. Each + access control entry (ACE) consists of a scope, a type, and a user or + group identifier in the format "[scope:][type]:[id]". + :type acl: str + :keyword func(~azure.storage.filedatalake.AccessControlChanges) progress_hook: + Callback where the caller can track progress of the operation + as well as collect paths that failed to change Access Control. + :keyword str continuation_token: + Optional continuation token that can be used to resume previously stopped operation. + :keyword int batch_size: + Optional. If data set size exceeds batch size then operation will be split into multiple + requests so that progress can be tracked. Batch size should be between 1 and 2000. + The default when unspecified is 2000. + :keyword int max_batches: + Optional. Defines maximum number of batches that single change Access Control operation can execute. + If maximum is reached before all sub-paths are processed, + then continuation token can be used to resume operation. + Empty value indicates that maximum number of batches in unbound and operation continues till end. + :keyword bool continue_on_failure: + If set to False, the operation will terminate quickly on encountering user errors (4XX). + If True, the operation will ignore user errors and proceed with the operation on other sub-entities of + the directory. + Continuation token will only be returned when continue_on_failure is True in case of user errors. + If not set the default value is False for this. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :return: A summary of the recursive operations, including the count of successes and failures, + as well as a continuation token in case the operation was terminated prematurely. + :rtype: :~azure.storage.filedatalake.AccessControlChangeResult` + :raises ~azure.core.exceptions.AzureError: + User can restart the operation using continuation_token field of AzureError if the token is available. 
+ """ + if not acl: + raise ValueError("The Access Control List must be set for this operation") + + progress_hook = kwargs.pop('progress_hook', None) + max_batches = kwargs.pop('max_batches', None) + options = self._set_access_control_recursive_options(mode='remove', acl=acl, **kwargs) + return await self._set_access_control_internal(options=options, progress_hook=progress_hook, + max_batches=max_batches) + + async def _set_access_control_internal(self, options, progress_hook, max_batches=None): + try: + continue_on_failure = options.get('force_flag') + total_directories_successful = 0 + total_files_success = 0 + total_failure_count = 0 + batch_count = 0 + last_continuation_token = None + current_continuation_token = None + continue_operation = True + while continue_operation: + headers, resp = await self._client.path.set_access_control_recursive(**options) + + # make a running tally so that we can report the final results + total_directories_successful += resp.directories_successful + total_files_success += resp.files_successful + total_failure_count += resp.failure_count + batch_count += 1 + current_continuation_token = headers['continuation'] + + if current_continuation_token is not None: + last_continuation_token = current_continuation_token + + if progress_hook is not None: + await progress_hook(AccessControlChanges( + batch_counters=AccessControlChangeCounters( + directories_successful=resp.directories_successful, + files_successful=resp.files_successful, + failure_count=resp.failure_count, + ), + aggregate_counters=AccessControlChangeCounters( + directories_successful=total_directories_successful, + files_successful=total_files_success, + failure_count=total_failure_count, + ), + batch_failures=[AccessControlChangeFailure( + name=failure.name, + is_directory=failure.type == 'DIRECTORY', + error_message=failure.error_message) for failure in resp.failed_entries], + continuation=last_continuation_token)) + + # update the continuation token, if there are more operations that cannot be completed in a single call + max_batches_satisfied = (max_batches is not None and batch_count == max_batches) + continue_operation = bool(current_continuation_token) and not max_batches_satisfied + options['continuation'] = current_continuation_token + + # currently the service stops on any failure, so we should send back the last continuation token + # for the user to retry the failed updates + # otherwise we should just return what the service gave us + return AccessControlChangeResult(counters=AccessControlChangeCounters( + directories_successful=total_directories_successful, + files_successful=total_files_success, + failure_count=total_failure_count), + continuation=last_continuation_token + if total_failure_count > 0 and not continue_on_failure else current_continuation_token) + except HttpResponseError as error: + error.continuation_token = last_continuation_token + process_storage_error(error) + except AzureError as error: + error.continuation_token = last_continuation_token + raise error + + async def _rename_path(self, rename_source, **kwargs): + # type: (str, **Any) -> Dict[str, Any] + """ + Rename directory or file + + :param rename_source: The value must have the following format: "/{filesystem}/{path}". + :type rename_source: str + :keyword ~azure.storage.filedatalake.ContentSettings content_settings: + ContentSettings object used to set path properties. + :keyword source_lease: A lease ID for the source path. 
If specified, + the source path must have an active lease and the lease ID must + match. + :paramtype source_lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword lease: + Required if the file/directory has an active lease. Value can be a LeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword ~datetime.datetime source_if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime source_if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str source_etag: + The source ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions source_match_condition: + The source match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: response dict containing information about the renamed path. 
+ :rtype: dict[str, Any] + """ + options = self._rename_path_options( + rename_source, + **kwargs) + try: + return await self._client.path.create(**options) + except HttpResponseError as error: + process_storage_error(error) + + async def _get_path_properties(self, **kwargs): + # type: (**Any) -> Union[FileProperties, DirectoryProperties] + """Returns all user-defined metadata, standard HTTP properties, and + system properties for the file or directory. It does not return the content of the directory or file. + + :keyword lease: + Required if the directory or file has an active lease. Value can be a DataLakeLeaseClient object + or the lease ID as a string. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword ~azure.storage.filedatalake.CustomerProvidedEncryptionKey cpk: + Decrypts the data on the service-side with the given key. + Use of customer-provided keys must be done over HTTPS. + Required if the file/directory was created with a customer-provided key. + :keyword bool upn: + If True, the user identity values returned in the x-ms-owner, x-ms-group, + and x-ms-acl response headers will be transformed from Azure Active Directory Object IDs to User + Principal Names in the owner, group, and acl fields of the respective property object returned. + If False, the values will be returned as Azure Active Directory Object IDs. + The default value is False. Note that group and application Object IDs are not translate + because they do not have unique friendly names. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: + Information including user-defined metadata, standard HTTP properties, + and system properties for the file or directory. 
+ :rtype: DirectoryProperties or FileProperties + """ + upn = kwargs.pop('upn', None) + if upn: + headers = kwargs.pop('headers', {}) + headers['x-ms-upn'] = str(upn) + kwargs['headers'] = headers + path_properties = await self._blob_client.get_blob_properties(**kwargs) + return path_properties + + async def _exists(self, **kwargs): + # type: (**Any) -> bool + """ + Returns True if a path exists and returns False otherwise. + + :kwarg int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: True if a path exists, False otherwise. + :rtype: bool + """ + return await self._blob_client.exists(**kwargs) + + @distributed_trace_async + async def set_metadata(self, metadata, # type: Dict[str, str] + **kwargs): + # type: (...) -> Dict[str, Union[str, datetime]] + """Sets one or more user-defined name-value pairs for the specified + file system. Each call to this operation replaces all existing metadata + attached to the file system. To remove all metadata from the file system, + call this operation with no metadata dict. + + :param metadata: + A dict containing name-value pairs to associate with the file system as + metadata. Example: {'category':'test'} + :type metadata: dict[str, str] + :keyword lease: + If specified, set_file_system_metadata only succeeds if the + file system's lease is active and matches this ID. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword ~azure.storage.filedatalake.CustomerProvidedEncryptionKey cpk: + Encrypts the data on the service-side with the given key. + Use of customer-provided keys must be done over HTTPS. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. 
+ :returns: file system-updated property dict (Etag and last modified). + :rtype: dict[str, str] or dict[str, ~datetime.datetime] + """ + return await self._blob_client.set_blob_metadata(metadata=metadata, **kwargs) + + @distributed_trace_async + async def set_http_headers(self, content_settings=None, # type: Optional[ContentSettings] + **kwargs): + # type: (...) -> Dict[str, Any] + """Sets system properties on the file or directory. + + If one property is set for the content_settings, all properties will be overridden. + + :param ~azure.storage.filedatalake.ContentSettings content_settings: + ContentSettings object used to set file/directory properties. + :keyword lease: + If specified, set_file_system_metadata only succeeds if the + file system's lease is active and matches this ID. + :paramtype lease: ~azure.storage.filedatalake.aio.DataLakeLeaseClient or str + :keyword ~datetime.datetime if_modified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: file/directory-updated property dict (Etag and last modified) + :rtype: dict[str, Any] + """ + return await self._blob_client.set_http_headers(content_settings=content_settings, **kwargs) + + @distributed_trace_async + async def acquire_lease(self, lease_duration=-1, # type: Optional[int] + lease_id=None, # type: Optional[str] + **kwargs): + # type: (...) -> DataLakeLeaseClient + """ + Requests a new lease. If the file or directory does not have an active lease, + the DataLake service creates a lease on the file/directory and returns a new + lease ID. + + :param int lease_duration: + Specifies the duration of the lease, in seconds, or negative one + (-1) for a lease that never expires. A non-infinite lease can be + between 15 and 60 seconds. A lease duration cannot be changed + using renew or change. Default is -1 (infinite lease). + :param str lease_id: + Proposed lease ID, in a GUID string format. The DataLake service returns + 400 (Invalid request) if the proposed lease ID is not in the correct format. + :keyword ~datetime.datetime if_modified_since: + A DateTime value. 
Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only + if the resource has been modified since the specified time. + :keyword ~datetime.datetime if_unmodified_since: + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + :keyword str etag: + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + :keyword ~azure.core.MatchConditions match_condition: + The match condition to use upon the etag. + :keyword int timeout: + Sets the server-side timeout for the operation in seconds. For more details see + https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations. + This value is not tracked or validated on the client. To configure client-side network timesouts + see `here <https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-file-datalake + #other-client--per-operation-configuration>`_. + :returns: A DataLakeLeaseClient object, that can be run in a context manager. + :rtype: ~azure.storage.filedatalake.aio.DataLakeLeaseClient + """ + lease = DataLakeLeaseClient(self, lease_id=lease_id) # type: ignore + await lease.acquire(lease_duration=lease_duration, **kwargs) + return lease diff --git a/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_upload_helper.py b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_upload_helper.py new file mode 100644 index 00000000..40d24a03 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/aio/_upload_helper.py @@ -0,0 +1,104 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. 
+# -------------------------------------------------------------------------- +from azure.core.exceptions import HttpResponseError +from .._deserialize import ( + process_storage_error) +from .._shared.response_handlers import return_response_headers +from .._shared.uploads_async import ( + upload_data_chunks, + DataLakeFileChunkUploader, upload_substream_blocks) + + +def _any_conditions(modified_access_conditions=None, **kwargs): # pylint: disable=unused-argument + return any([ + modified_access_conditions.if_modified_since, + modified_access_conditions.if_unmodified_since, + modified_access_conditions.if_none_match, + modified_access_conditions.if_match + ]) + + +async def upload_datalake_file( + client=None, + stream=None, + length=None, + overwrite=None, + validate_content=None, + max_concurrency=None, + file_settings=None, + **kwargs): + try: + if length == 0: + return {} + properties = kwargs.pop('properties', None) + umask = kwargs.pop('umask', None) + permissions = kwargs.pop('permissions', None) + path_http_headers = kwargs.pop('path_http_headers', None) + modified_access_conditions = kwargs.pop('modified_access_conditions', None) + chunk_size = kwargs.pop('chunk_size', 100 * 1024 * 1024) + encryption_context = kwargs.pop('encryption_context', None) + + if not overwrite: + # if customers didn't specify access conditions, they cannot flush data to existing file + if not _any_conditions(modified_access_conditions): + modified_access_conditions.if_none_match = '*' + if properties or umask or permissions: + raise ValueError("metadata, umask and permissions can be set only when overwrite is enabled") + + if overwrite: + response = await client.create( + resource='file', + path_http_headers=path_http_headers, + properties=properties, + modified_access_conditions=modified_access_conditions, + umask=umask, + permissions=permissions, + encryption_context=encryption_context, + cls=return_response_headers, + **kwargs) + + # this modified_access_conditions will be applied to flush_data to make sure + # no other flush between create and the current flush + modified_access_conditions.if_match = response['etag'] + modified_access_conditions.if_none_match = None + modified_access_conditions.if_modified_since = None + modified_access_conditions.if_unmodified_since = None + + use_original_upload_path = file_settings.use_byte_buffer or \ + validate_content or chunk_size < file_settings.min_large_chunk_upload_threshold or \ + hasattr(stream, 'seekable') and not stream.seekable() or \ + not hasattr(stream, 'seek') or not hasattr(stream, 'tell') + + if use_original_upload_path: + await upload_data_chunks( + service=client, + uploader_class=DataLakeFileChunkUploader, + total_size=length, + chunk_size=chunk_size, + stream=stream, + max_concurrency=max_concurrency, + validate_content=validate_content, + **kwargs) + else: + await upload_substream_blocks( + service=client, + uploader_class=DataLakeFileChunkUploader, + total_size=length, + chunk_size=chunk_size, + max_concurrency=max_concurrency, + stream=stream, + validate_content=validate_content, + **kwargs + ) + + return await client.flush_data(position=length, + path_http_headers=path_http_headers, + modified_access_conditions=modified_access_conditions, + close=True, + cls=return_response_headers, + **kwargs) + except HttpResponseError as error: + process_storage_error(error) |
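The recursive access-control methods in the diff above (set_access_control_recursive, update_access_control_recursive, remove_access_control_recursive) and the _set_access_control_internal helper surface per-batch and aggregate counters, failed entries, and a continuation token. The following is a minimal usage sketch, not part of the vendored source; the account URL, file system name, directory name, and the DefaultAzureCredential import are illustrative assumptions.

# Minimal sketch (assumed names and credential): driving the recursive ACL
# APIs shown above from application code.
import asyncio

from azure.core.exceptions import AzureError
from azure.identity.aio import DefaultAzureCredential
from azure.storage.filedatalake.aio import DataLakeDirectoryClient

ACL = "user::rwx,group::r-x,other::---"


async def on_progress(changes):
    # AccessControlChanges carries per-batch and aggregate counters, failed
    # entries, and a continuation token (see _set_access_control_internal).
    print(f"batch ok: {changes.batch_counters.directories_successful} dirs, "
          f"{changes.batch_counters.files_successful} files, "
          f"{changes.batch_counters.failure_count} failures")
    for failure in changes.batch_failures:
        print(f"  failed: {failure.name} (directory={failure.is_directory})")


async def main():
    credential = DefaultAzureCredential()
    directory = DataLakeDirectoryClient(
        "https://myaccount.dfs.core.windows.net",  # hypothetical account URL
        file_system_name="my-filesystem",          # hypothetical file system
        directory_name="data/raw",                 # hypothetical directory
        credential=credential,
    )
    async with directory:
        try:
            result = await directory.set_access_control_recursive(
                acl=ACL,
                progress_hook=on_progress,
                batch_size=2000,
            )
            print("directories changed:", result.counters.directories_successful)
            print("files changed:", result.counters.files_successful)
        except AzureError as error:
            # The docstrings note that a continuation_token is attached to the
            # error when the operation can be resumed.
            token = getattr(error, "continuation_token", None)
            if token:
                await directory.set_access_control_recursive(
                    acl=ACL, continuation_token=token)
    await credential.close()


if __name__ == "__main__":
    asyncio.run(main())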
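acquire_lease returns a DataLakeLeaseClient, and set_metadata/set_http_headers accept a lease keyword when the path has an active lease. Below is a minimal sketch under assumptions: the endpoint, file system, directory, and SAS token are placeholders, and get_directory_properties and DataLakeLeaseClient.release are not shown in this excerpt and are assumed from the package's public client surface.

# Minimal sketch (assumed names and SAS credential): lease-guarded metadata
# update on a directory, using the acquire_lease and set_metadata APIs above.
import asyncio

from azure.storage.filedatalake.aio import DataLakeDirectoryClient


async def main():
    async with DataLakeDirectoryClient(
        "https://myaccount.dfs.core.windows.net",  # hypothetical account URL
        file_system_name="my-filesystem",          # hypothetical file system
        directory_name="archive/2024",             # hypothetical directory
        credential="<sas-token>",                  # hypothetical SAS token
    ) as directory:
        # Acquire an infinite lease (-1), then pass it to operations that
        # require the active lease, as documented in the docstrings above.
        lease = await directory.acquire_lease(lease_duration=-1)
        try:
            await directory.set_metadata({"category": "archive"}, lease=lease)
            props = await directory.get_directory_properties()
            print(props.metadata)
        finally:
            await lease.release()


asyncio.run(main())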
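PathPropertiesPaged and DeletedPathPropertiesPaged in _list_paths_helper.py implement the async paging protocol and are consumed through AsyncItemPaged. The sketch below assumes FileSystemClient.get_paths is the public entry point backed by PathPropertiesPaged (that method is not shown in this excerpt); the endpoint, file system, and path are placeholders.

# Minimal sketch (assumed public method and names): iterating PathProperties
# through the async pager shown above.
import asyncio

from azure.identity.aio import DefaultAzureCredential
from azure.storage.filedatalake.aio import FileSystemClient


async def main():
    credential = DefaultAzureCredential()
    async with FileSystemClient(
        "https://myaccount.dfs.core.windows.net",  # hypothetical account URL
        file_system_name="my-filesystem",          # hypothetical file system
        credential=credential,
    ) as filesystem:
        # AsyncItemPaged drives PathPropertiesPaged; each item is a
        # PathProperties built by _build_item from the generated Path model.
        async for path in filesystem.get_paths(path="data", recursive=True):
            print(path.name, path.is_directory)
    await credential.close()


asyncio.run(main())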
