aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/fsspec/implementations/smb.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/fsspec/implementations/smb.py')
-rw-r--r--.venv/lib/python3.12/site-packages/fsspec/implementations/smb.py416
1 files changed, 416 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/fsspec/implementations/smb.py b/.venv/lib/python3.12/site-packages/fsspec/implementations/smb.py
new file mode 100644
index 00000000..db6b3f5c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/fsspec/implementations/smb.py
@@ -0,0 +1,416 @@
+"""
+This module contains SMBFileSystem class responsible for handling access to
+Windows Samba network shares by using package smbprotocol
+"""
+
+import datetime
+import re
+import uuid
+from stat import S_ISDIR, S_ISLNK
+
+import smbclient
+import smbprotocol.exceptions
+
+from .. import AbstractFileSystem
+from ..utils import infer_storage_options
+
+# ! pylint: disable=bad-continuation
+
+
+class SMBFileSystem(AbstractFileSystem):
+ """Allow reading and writing to Windows and Samba network shares.
+
+ When using `fsspec.open()` for getting a file-like object the URI
+ should be specified as this format:
+ ``smb://workgroup;user:password@server:port/share/folder/file.csv``.
+
+ Example::
+
+ >>> import fsspec
+ >>> with fsspec.open(
+ ... 'smb://myuser:mypassword@myserver.com/' 'share/folder/file.csv'
+ ... ) as smbfile:
+ ... df = pd.read_csv(smbfile, sep='|', header=None)
+
+ Note that you need to pass in a valid hostname or IP address for the host
+ component of the URL. Do not use the Windows/NetBIOS machine name for the
+ host component.
+
+ The first component of the path in the URL points to the name of the shared
+ folder. Subsequent path components will point to the directory/folder/file.
+
+ The URL components ``workgroup`` , ``user``, ``password`` and ``port`` may be
+ optional.
+
+ .. note::
+
+ For working this source require `smbprotocol`_ to be installed, e.g.::
+
+ $ pip install smbprotocol
+ # or
+ # pip install smbprotocol[kerberos]
+
+ .. _smbprotocol: https://github.com/jborean93/smbprotocol#requirements
+
+ Note: if using this with the ``open`` or ``open_files``, with full URLs,
+ there is no way to tell if a path is relative, so all paths are assumed
+ to be absolute.
+ """
+
+ protocol = "smb"
+
+ # pylint: disable=too-many-arguments
+ def __init__(
+ self,
+ host,
+ port=None,
+ username=None,
+ password=None,
+ timeout=60,
+ encrypt=None,
+ share_access=None,
+ register_session_retries=4,
+ register_session_retry_wait=1,
+ register_session_retry_factor=10,
+ auto_mkdir=False,
+ **kwargs,
+ ):
+ """
+ You can use _get_kwargs_from_urls to get some kwargs from
+ a reasonable SMB url.
+
+ Authentication will be anonymous or integrated if username/password are not
+ given.
+
+ Parameters
+ ----------
+ host: str
+ The remote server name/ip to connect to
+ port: int or None
+ Port to connect with. Usually 445, sometimes 139.
+ username: str or None
+ Username to connect with. Required if Kerberos auth is not being used.
+ password: str or None
+ User's password on the server, if using username
+ timeout: int
+ Connection timeout in seconds
+ encrypt: bool
+ Whether to force encryption or not, once this has been set to True
+ the session cannot be changed back to False.
+ share_access: str or None
+ Specifies the default access applied to file open operations
+ performed with this file system object.
+ This affects whether other processes can concurrently open a handle
+ to the same file.
+
+ - None (the default): exclusively locks the file until closed.
+ - 'r': Allow other handles to be opened with read access.
+ - 'w': Allow other handles to be opened with write access.
+ - 'd': Allow other handles to be opened with delete access.
+ register_session_retries: int
+ Number of retries to register a session with the server. Retries are not performed
+ for authentication errors, as they are considered as invalid credentials and not network
+ issues. If set to negative value, no register attempts will be performed.
+ register_session_retry_wait: int
+ Time in seconds to wait between each retry. Number must be non-negative.
+ register_session_retry_factor: int
+ Base factor for the wait time between each retry. The wait time
+ is calculated using exponential function. For factor=1 all wait times
+ will be equal to `register_session_retry_wait`. For any number of retries,
+ the last wait time will be equal to `register_session_retry_wait` and for retries>1
+ the first wait time will be equal to `register_session_retry_wait / factor`.
+ Number must be equal to or greater than 1. Optimal factor is 10.
+ auto_mkdir: bool
+ Whether, when opening a file, the directory containing it should
+ be created (if it doesn't already exist). This is assumed by pyarrow
+ and zarr-python code.
+ """
+ super().__init__(**kwargs)
+ self.host = host
+ self.port = port
+ self.username = username
+ self.password = password
+ self.timeout = timeout
+ self.encrypt = encrypt
+ self.temppath = kwargs.pop("temppath", "")
+ self.share_access = share_access
+ self.register_session_retries = register_session_retries
+ if register_session_retry_wait < 0:
+ raise ValueError(
+ "register_session_retry_wait must be a non-negative integer"
+ )
+ self.register_session_retry_wait = register_session_retry_wait
+ if register_session_retry_factor < 1:
+ raise ValueError(
+ "register_session_retry_factor must be a positive "
+ "integer equal to or greater than 1"
+ )
+ self.register_session_retry_factor = register_session_retry_factor
+ self.auto_mkdir = auto_mkdir
+ self._connect()
+
+ @property
+ def _port(self):
+ return 445 if self.port is None else self.port
+
+ def _connect(self):
+ import time
+
+ if self.register_session_retries <= -1:
+ return
+
+ retried_errors = []
+
+ wait_time = self.register_session_retry_wait
+ n_waits = (
+ self.register_session_retries - 1
+ ) # -1 = No wait time after the last retry
+ factor = self.register_session_retry_factor
+
+ # Generate wait times for each retry attempt.
+ # Wait times are calculated using exponential function. For factor=1 all wait times
+ # will be equal to `wait`. For any number of retries the last wait time will be
+ # equal to `wait` and for retries>2 the first wait time will be equal to `wait / factor`.
+ wait_times = iter(
+ factor ** (n / n_waits - 1) * wait_time for n in range(0, n_waits + 1)
+ )
+
+ for attempt in range(self.register_session_retries + 1):
+ try:
+ smbclient.register_session(
+ self.host,
+ username=self.username,
+ password=self.password,
+ port=self._port,
+ encrypt=self.encrypt,
+ connection_timeout=self.timeout,
+ )
+ return
+ except (
+ smbprotocol.exceptions.SMBAuthenticationError,
+ smbprotocol.exceptions.LogonFailure,
+ ):
+ # These exceptions should not be repeated, as they clearly indicate
+ # that the credentials are invalid and not a network issue.
+ raise
+ except ValueError as exc:
+ if re.findall(r"\[Errno -\d+]", str(exc)):
+ # This exception is raised by the smbprotocol.transport:Tcp.connect
+ # and originates from socket.gaierror (OSError). These exceptions might
+ # be raised due to network instability. We will retry to connect.
+ retried_errors.append(exc)
+ else:
+ # All another ValueError exceptions should be raised, as they are not
+ # related to network issues.
+ raise
+ except Exception as exc:
+ # Save the exception and retry to connect. This except might be dropped
+ # in the future, once all exceptions suited for retry are identified.
+ retried_errors.append(exc)
+
+ if attempt < self.register_session_retries:
+ time.sleep(next(wait_times))
+
+ # Raise last exception to inform user about the connection issues.
+ # Note: Should we use ExceptionGroup to raise all exceptions?
+ raise retried_errors[-1]
+
+ @classmethod
+ def _strip_protocol(cls, path):
+ return infer_storage_options(path)["path"]
+
+ @staticmethod
+ def _get_kwargs_from_urls(path):
+ # smb://workgroup;user:password@host:port/share/folder/file.csv
+ out = infer_storage_options(path)
+ out.pop("path", None)
+ out.pop("protocol", None)
+ return out
+
+ def mkdir(self, path, create_parents=True, **kwargs):
+ wpath = _as_unc_path(self.host, path)
+ if create_parents:
+ smbclient.makedirs(wpath, exist_ok=False, port=self._port, **kwargs)
+ else:
+ smbclient.mkdir(wpath, port=self._port, **kwargs)
+
+ def makedirs(self, path, exist_ok=False):
+ if _share_has_path(path):
+ wpath = _as_unc_path(self.host, path)
+ smbclient.makedirs(wpath, exist_ok=exist_ok, port=self._port)
+
+ def rmdir(self, path):
+ if _share_has_path(path):
+ wpath = _as_unc_path(self.host, path)
+ smbclient.rmdir(wpath, port=self._port)
+
+ def info(self, path, **kwargs):
+ wpath = _as_unc_path(self.host, path)
+ stats = smbclient.stat(wpath, port=self._port, **kwargs)
+ if S_ISDIR(stats.st_mode):
+ stype = "directory"
+ elif S_ISLNK(stats.st_mode):
+ stype = "link"
+ else:
+ stype = "file"
+ res = {
+ "name": path + "/" if stype == "directory" else path,
+ "size": stats.st_size,
+ "type": stype,
+ "uid": stats.st_uid,
+ "gid": stats.st_gid,
+ "time": stats.st_atime,
+ "mtime": stats.st_mtime,
+ }
+ return res
+
+ def created(self, path):
+ """Return the created timestamp of a file as a datetime.datetime"""
+ wpath = _as_unc_path(self.host, path)
+ stats = smbclient.stat(wpath, port=self._port)
+ return datetime.datetime.fromtimestamp(stats.st_ctime, tz=datetime.timezone.utc)
+
+ def modified(self, path):
+ """Return the modified timestamp of a file as a datetime.datetime"""
+ wpath = _as_unc_path(self.host, path)
+ stats = smbclient.stat(wpath, port=self._port)
+ return datetime.datetime.fromtimestamp(stats.st_mtime, tz=datetime.timezone.utc)
+
+ def ls(self, path, detail=True, **kwargs):
+ unc = _as_unc_path(self.host, path)
+ listed = smbclient.listdir(unc, port=self._port, **kwargs)
+ dirs = ["/".join([path.rstrip("/"), p]) for p in listed]
+ if detail:
+ dirs = [self.info(d) for d in dirs]
+ return dirs
+
+ # pylint: disable=too-many-arguments
+ def _open(
+ self,
+ path,
+ mode="rb",
+ block_size=-1,
+ autocommit=True,
+ cache_options=None,
+ **kwargs,
+ ):
+ """
+ block_size: int or None
+ If 0, no buffering, 1, line buffering, >1, buffer that many bytes
+
+ Notes
+ -----
+ By specifying 'share_access' in 'kwargs' it is possible to override the
+ default shared access setting applied in the constructor of this object.
+ """
+ if self.auto_mkdir and "w" in mode:
+ self.makedirs(self._parent(path), exist_ok=True)
+ bls = block_size if block_size is not None and block_size >= 0 else -1
+ wpath = _as_unc_path(self.host, path)
+ share_access = kwargs.pop("share_access", self.share_access)
+ if "w" in mode and autocommit is False:
+ temp = _as_temp_path(self.host, path, self.temppath)
+ return SMBFileOpener(
+ wpath, temp, mode, port=self._port, block_size=bls, **kwargs
+ )
+ return smbclient.open_file(
+ wpath,
+ mode,
+ buffering=bls,
+ share_access=share_access,
+ port=self._port,
+ **kwargs,
+ )
+
+ def copy(self, path1, path2, **kwargs):
+ """Copy within two locations in the same filesystem"""
+ wpath1 = _as_unc_path(self.host, path1)
+ wpath2 = _as_unc_path(self.host, path2)
+ if self.auto_mkdir:
+ self.makedirs(self._parent(path2), exist_ok=True)
+ smbclient.copyfile(wpath1, wpath2, port=self._port, **kwargs)
+
+ def _rm(self, path):
+ if _share_has_path(path):
+ wpath = _as_unc_path(self.host, path)
+ stats = smbclient.stat(wpath, port=self._port)
+ if S_ISDIR(stats.st_mode):
+ smbclient.rmdir(wpath, port=self._port)
+ else:
+ smbclient.remove(wpath, port=self._port)
+
+ def mv(self, path1, path2, recursive=None, maxdepth=None, **kwargs):
+ wpath1 = _as_unc_path(self.host, path1)
+ wpath2 = _as_unc_path(self.host, path2)
+ smbclient.rename(wpath1, wpath2, port=self._port, **kwargs)
+
+
+def _as_unc_path(host, path):
+ rpath = path.replace("/", "\\")
+ unc = f"\\\\{host}{rpath}"
+ return unc
+
+
+def _as_temp_path(host, path, temppath):
+ share = path.split("/")[1]
+ temp_file = f"/{share}{temppath}/{uuid.uuid4()}"
+ unc = _as_unc_path(host, temp_file)
+ return unc
+
+
+def _share_has_path(path):
+ parts = path.count("/")
+ if path.endswith("/"):
+ return parts > 2
+ return parts > 1
+
+
+class SMBFileOpener:
+ """writes to remote temporary file, move on commit"""
+
+ def __init__(self, path, temp, mode, port=445, block_size=-1, **kwargs):
+ self.path = path
+ self.temp = temp
+ self.mode = mode
+ self.block_size = block_size
+ self.kwargs = kwargs
+ self.smbfile = None
+ self._incontext = False
+ self.port = port
+ self._open()
+
+ def _open(self):
+ if self.smbfile is None or self.smbfile.closed:
+ self.smbfile = smbclient.open_file(
+ self.temp,
+ self.mode,
+ port=self.port,
+ buffering=self.block_size,
+ **self.kwargs,
+ )
+
+ def commit(self):
+ """Move temp file to definitive on success."""
+ # TODO: use transaction support in SMB protocol
+ smbclient.replace(self.temp, self.path, port=self.port)
+
+ def discard(self):
+ """Remove the temp file on failure."""
+ smbclient.remove(self.temp, port=self.port)
+
+ def __fspath__(self):
+ return self.path
+
+ def __iter__(self):
+ return self.smbfile.__iter__()
+
+ def __getattr__(self, item):
+ return getattr(self.smbfile, item)
+
+ def __enter__(self):
+ self._incontext = True
+ return self.smbfile.__enter__()
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self._incontext = False
+ self.smbfile.__exit__(exc_type, exc_value, traceback)