diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/fsspec/tests/abstract')
8 files changed, 2278 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/__init__.py b/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/__init__.py new file mode 100644 index 00000000..8ed2ad80 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/__init__.py @@ -0,0 +1,289 @@ +import os +from hashlib import md5 + +import pytest + +from fsspec.implementations.local import LocalFileSystem +from fsspec.tests.abstract.copy import AbstractCopyTests # noqa: F401 +from fsspec.tests.abstract.get import AbstractGetTests # noqa: F401 +from fsspec.tests.abstract.open import AbstractOpenTests # noqa: F401 +from fsspec.tests.abstract.pipe import AbstractPipeTests # noqa: F401 +from fsspec.tests.abstract.put import AbstractPutTests # noqa: F401 + + +class BaseAbstractFixtures: + """ + Abstract base class containing fixtures that are used by but never need to + be overridden in derived filesystem-specific classes to run the abstract + tests on such filesystems. + """ + + @pytest.fixture + def fs_bulk_operations_scenario_0(self, fs, fs_join, fs_path): + """ + Scenario on remote filesystem that is used for many cp/get/put tests. + + Cleans up at the end of each test it which it is used. + """ + source = self._bulk_operations_scenario_0(fs, fs_join, fs_path) + yield source + fs.rm(source, recursive=True) + + @pytest.fixture + def fs_glob_edge_cases_files(self, fs, fs_join, fs_path): + """ + Scenario on remote filesystem that is used for glob edge cases cp/get/put tests. + + Cleans up at the end of each test it which it is used. + """ + source = self._glob_edge_cases_files(fs, fs_join, fs_path) + yield source + fs.rm(source, recursive=True) + + @pytest.fixture + def fs_dir_and_file_with_same_name_prefix(self, fs, fs_join, fs_path): + """ + Scenario on remote filesystem that is used to check cp/get/put on directory + and file with the same name prefixes. + + Cleans up at the end of each test it which it is used. + """ + source = self._dir_and_file_with_same_name_prefix(fs, fs_join, fs_path) + yield source + fs.rm(source, recursive=True) + + @pytest.fixture + def fs_10_files_with_hashed_names(self, fs, fs_join, fs_path): + """ + Scenario on remote filesystem that is used to check cp/get/put files order + when source and destination are lists. + + Cleans up at the end of each test it which it is used. + """ + source = self._10_files_with_hashed_names(fs, fs_join, fs_path) + yield source + fs.rm(source, recursive=True) + + @pytest.fixture + def fs_target(self, fs, fs_join, fs_path): + """ + Return name of remote directory that does not yet exist to copy into. + + Cleans up at the end of each test it which it is used. + """ + target = fs_join(fs_path, "target") + yield target + if fs.exists(target): + fs.rm(target, recursive=True) + + @pytest.fixture + def local_bulk_operations_scenario_0(self, local_fs, local_join, local_path): + """ + Scenario on local filesystem that is used for many cp/get/put tests. + + Cleans up at the end of each test it which it is used. + """ + source = self._bulk_operations_scenario_0(local_fs, local_join, local_path) + yield source + local_fs.rm(source, recursive=True) + + @pytest.fixture + def local_glob_edge_cases_files(self, local_fs, local_join, local_path): + """ + Scenario on local filesystem that is used for glob edge cases cp/get/put tests. + + Cleans up at the end of each test it which it is used. + """ + source = self._glob_edge_cases_files(local_fs, local_join, local_path) + yield source + local_fs.rm(source, recursive=True) + + @pytest.fixture + def local_dir_and_file_with_same_name_prefix( + self, local_fs, local_join, local_path + ): + """ + Scenario on local filesystem that is used to check cp/get/put on directory + and file with the same name prefixes. + + Cleans up at the end of each test it which it is used. + """ + source = self._dir_and_file_with_same_name_prefix( + local_fs, local_join, local_path + ) + yield source + local_fs.rm(source, recursive=True) + + @pytest.fixture + def local_10_files_with_hashed_names(self, local_fs, local_join, local_path): + """ + Scenario on local filesystem that is used to check cp/get/put files order + when source and destination are lists. + + Cleans up at the end of each test it which it is used. + """ + source = self._10_files_with_hashed_names(local_fs, local_join, local_path) + yield source + local_fs.rm(source, recursive=True) + + @pytest.fixture + def local_target(self, local_fs, local_join, local_path): + """ + Return name of local directory that does not yet exist to copy into. + + Cleans up at the end of each test it which it is used. + """ + target = local_join(local_path, "target") + yield target + if local_fs.exists(target): + local_fs.rm(target, recursive=True) + + def _glob_edge_cases_files(self, some_fs, some_join, some_path): + """ + Scenario that is used for glob edge cases cp/get/put tests. + Creates the following directory and file structure: + + 📁 source + ├── 📄 file1 + ├── 📄 file2 + ├── 📁 subdir0 + │ ├── 📄 subfile1 + │ ├── 📄 subfile2 + │ └── 📁 nesteddir + │ └── 📄 nestedfile + └── 📁 subdir1 + ├── 📄 subfile1 + ├── 📄 subfile2 + └── 📁 nesteddir + └── 📄 nestedfile + """ + source = some_join(some_path, "source") + some_fs.touch(some_join(source, "file1")) + some_fs.touch(some_join(source, "file2")) + + for subdir_idx in range(2): + subdir = some_join(source, f"subdir{subdir_idx}") + nesteddir = some_join(subdir, "nesteddir") + some_fs.makedirs(nesteddir) + some_fs.touch(some_join(subdir, "subfile1")) + some_fs.touch(some_join(subdir, "subfile2")) + some_fs.touch(some_join(nesteddir, "nestedfile")) + + return source + + def _bulk_operations_scenario_0(self, some_fs, some_join, some_path): + """ + Scenario that is used for many cp/get/put tests. Creates the following + directory and file structure: + + 📁 source + ├── 📄 file1 + ├── 📄 file2 + └── 📁 subdir + ├── 📄 subfile1 + ├── 📄 subfile2 + └── 📁 nesteddir + └── 📄 nestedfile + """ + source = some_join(some_path, "source") + subdir = some_join(source, "subdir") + nesteddir = some_join(subdir, "nesteddir") + some_fs.makedirs(nesteddir) + some_fs.touch(some_join(source, "file1")) + some_fs.touch(some_join(source, "file2")) + some_fs.touch(some_join(subdir, "subfile1")) + some_fs.touch(some_join(subdir, "subfile2")) + some_fs.touch(some_join(nesteddir, "nestedfile")) + return source + + def _dir_and_file_with_same_name_prefix(self, some_fs, some_join, some_path): + """ + Scenario that is used to check cp/get/put on directory and file with + the same name prefixes. Creates the following directory and file structure: + + 📁 source + ├── 📄 subdir.txt + └── 📁 subdir + └── 📄 subfile.txt + """ + source = some_join(some_path, "source") + subdir = some_join(source, "subdir") + file = some_join(source, "subdir.txt") + subfile = some_join(subdir, "subfile.txt") + some_fs.makedirs(subdir) + some_fs.touch(file) + some_fs.touch(subfile) + return source + + def _10_files_with_hashed_names(self, some_fs, some_join, some_path): + """ + Scenario that is used to check cp/get/put files order when source and + destination are lists. Creates the following directory and file structure: + + 📁 source + └── 📄 {hashed([0-9])}.txt + """ + source = some_join(some_path, "source") + for i in range(10): + hashed_i = md5(str(i).encode("utf-8")).hexdigest() + path = some_join(source, f"{hashed_i}.txt") + some_fs.pipe(path=path, value=f"{i}".encode()) + return source + + +class AbstractFixtures(BaseAbstractFixtures): + """ + Abstract base class containing fixtures that may be overridden in derived + filesystem-specific classes to run the abstract tests on such filesystems. + + For any particular filesystem some of these fixtures must be overridden, + such as ``fs`` and ``fs_path``, and others may be overridden if the + default functions here are not appropriate, such as ``fs_join``. + """ + + @pytest.fixture + def fs(self): + raise NotImplementedError("This function must be overridden in derived classes") + + @pytest.fixture + def fs_join(self): + """ + Return a function that joins its arguments together into a path. + + Most fsspec implementations join paths in a platform-dependent way, + but some will override this to always use a forward slash. + """ + return os.path.join + + @pytest.fixture + def fs_path(self): + raise NotImplementedError("This function must be overridden in derived classes") + + @pytest.fixture(scope="class") + def local_fs(self): + # Maybe need an option for auto_mkdir=False? This is only relevant + # for certain implementations. + return LocalFileSystem(auto_mkdir=True) + + @pytest.fixture + def local_join(self): + """ + Return a function that joins its arguments together into a path, on + the local filesystem. + """ + return os.path.join + + @pytest.fixture + def local_path(self, tmpdir): + return tmpdir + + @pytest.fixture + def supports_empty_directories(self): + """ + Return whether this implementation supports empty directories. + """ + return True + + @pytest.fixture + def fs_sanitize_path(self): + return lambda x: x diff --git a/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/common.py b/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/common.py new file mode 100644 index 00000000..22e7c414 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/common.py @@ -0,0 +1,175 @@ +GLOB_EDGE_CASES_TESTS = { + "argnames": ("path", "recursive", "maxdepth", "expected"), + "argvalues": [ + ("fil?1", False, None, ["file1"]), + ("fil?1", True, None, ["file1"]), + ("file[1-2]", False, None, ["file1", "file2"]), + ("file[1-2]", True, None, ["file1", "file2"]), + ("*", False, None, ["file1", "file2"]), + ( + "*", + True, + None, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ("*", True, 1, ["file1", "file2"]), + ( + "*", + True, + 2, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir1/subfile1", + "subdir1/subfile2", + ], + ), + ("*1", False, None, ["file1"]), + ( + "*1", + True, + None, + [ + "file1", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]), + ( + "**", + False, + None, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ( + "**", + True, + None, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ("**", True, 1, ["file1", "file2"]), + ( + "**", + True, + 2, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ( + "**", + False, + 2, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir1/subfile1", + "subdir1/subfile2", + ], + ), + ("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]), + ( + "**/*1", + True, + None, + [ + "file1", + "subdir0/subfile1", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ("**/*1", True, 1, ["file1"]), + ( + "**/*1", + True, + 2, + ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"], + ), + ("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]), + ("**/subdir0", False, None, []), + ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]), + ("**/subdir0/nested*", False, 2, []), + ("**/subdir0/nested*", True, 2, ["nestedfile"]), + ("subdir[1-2]", False, None, []), + ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]), + ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]), + ("subdir[0-1]", False, None, []), + ( + "subdir[0-1]", + True, + None, + [ + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ( + "subdir[0-1]/*fil[e]*", + False, + None, + [ + "subdir0/subfile1", + "subdir0/subfile2", + "subdir1/subfile1", + "subdir1/subfile2", + ], + ), + ( + "subdir[0-1]/*fil[e]*", + True, + None, + [ + "subdir0/subfile1", + "subdir0/subfile2", + "subdir1/subfile1", + "subdir1/subfile2", + ], + ), + ], +} diff --git a/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/copy.py b/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/copy.py new file mode 100644 index 00000000..e39e57e5 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/copy.py @@ -0,0 +1,557 @@ +from hashlib import md5 +from itertools import product + +import pytest + +from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS + + +class AbstractCopyTests: + def test_copy_file_to_existing_directory( + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, + ): + # Copy scenario 1a + source = fs_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + if not supports_empty_directories: + # Force target directory to exist by adding a dummy file + fs.touch(fs_join(target, "dummy")) + assert fs.isdir(target) + + target_file2 = fs_join(target, "file2") + target_subfile1 = fs_join(target, "subfile1") + + # Copy from source directory + fs.cp(fs_join(source, "file2"), target) + assert fs.isfile(target_file2) + + # Copy from sub directory + fs.cp(fs_join(source, "subdir", "subfile1"), target) + assert fs.isfile(target_subfile1) + + # Remove copied files + fs.rm([target_file2, target_subfile1]) + assert not fs.exists(target_file2) + assert not fs.exists(target_subfile1) + + # Repeat with trailing slash on target + fs.cp(fs_join(source, "file2"), target + "/") + assert fs.isdir(target) + assert fs.isfile(target_file2) + + fs.cp(fs_join(source, "subdir", "subfile1"), target + "/") + assert fs.isfile(target_subfile1) + + def test_copy_file_to_new_directory( + self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + ): + # Copy scenario 1b + source = fs_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + + fs.cp( + fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir/") + ) # Note trailing slash + assert fs.isdir(target) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + + def test_copy_file_to_file_in_existing_directory( + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, + ): + # Copy scenario 1c + source = fs_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + if not supports_empty_directories: + # Force target directory to exist by adding a dummy file + fs.touch(fs_join(target, "dummy")) + assert fs.isdir(target) + + fs.cp(fs_join(source, "subdir", "subfile1"), fs_join(target, "newfile")) + assert fs.isfile(fs_join(target, "newfile")) + + def test_copy_file_to_file_in_new_directory( + self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + ): + # Copy scenario 1d + source = fs_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + + fs.cp( + fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir", "newfile") + ) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "newfile")) + + def test_copy_directory_to_existing_directory( + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, + ): + # Copy scenario 1e + source = fs_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + if not supports_empty_directories: + # Force target directory to exist by adding a dummy file + dummy = fs_join(target, "dummy") + fs.touch(dummy) + assert fs.isdir(target) + + for source_slash, target_slash in zip([False, True], [False, True]): + s = fs_join(source, "subdir") + if source_slash: + s += "/" + t = target + "/" if target_slash else target + + # Without recursive does nothing + fs.cp(s, t) + assert fs.ls(target, detail=False) == ( + [] if supports_empty_directories else [dummy] + ) + + # With recursive + fs.cp(s, t, recursive=True) + if source_slash: + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert fs.isdir(fs_join(target, "nesteddir")) + assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + fs_join(target, "nesteddir"), + ], + recursive=True, + ) + else: + assert fs.isdir(fs_join(target, "subdir")) + assert fs.isfile(fs_join(target, "subdir", "subfile1")) + assert fs.isfile(fs_join(target, "subdir", "subfile2")) + assert fs.isdir(fs_join(target, "subdir", "nesteddir")) + assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile")) + + fs.rm(fs_join(target, "subdir"), recursive=True) + assert fs.ls(target, detail=False) == ( + [] if supports_empty_directories else [dummy] + ) + + # Limit recursive by maxdepth + fs.cp(s, t, recursive=True, maxdepth=1) + if source_slash: + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert not fs.exists(fs_join(target, "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) + else: + assert fs.isdir(fs_join(target, "subdir")) + assert fs.isfile(fs_join(target, "subdir", "subfile1")) + assert fs.isfile(fs_join(target, "subdir", "subfile2")) + assert not fs.exists(fs_join(target, "subdir", "nesteddir")) + + fs.rm(fs_join(target, "subdir"), recursive=True) + assert fs.ls(target, detail=False) == ( + [] if supports_empty_directories else [dummy] + ) + + def test_copy_directory_to_new_directory( + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, + ): + # Copy scenario 1f + source = fs_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + + for source_slash, target_slash in zip([False, True], [False, True]): + s = fs_join(source, "subdir") + if source_slash: + s += "/" + t = fs_join(target, "newdir") + if target_slash: + t += "/" + + # Without recursive does nothing + fs.cp(s, t) + if supports_empty_directories: + assert fs.ls(target) == [] + else: + with pytest.raises(FileNotFoundError): + fs.ls(target) + + # With recursive + fs.cp(s, t, recursive=True) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert fs.isdir(fs_join(target, "newdir", "nesteddir")) + assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) + + # Limit recursive by maxdepth + fs.cp(s, t, recursive=True, maxdepth=1) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert not fs.exists(fs_join(target, "newdir", "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) + + def test_copy_glob_to_existing_directory( + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, + ): + # Copy scenario 1g + source = fs_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + if not supports_empty_directories: + # Force target directory to exist by adding a dummy file + dummy = fs_join(target, "dummy") + fs.touch(dummy) + assert fs.isdir(target) + + for target_slash in [False, True]: + t = target + "/" if target_slash else target + + # Without recursive + fs.cp(fs_join(source, "subdir", "*"), t) + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert not fs.isdir(fs_join(target, "nesteddir")) + assert not fs.exists(fs_join(target, "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) + assert fs.ls(target, detail=False) == ( + [] if supports_empty_directories else [dummy] + ) + + # With recursive + for glob, recursive in zip(["*", "**"], [True, False]): + fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive) + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert fs.isdir(fs_join(target, "nesteddir")) + assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + fs_join(target, "nesteddir"), + ], + recursive=True, + ) + assert fs.ls(target, detail=False) == ( + [] if supports_empty_directories else [dummy] + ) + + # Limit recursive by maxdepth + fs.cp( + fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1 + ) + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert not fs.exists(fs_join(target, "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) + assert fs.ls(target, detail=False) == ( + [] if supports_empty_directories else [dummy] + ) + + def test_copy_glob_to_new_directory( + self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + ): + # Copy scenario 1h + source = fs_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + + for target_slash in [False, True]: + t = fs_join(target, "newdir") + if target_slash: + t += "/" + + # Without recursive + fs.cp(fs_join(source, "subdir", "*"), t) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert not fs.exists(fs_join(target, "newdir", "nesteddir")) + assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) + assert not fs.exists(fs_join(target, "newdir", "subdir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) + + # With recursive + for glob, recursive in zip(["*", "**"], [True, False]): + fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert fs.isdir(fs_join(target, "newdir", "nesteddir")) + assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) + assert not fs.exists(fs_join(target, "newdir", "subdir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) + + # Limit recursive by maxdepth + fs.cp( + fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1 + ) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert not fs.exists(fs_join(target, "newdir", "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) + assert not fs.exists(fs_join(target, "newdir", "subdir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) + + @pytest.mark.parametrize( + GLOB_EDGE_CASES_TESTS["argnames"], + GLOB_EDGE_CASES_TESTS["argvalues"], + ) + def test_copy_glob_edge_cases( + self, + path, + recursive, + maxdepth, + expected, + fs, + fs_join, + fs_glob_edge_cases_files, + fs_target, + fs_sanitize_path, + ): + # Copy scenario 1g + source = fs_glob_edge_cases_files + + target = fs_target + + for new_dir, target_slash in product([True, False], [True, False]): + fs.mkdir(target) + + t = fs_join(target, "newdir") if new_dir else target + t = t + "/" if target_slash else t + + fs.copy(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth) + + output = fs.find(target) + if new_dir: + prefixed_expected = [ + fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected + ] + else: + prefixed_expected = [ + fs_sanitize_path(fs_join(target, p)) for p in expected + ] + assert sorted(output) == sorted(prefixed_expected) + + try: + fs.rm(target, recursive=True) + except FileNotFoundError: + pass + + def test_copy_list_of_files_to_existing_directory( + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, + ): + # Copy scenario 2a + source = fs_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + if not supports_empty_directories: + # Force target directory to exist by adding a dummy file + dummy = fs_join(target, "dummy") + fs.touch(dummy) + assert fs.isdir(target) + + source_files = [ + fs_join(source, "file1"), + fs_join(source, "file2"), + fs_join(source, "subdir", "subfile1"), + ] + + for target_slash in [False, True]: + t = target + "/" if target_slash else target + + fs.cp(source_files, t) + assert fs.isfile(fs_join(target, "file1")) + assert fs.isfile(fs_join(target, "file2")) + assert fs.isfile(fs_join(target, "subfile1")) + + fs.rm( + [ + fs_join(target, "file1"), + fs_join(target, "file2"), + fs_join(target, "subfile1"), + ], + recursive=True, + ) + assert fs.ls(target, detail=False) == ( + [] if supports_empty_directories else [dummy] + ) + + def test_copy_list_of_files_to_new_directory( + self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + ): + # Copy scenario 2b + source = fs_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + + source_files = [ + fs_join(source, "file1"), + fs_join(source, "file2"), + fs_join(source, "subdir", "subfile1"), + ] + + fs.cp(source_files, fs_join(target, "newdir") + "/") # Note trailing slash + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "file1")) + assert fs.isfile(fs_join(target, "newdir", "file2")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + + def test_copy_two_files_new_directory( + self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + ): + # This is a duplicate of test_copy_list_of_files_to_new_directory and + # can eventually be removed. + source = fs_bulk_operations_scenario_0 + + target = fs_target + assert not fs.exists(target) + fs.cp([fs_join(source, "file1"), fs_join(source, "file2")], target) + + assert fs.isdir(target) + assert fs.isfile(fs_join(target, "file1")) + assert fs.isfile(fs_join(target, "file2")) + + def test_copy_directory_without_files_with_same_name_prefix( + self, + fs, + fs_join, + fs_target, + fs_dir_and_file_with_same_name_prefix, + supports_empty_directories, + ): + # Create the test dirs + source = fs_dir_and_file_with_same_name_prefix + target = fs_target + + # Test without glob + fs.cp(fs_join(source, "subdir"), target, recursive=True) + + assert fs.isfile(fs_join(target, "subfile.txt")) + assert not fs.isfile(fs_join(target, "subdir.txt")) + + fs.rm([fs_join(target, "subfile.txt")]) + if supports_empty_directories: + assert fs.ls(target) == [] + else: + assert not fs.exists(target) + + # Test with glob + fs.cp(fs_join(source, "subdir*"), target, recursive=True) + + assert fs.isdir(fs_join(target, "subdir")) + assert fs.isfile(fs_join(target, "subdir", "subfile.txt")) + assert fs.isfile(fs_join(target, "subdir.txt")) + + def test_copy_with_source_and_destination_as_list( + self, fs, fs_target, fs_join, fs_10_files_with_hashed_names + ): + # Create the test dir + source = fs_10_files_with_hashed_names + target = fs_target + + # Create list of files for source and destination + source_files = [] + destination_files = [] + for i in range(10): + hashed_i = md5(str(i).encode("utf-8")).hexdigest() + source_files.append(fs_join(source, f"{hashed_i}.txt")) + destination_files.append(fs_join(target, f"{hashed_i}.txt")) + + # Copy and assert order was kept + fs.copy(path1=source_files, path2=destination_files) + + for i in range(10): + file_content = fs.cat(destination_files[i]).decode("utf-8") + assert file_content == str(i) diff --git a/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/get.py b/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/get.py new file mode 100644 index 00000000..851ab81e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/get.py @@ -0,0 +1,587 @@ +from hashlib import md5 +from itertools import product + +import pytest + +from fsspec.implementations.local import make_path_posix +from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS + + +class AbstractGetTests: + def test_get_file_to_existing_directory( + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + local_fs, + local_join, + local_target, + ): + # Copy scenario 1a + source = fs_bulk_operations_scenario_0 + + target = local_target + local_fs.mkdir(target) + assert local_fs.isdir(target) + + target_file2 = local_join(target, "file2") + target_subfile1 = local_join(target, "subfile1") + + # Copy from source directory + fs.get(fs_join(source, "file2"), target) + assert local_fs.isfile(target_file2) + + # Copy from sub directory + fs.get(fs_join(source, "subdir", "subfile1"), target) + assert local_fs.isfile(target_subfile1) + + # Remove copied files + local_fs.rm([target_file2, target_subfile1]) + assert not local_fs.exists(target_file2) + assert not local_fs.exists(target_subfile1) + + # Repeat with trailing slash on target + fs.get(fs_join(source, "file2"), target + "/") + assert local_fs.isdir(target) + assert local_fs.isfile(target_file2) + + fs.get(fs_join(source, "subdir", "subfile1"), target + "/") + assert local_fs.isfile(target_subfile1) + + def test_get_file_to_new_directory( + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + local_fs, + local_join, + local_target, + ): + # Copy scenario 1b + source = fs_bulk_operations_scenario_0 + + target = local_target + local_fs.mkdir(target) + + fs.get( + fs_join(source, "subdir", "subfile1"), local_join(target, "newdir/") + ) # Note trailing slash + + assert local_fs.isdir(target) + assert local_fs.isdir(local_join(target, "newdir")) + assert local_fs.isfile(local_join(target, "newdir", "subfile1")) + + def test_get_file_to_file_in_existing_directory( + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + local_fs, + local_join, + local_target, + ): + # Copy scenario 1c + source = fs_bulk_operations_scenario_0 + + target = local_target + local_fs.mkdir(target) + + fs.get(fs_join(source, "subdir", "subfile1"), local_join(target, "newfile")) + assert local_fs.isfile(local_join(target, "newfile")) + + def test_get_file_to_file_in_new_directory( + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + local_fs, + local_join, + local_target, + ): + # Copy scenario 1d + source = fs_bulk_operations_scenario_0 + + target = local_target + local_fs.mkdir(target) + + fs.get( + fs_join(source, "subdir", "subfile1"), + local_join(target, "newdir", "newfile"), + ) + assert local_fs.isdir(local_join(target, "newdir")) + assert local_fs.isfile(local_join(target, "newdir", "newfile")) + + def test_get_directory_to_existing_directory( + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + local_fs, + local_join, + local_target, + ): + # Copy scenario 1e + source = fs_bulk_operations_scenario_0 + + target = local_target + local_fs.mkdir(target) + assert local_fs.isdir(target) + + for source_slash, target_slash in zip([False, True], [False, True]): + s = fs_join(source, "subdir") + if source_slash: + s += "/" + t = target + "/" if target_slash else target + + # Without recursive does nothing + fs.get(s, t) + assert local_fs.ls(target) == [] + + # With recursive + fs.get(s, t, recursive=True) + if source_slash: + assert local_fs.isfile(local_join(target, "subfile1")) + assert local_fs.isfile(local_join(target, "subfile2")) + assert local_fs.isdir(local_join(target, "nesteddir")) + assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile")) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm( + [ + local_join(target, "subfile1"), + local_join(target, "subfile2"), + local_join(target, "nesteddir"), + ], + recursive=True, + ) + else: + assert local_fs.isdir(local_join(target, "subdir")) + assert local_fs.isfile(local_join(target, "subdir", "subfile1")) + assert local_fs.isfile(local_join(target, "subdir", "subfile2")) + assert local_fs.isdir(local_join(target, "subdir", "nesteddir")) + assert local_fs.isfile( + local_join(target, "subdir", "nesteddir", "nestedfile") + ) + + local_fs.rm(local_join(target, "subdir"), recursive=True) + assert local_fs.ls(target) == [] + + # Limit recursive by maxdepth + fs.get(s, t, recursive=True, maxdepth=1) + if source_slash: + assert local_fs.isfile(local_join(target, "subfile1")) + assert local_fs.isfile(local_join(target, "subfile2")) + assert not local_fs.exists(local_join(target, "nesteddir")) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm( + [ + local_join(target, "subfile1"), + local_join(target, "subfile2"), + ], + recursive=True, + ) + else: + assert local_fs.isdir(local_join(target, "subdir")) + assert local_fs.isfile(local_join(target, "subdir", "subfile1")) + assert local_fs.isfile(local_join(target, "subdir", "subfile2")) + assert not local_fs.exists(local_join(target, "subdir", "nesteddir")) + + local_fs.rm(local_join(target, "subdir"), recursive=True) + assert local_fs.ls(target) == [] + + def test_get_directory_to_new_directory( + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + local_fs, + local_join, + local_target, + ): + # Copy scenario 1f + source = fs_bulk_operations_scenario_0 + + target = local_target + local_fs.mkdir(target) + + for source_slash, target_slash in zip([False, True], [False, True]): + s = fs_join(source, "subdir") + if source_slash: + s += "/" + t = local_join(target, "newdir") + if target_slash: + t += "/" + + # Without recursive does nothing + fs.get(s, t) + assert local_fs.ls(target) == [] + + # With recursive + fs.get(s, t, recursive=True) + assert local_fs.isdir(local_join(target, "newdir")) + assert local_fs.isfile(local_join(target, "newdir", "subfile1")) + assert local_fs.isfile(local_join(target, "newdir", "subfile2")) + assert local_fs.isdir(local_join(target, "newdir", "nesteddir")) + assert local_fs.isfile( + local_join(target, "newdir", "nesteddir", "nestedfile") + ) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm(local_join(target, "newdir"), recursive=True) + assert local_fs.ls(target) == [] + + # Limit recursive by maxdepth + fs.get(s, t, recursive=True, maxdepth=1) + assert local_fs.isdir(local_join(target, "newdir")) + assert local_fs.isfile(local_join(target, "newdir", "subfile1")) + assert local_fs.isfile(local_join(target, "newdir", "subfile2")) + assert not local_fs.exists(local_join(target, "newdir", "nesteddir")) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm(local_join(target, "newdir"), recursive=True) + assert not local_fs.exists(local_join(target, "newdir")) + + def test_get_glob_to_existing_directory( + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + local_fs, + local_join, + local_target, + ): + # Copy scenario 1g + source = fs_bulk_operations_scenario_0 + + target = local_target + local_fs.mkdir(target) + + for target_slash in [False, True]: + t = target + "/" if target_slash else target + + # Without recursive + fs.get(fs_join(source, "subdir", "*"), t) + assert local_fs.isfile(local_join(target, "subfile1")) + assert local_fs.isfile(local_join(target, "subfile2")) + assert not local_fs.isdir(local_join(target, "nesteddir")) + assert not local_fs.exists(local_join(target, "nesteddir", "nestedfile")) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm( + [ + local_join(target, "subfile1"), + local_join(target, "subfile2"), + ], + recursive=True, + ) + assert local_fs.ls(target) == [] + + # With recursive + for glob, recursive in zip(["*", "**"], [True, False]): + fs.get(fs_join(source, "subdir", glob), t, recursive=recursive) + assert local_fs.isfile(local_join(target, "subfile1")) + assert local_fs.isfile(local_join(target, "subfile2")) + assert local_fs.isdir(local_join(target, "nesteddir")) + assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile")) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm( + [ + local_join(target, "subfile1"), + local_join(target, "subfile2"), + local_join(target, "nesteddir"), + ], + recursive=True, + ) + assert local_fs.ls(target) == [] + + # Limit recursive by maxdepth + fs.get( + fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1 + ) + assert local_fs.isfile(local_join(target, "subfile1")) + assert local_fs.isfile(local_join(target, "subfile2")) + assert not local_fs.exists(local_join(target, "nesteddir")) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm( + [ + local_join(target, "subfile1"), + local_join(target, "subfile2"), + ], + recursive=True, + ) + assert local_fs.ls(target) == [] + + def test_get_glob_to_new_directory( + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + local_fs, + local_join, + local_target, + ): + # Copy scenario 1h + source = fs_bulk_operations_scenario_0 + + target = local_target + local_fs.mkdir(target) + + for target_slash in [False, True]: + t = fs_join(target, "newdir") + if target_slash: + t += "/" + + # Without recursive + fs.get(fs_join(source, "subdir", "*"), t) + assert local_fs.isdir(local_join(target, "newdir")) + assert local_fs.isfile(local_join(target, "newdir", "subfile1")) + assert local_fs.isfile(local_join(target, "newdir", "subfile2")) + assert not local_fs.exists(local_join(target, "newdir", "nesteddir")) + assert not local_fs.exists( + local_join(target, "newdir", "nesteddir", "nestedfile") + ) + assert not local_fs.exists(local_join(target, "subdir")) + assert not local_fs.exists(local_join(target, "newdir", "subdir")) + + local_fs.rm(local_join(target, "newdir"), recursive=True) + assert local_fs.ls(target) == [] + + # With recursive + for glob, recursive in zip(["*", "**"], [True, False]): + fs.get(fs_join(source, "subdir", glob), t, recursive=recursive) + assert local_fs.isdir(local_join(target, "newdir")) + assert local_fs.isfile(local_join(target, "newdir", "subfile1")) + assert local_fs.isfile(local_join(target, "newdir", "subfile2")) + assert local_fs.isdir(local_join(target, "newdir", "nesteddir")) + assert local_fs.isfile( + local_join(target, "newdir", "nesteddir", "nestedfile") + ) + assert not local_fs.exists(local_join(target, "subdir")) + assert not local_fs.exists(local_join(target, "newdir", "subdir")) + + local_fs.rm(local_join(target, "newdir"), recursive=True) + assert not local_fs.exists(local_join(target, "newdir")) + + # Limit recursive by maxdepth + fs.get( + fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1 + ) + assert local_fs.isdir(local_join(target, "newdir")) + assert local_fs.isfile(local_join(target, "newdir", "subfile1")) + assert local_fs.isfile(local_join(target, "newdir", "subfile2")) + assert not local_fs.exists(local_join(target, "newdir", "nesteddir")) + assert not local_fs.exists(local_join(target, "subdir")) + assert not local_fs.exists(local_join(target, "newdir", "subdir")) + + local_fs.rm(local_fs.ls(target, detail=False), recursive=True) + assert not local_fs.exists(local_join(target, "newdir")) + + @pytest.mark.parametrize( + GLOB_EDGE_CASES_TESTS["argnames"], + GLOB_EDGE_CASES_TESTS["argvalues"], + ) + def test_get_glob_edge_cases( + self, + path, + recursive, + maxdepth, + expected, + fs, + fs_join, + fs_glob_edge_cases_files, + local_fs, + local_join, + local_target, + ): + # Copy scenario 1g + source = fs_glob_edge_cases_files + + target = local_target + + for new_dir, target_slash in product([True, False], [True, False]): + local_fs.mkdir(target) + + t = local_join(target, "newdir") if new_dir else target + t = t + "/" if target_slash else t + + fs.get(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth) + + output = local_fs.find(target) + if new_dir: + prefixed_expected = [ + make_path_posix(local_join(target, "newdir", p)) for p in expected + ] + else: + prefixed_expected = [ + make_path_posix(local_join(target, p)) for p in expected + ] + assert sorted(output) == sorted(prefixed_expected) + + try: + local_fs.rm(target, recursive=True) + except FileNotFoundError: + pass + + def test_get_list_of_files_to_existing_directory( + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + local_fs, + local_join, + local_target, + ): + # Copy scenario 2a + source = fs_bulk_operations_scenario_0 + + target = local_target + local_fs.mkdir(target) + + source_files = [ + fs_join(source, "file1"), + fs_join(source, "file2"), + fs_join(source, "subdir", "subfile1"), + ] + + for target_slash in [False, True]: + t = target + "/" if target_slash else target + + fs.get(source_files, t) + assert local_fs.isfile(local_join(target, "file1")) + assert local_fs.isfile(local_join(target, "file2")) + assert local_fs.isfile(local_join(target, "subfile1")) + + local_fs.rm( + [ + local_join(target, "file1"), + local_join(target, "file2"), + local_join(target, "subfile1"), + ], + recursive=True, + ) + assert local_fs.ls(target) == [] + + def test_get_list_of_files_to_new_directory( + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + local_fs, + local_join, + local_target, + ): + # Copy scenario 2b + source = fs_bulk_operations_scenario_0 + + target = local_target + local_fs.mkdir(target) + + source_files = [ + fs_join(source, "file1"), + fs_join(source, "file2"), + fs_join(source, "subdir", "subfile1"), + ] + + fs.get(source_files, local_join(target, "newdir") + "/") # Note trailing slash + assert local_fs.isdir(local_join(target, "newdir")) + assert local_fs.isfile(local_join(target, "newdir", "file1")) + assert local_fs.isfile(local_join(target, "newdir", "file2")) + assert local_fs.isfile(local_join(target, "newdir", "subfile1")) + + def test_get_directory_recursive( + self, fs, fs_join, fs_path, local_fs, local_join, local_target + ): + # https://github.com/fsspec/filesystem_spec/issues/1062 + # Recursive cp/get/put of source directory into non-existent target directory. + src = fs_join(fs_path, "src") + src_file = fs_join(src, "file") + fs.mkdir(src) + fs.touch(src_file) + + target = local_target + + # get without slash + assert not local_fs.exists(target) + for loop in range(2): + fs.get(src, target, recursive=True) + assert local_fs.isdir(target) + + if loop == 0: + assert local_fs.isfile(local_join(target, "file")) + assert not local_fs.exists(local_join(target, "src")) + else: + assert local_fs.isfile(local_join(target, "file")) + assert local_fs.isdir(local_join(target, "src")) + assert local_fs.isfile(local_join(target, "src", "file")) + + local_fs.rm(target, recursive=True) + + # get with slash + assert not local_fs.exists(target) + for loop in range(2): + fs.get(src + "/", target, recursive=True) + assert local_fs.isdir(target) + assert local_fs.isfile(local_join(target, "file")) + assert not local_fs.exists(local_join(target, "src")) + + def test_get_directory_without_files_with_same_name_prefix( + self, + fs, + fs_join, + local_fs, + local_join, + local_target, + fs_dir_and_file_with_same_name_prefix, + ): + # Create the test dirs + source = fs_dir_and_file_with_same_name_prefix + target = local_target + + # Test without glob + fs.get(fs_join(source, "subdir"), target, recursive=True) + + assert local_fs.isfile(local_join(target, "subfile.txt")) + assert not local_fs.isfile(local_join(target, "subdir.txt")) + + local_fs.rm([local_join(target, "subfile.txt")]) + assert local_fs.ls(target) == [] + + # Test with glob + fs.get(fs_join(source, "subdir*"), target, recursive=True) + + assert local_fs.isdir(local_join(target, "subdir")) + assert local_fs.isfile(local_join(target, "subdir", "subfile.txt")) + assert local_fs.isfile(local_join(target, "subdir.txt")) + + def test_get_with_source_and_destination_as_list( + self, + fs, + fs_join, + local_fs, + local_join, + local_target, + fs_10_files_with_hashed_names, + ): + # Create the test dir + source = fs_10_files_with_hashed_names + target = local_target + + # Create list of files for source and destination + source_files = [] + destination_files = [] + for i in range(10): + hashed_i = md5(str(i).encode("utf-8")).hexdigest() + source_files.append(fs_join(source, f"{hashed_i}.txt")) + destination_files.append( + make_path_posix(local_join(target, f"{hashed_i}.txt")) + ) + + # Copy and assert order was kept + fs.get(rpath=source_files, lpath=destination_files) + + for i in range(10): + file_content = local_fs.cat(destination_files[i]).decode("utf-8") + assert file_content == str(i) diff --git a/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/mv.py b/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/mv.py new file mode 100644 index 00000000..39f6caa3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/mv.py @@ -0,0 +1,57 @@ +import os + +import pytest + +import fsspec + + +def test_move_raises_error_with_tmpdir(tmpdir): + # Create a file in the temporary directory + source = tmpdir.join("source_file.txt") + source.write("content") + + # Define a destination that simulates a protected or invalid path + destination = tmpdir.join("non_existent_directory/destination_file.txt") + + # Instantiate the filesystem (assuming the local file system interface) + fs = fsspec.filesystem("file") + + # Use the actual file paths as string + with pytest.raises(FileNotFoundError): + fs.mv(str(source), str(destination)) + + +@pytest.mark.parametrize("recursive", (True, False)) +def test_move_raises_error_with_tmpdir_permission(recursive, tmpdir): + # Create a file in the temporary directory + source = tmpdir.join("source_file.txt") + source.write("content") + + # Create a protected directory (non-writable) + protected_dir = tmpdir.mkdir("protected_directory") + protected_path = str(protected_dir) + + # Set the directory to read-only + if os.name == "nt": + os.system(f'icacls "{protected_path}" /deny Everyone:(W)') + else: + os.chmod(protected_path, 0o555) # Sets the directory to read-only + + # Define a destination inside the protected directory + destination = protected_dir.join("destination_file.txt") + + # Instantiate the filesystem (assuming the local file system interface) + fs = fsspec.filesystem("file") + + # Try to move the file to the read-only directory, expecting a permission error + with pytest.raises(PermissionError): + fs.mv(str(source), str(destination), recursive=recursive) + + # Assert the file was not created in the destination + assert not os.path.exists(destination) + + # Cleanup: Restore permissions so the directory can be cleaned up + if os.name == "nt": + os.system(f'icacls "{protected_path}" /remove:d Everyone') + else: + os.chmod(protected_path, 0o755) # Restore write permission for cleanup diff --git a/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/open.py b/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/open.py new file mode 100644 index 00000000..bb75ea85 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/open.py @@ -0,0 +1,11 @@ +import pytest + + +class AbstractOpenTests: + def test_open_exclusive(self, fs, fs_target): + with fs.open(fs_target, "wb") as f: + f.write(b"data") + with fs.open(fs_target, "rb") as f: + assert f.read() == b"data" + with pytest.raises(FileExistsError): + fs.open(fs_target, "xb") diff --git a/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/pipe.py b/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/pipe.py new file mode 100644 index 00000000..8ecca96e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/pipe.py @@ -0,0 +1,11 @@ +import pytest + + +class AbstractPipeTests: + def test_pipe_exclusive(self, fs, fs_target): + fs.pipe_file(fs_target, b"data") + assert fs.cat_file(fs_target) == b"data" + with pytest.raises(FileExistsError): + fs.pipe_file(fs_target, b"data", mode="create") + fs.pipe_file(fs_target, b"new data", mode="overwrite") + assert fs.cat_file(fs_target) == b"new data" diff --git a/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/put.py b/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/put.py new file mode 100644 index 00000000..9fc34997 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/fsspec/tests/abstract/put.py @@ -0,0 +1,591 @@ +from hashlib import md5 +from itertools import product + +import pytest + +from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS + + +class AbstractPutTests: + def test_put_file_to_existing_directory( + self, + fs, + fs_join, + fs_target, + local_join, + local_bulk_operations_scenario_0, + supports_empty_directories, + ): + # Copy scenario 1a + source = local_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + if not supports_empty_directories: + # Force target directory to exist by adding a dummy file + fs.touch(fs_join(target, "dummy")) + assert fs.isdir(target) + + target_file2 = fs_join(target, "file2") + target_subfile1 = fs_join(target, "subfile1") + + # Copy from source directory + fs.put(local_join(source, "file2"), target) + assert fs.isfile(target_file2) + + # Copy from sub directory + fs.put(local_join(source, "subdir", "subfile1"), target) + assert fs.isfile(target_subfile1) + + # Remove copied files + fs.rm([target_file2, target_subfile1]) + assert not fs.exists(target_file2) + assert not fs.exists(target_subfile1) + + # Repeat with trailing slash on target + fs.put(local_join(source, "file2"), target + "/") + assert fs.isdir(target) + assert fs.isfile(target_file2) + + fs.put(local_join(source, "subdir", "subfile1"), target + "/") + assert fs.isfile(target_subfile1) + + def test_put_file_to_new_directory( + self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0 + ): + # Copy scenario 1b + source = local_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + + fs.put( + local_join(source, "subdir", "subfile1"), fs_join(target, "newdir/") + ) # Note trailing slash + assert fs.isdir(target) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + + def test_put_file_to_file_in_existing_directory( + self, + fs, + fs_join, + fs_target, + local_join, + supports_empty_directories, + local_bulk_operations_scenario_0, + ): + # Copy scenario 1c + source = local_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + if not supports_empty_directories: + # Force target directory to exist by adding a dummy file + fs.touch(fs_join(target, "dummy")) + assert fs.isdir(target) + + fs.put(local_join(source, "subdir", "subfile1"), fs_join(target, "newfile")) + assert fs.isfile(fs_join(target, "newfile")) + + def test_put_file_to_file_in_new_directory( + self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0 + ): + # Copy scenario 1d + source = local_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + + fs.put( + local_join(source, "subdir", "subfile1"), + fs_join(target, "newdir", "newfile"), + ) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "newfile")) + + def test_put_directory_to_existing_directory( + self, + fs, + fs_join, + fs_target, + local_bulk_operations_scenario_0, + supports_empty_directories, + ): + # Copy scenario 1e + source = local_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + if not supports_empty_directories: + # Force target directory to exist by adding a dummy file + dummy = fs_join(target, "dummy") + fs.touch(dummy) + assert fs.isdir(target) + + for source_slash, target_slash in zip([False, True], [False, True]): + s = fs_join(source, "subdir") + if source_slash: + s += "/" + t = target + "/" if target_slash else target + + # Without recursive does nothing + fs.put(s, t) + assert fs.ls(target, detail=False) == ( + [] if supports_empty_directories else [dummy] + ) + + # With recursive + fs.put(s, t, recursive=True) + if source_slash: + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert fs.isdir(fs_join(target, "nesteddir")) + assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + fs_join(target, "nesteddir"), + ], + recursive=True, + ) + else: + assert fs.isdir(fs_join(target, "subdir")) + assert fs.isfile(fs_join(target, "subdir", "subfile1")) + assert fs.isfile(fs_join(target, "subdir", "subfile2")) + assert fs.isdir(fs_join(target, "subdir", "nesteddir")) + assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile")) + + fs.rm(fs_join(target, "subdir"), recursive=True) + assert fs.ls(target, detail=False) == ( + [] if supports_empty_directories else [dummy] + ) + + # Limit recursive by maxdepth + fs.put(s, t, recursive=True, maxdepth=1) + if source_slash: + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert not fs.exists(fs_join(target, "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) + else: + assert fs.isdir(fs_join(target, "subdir")) + assert fs.isfile(fs_join(target, "subdir", "subfile1")) + assert fs.isfile(fs_join(target, "subdir", "subfile2")) + assert not fs.exists(fs_join(target, "subdir", "nesteddir")) + + fs.rm(fs_join(target, "subdir"), recursive=True) + assert fs.ls(target, detail=False) == ( + [] if supports_empty_directories else [dummy] + ) + + def test_put_directory_to_new_directory( + self, + fs, + fs_join, + fs_target, + local_bulk_operations_scenario_0, + supports_empty_directories, + ): + # Copy scenario 1f + source = local_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + + for source_slash, target_slash in zip([False, True], [False, True]): + s = fs_join(source, "subdir") + if source_slash: + s += "/" + t = fs_join(target, "newdir") + if target_slash: + t += "/" + + # Without recursive does nothing + fs.put(s, t) + if supports_empty_directories: + assert fs.ls(target) == [] + else: + with pytest.raises(FileNotFoundError): + fs.ls(target) + + # With recursive + fs.put(s, t, recursive=True) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert fs.isdir(fs_join(target, "newdir", "nesteddir")) + assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) + + # Limit recursive by maxdepth + fs.put(s, t, recursive=True, maxdepth=1) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert not fs.exists(fs_join(target, "newdir", "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) + + def test_put_glob_to_existing_directory( + self, + fs, + fs_join, + fs_target, + local_join, + supports_empty_directories, + local_bulk_operations_scenario_0, + ): + # Copy scenario 1g + source = local_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + if not supports_empty_directories: + # Force target directory to exist by adding a dummy file + dummy = fs_join(target, "dummy") + fs.touch(dummy) + assert fs.isdir(target) + + for target_slash in [False, True]: + t = target + "/" if target_slash else target + + # Without recursive + fs.put(local_join(source, "subdir", "*"), t) + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert not fs.isdir(fs_join(target, "nesteddir")) + assert not fs.exists(fs_join(target, "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) + assert fs.ls(target, detail=False) == ( + [] if supports_empty_directories else [dummy] + ) + + # With recursive + for glob, recursive in zip(["*", "**"], [True, False]): + fs.put(local_join(source, "subdir", glob), t, recursive=recursive) + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert fs.isdir(fs_join(target, "nesteddir")) + assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + fs_join(target, "nesteddir"), + ], + recursive=True, + ) + assert fs.ls(target, detail=False) == ( + [] if supports_empty_directories else [dummy] + ) + + # Limit recursive by maxdepth + fs.put( + local_join(source, "subdir", glob), + t, + recursive=recursive, + maxdepth=1, + ) + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert not fs.exists(fs_join(target, "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) + assert fs.ls(target, detail=False) == ( + [] if supports_empty_directories else [dummy] + ) + + def test_put_glob_to_new_directory( + self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0 + ): + # Copy scenario 1h + source = local_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + + for target_slash in [False, True]: + t = fs_join(target, "newdir") + if target_slash: + t += "/" + + # Without recursive + fs.put(local_join(source, "subdir", "*"), t) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert not fs.exists(fs_join(target, "newdir", "nesteddir")) + assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) + assert not fs.exists(fs_join(target, "newdir", "subdir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) + + # With recursive + for glob, recursive in zip(["*", "**"], [True, False]): + fs.put(local_join(source, "subdir", glob), t, recursive=recursive) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert fs.isdir(fs_join(target, "newdir", "nesteddir")) + assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) + assert not fs.exists(fs_join(target, "newdir", "subdir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) + + # Limit recursive by maxdepth + fs.put( + local_join(source, "subdir", glob), + t, + recursive=recursive, + maxdepth=1, + ) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert not fs.exists(fs_join(target, "newdir", "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) + assert not fs.exists(fs_join(target, "newdir", "subdir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) + + @pytest.mark.parametrize( + GLOB_EDGE_CASES_TESTS["argnames"], + GLOB_EDGE_CASES_TESTS["argvalues"], + ) + def test_put_glob_edge_cases( + self, + path, + recursive, + maxdepth, + expected, + fs, + fs_join, + fs_target, + local_glob_edge_cases_files, + local_join, + fs_sanitize_path, + ): + # Copy scenario 1g + source = local_glob_edge_cases_files + + target = fs_target + + for new_dir, target_slash in product([True, False], [True, False]): + fs.mkdir(target) + + t = fs_join(target, "newdir") if new_dir else target + t = t + "/" if target_slash else t + + fs.put(local_join(source, path), t, recursive=recursive, maxdepth=maxdepth) + + output = fs.find(target) + if new_dir: + prefixed_expected = [ + fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected + ] + else: + prefixed_expected = [ + fs_sanitize_path(fs_join(target, p)) for p in expected + ] + assert sorted(output) == sorted(prefixed_expected) + + try: + fs.rm(target, recursive=True) + except FileNotFoundError: + pass + + def test_put_list_of_files_to_existing_directory( + self, + fs, + fs_join, + fs_target, + local_join, + local_bulk_operations_scenario_0, + supports_empty_directories, + ): + # Copy scenario 2a + source = local_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + if not supports_empty_directories: + # Force target directory to exist by adding a dummy file + dummy = fs_join(target, "dummy") + fs.touch(dummy) + assert fs.isdir(target) + + source_files = [ + local_join(source, "file1"), + local_join(source, "file2"), + local_join(source, "subdir", "subfile1"), + ] + + for target_slash in [False, True]: + t = target + "/" if target_slash else target + + fs.put(source_files, t) + assert fs.isfile(fs_join(target, "file1")) + assert fs.isfile(fs_join(target, "file2")) + assert fs.isfile(fs_join(target, "subfile1")) + + fs.rm( + [ + fs_join(target, "file1"), + fs_join(target, "file2"), + fs_join(target, "subfile1"), + ], + recursive=True, + ) + assert fs.ls(target, detail=False) == ( + [] if supports_empty_directories else [dummy] + ) + + def test_put_list_of_files_to_new_directory( + self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0 + ): + # Copy scenario 2b + source = local_bulk_operations_scenario_0 + + target = fs_target + fs.mkdir(target) + + source_files = [ + local_join(source, "file1"), + local_join(source, "file2"), + local_join(source, "subdir", "subfile1"), + ] + + fs.put(source_files, fs_join(target, "newdir") + "/") # Note trailing slash + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "file1")) + assert fs.isfile(fs_join(target, "newdir", "file2")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + + def test_put_directory_recursive( + self, fs, fs_join, fs_target, local_fs, local_join, local_path + ): + # https://github.com/fsspec/filesystem_spec/issues/1062 + # Recursive cp/get/put of source directory into non-existent target directory. + src = local_join(local_path, "src") + src_file = local_join(src, "file") + local_fs.mkdir(src) + local_fs.touch(src_file) + + target = fs_target + + # put without slash + assert not fs.exists(target) + for loop in range(2): + fs.put(src, target, recursive=True) + assert fs.isdir(target) + + if loop == 0: + assert fs.isfile(fs_join(target, "file")) + assert not fs.exists(fs_join(target, "src")) + else: + assert fs.isfile(fs_join(target, "file")) + assert fs.isdir(fs_join(target, "src")) + assert fs.isfile(fs_join(target, "src", "file")) + + fs.rm(target, recursive=True) + + # put with slash + assert not fs.exists(target) + for loop in range(2): + fs.put(src + "/", target, recursive=True) + assert fs.isdir(target) + assert fs.isfile(fs_join(target, "file")) + assert not fs.exists(fs_join(target, "src")) + + def test_put_directory_without_files_with_same_name_prefix( + self, + fs, + fs_join, + fs_target, + local_join, + local_dir_and_file_with_same_name_prefix, + supports_empty_directories, + ): + # Create the test dirs + source = local_dir_and_file_with_same_name_prefix + target = fs_target + + # Test without glob + fs.put(local_join(source, "subdir"), fs_target, recursive=True) + + assert fs.isfile(fs_join(fs_target, "subfile.txt")) + assert not fs.isfile(fs_join(fs_target, "subdir.txt")) + + fs.rm([fs_join(target, "subfile.txt")]) + if supports_empty_directories: + assert fs.ls(target) == [] + else: + assert not fs.exists(target) + + # Test with glob + fs.put(local_join(source, "subdir*"), fs_target, recursive=True) + + assert fs.isdir(fs_join(fs_target, "subdir")) + assert fs.isfile(fs_join(fs_target, "subdir", "subfile.txt")) + assert fs.isfile(fs_join(fs_target, "subdir.txt")) + + def test_copy_with_source_and_destination_as_list( + self, fs, fs_target, fs_join, local_join, local_10_files_with_hashed_names + ): + # Create the test dir + source = local_10_files_with_hashed_names + target = fs_target + + # Create list of files for source and destination + source_files = [] + destination_files = [] + for i in range(10): + hashed_i = md5(str(i).encode("utf-8")).hexdigest() + source_files.append(local_join(source, f"{hashed_i}.txt")) + destination_files.append(fs_join(target, f"{hashed_i}.txt")) + + # Copy and assert order was kept + fs.put(lpath=source_files, rpath=destination_files) + + for i in range(10): + file_content = fs.cat(destination_files[i]).decode("utf-8") + assert file_content == str(i) |