aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/fsspec/implementations/git.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/fsspec/implementations/git.py')
-rw-r--r--.venv/lib/python3.12/site-packages/fsspec/implementations/git.py115
1 files changed, 115 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/fsspec/implementations/git.py b/.venv/lib/python3.12/site-packages/fsspec/implementations/git.py
new file mode 100644
index 00000000..7b9d3539
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/fsspec/implementations/git.py
@@ -0,0 +1,115 @@
+import os
+
+import pygit2
+
+from fsspec.spec import AbstractFileSystem
+
+from .memory import MemoryFile
+
+
+class GitFileSystem(AbstractFileSystem):
+ """Browse the files of a local git repo at any hash/tag/branch
+
+ (experimental backend)
+ """
+
+ root_marker = ""
+ cachable = True
+
+ def __init__(self, path=None, fo=None, ref=None, **kwargs):
+ """
+
+ Parameters
+ ----------
+ path: str (optional)
+ Local location of the repo (uses current directory if not given).
+ May be deprecated in favour of ``fo``. When used with a higher
+ level function such as fsspec.open(), may be of the form
+ "git://[path-to-repo[:]][ref@]path/to/file" (but the actual
+ file path should not contain "@" or ":").
+ fo: str (optional)
+ Same as ``path``, but passed as part of a chained URL. This one
+ takes precedence if both are given.
+ ref: str (optional)
+ Reference to work with, could be a hash, tag or branch name. Defaults
+ to current working tree. Note that ``ls`` and ``open`` also take hash,
+ so this becomes the default for those operations
+ kwargs
+ """
+ super().__init__(**kwargs)
+ self.repo = pygit2.Repository(fo or path or os.getcwd())
+ self.ref = ref or "master"
+
+ @classmethod
+ def _strip_protocol(cls, path):
+ path = super()._strip_protocol(path).lstrip("/")
+ if ":" in path:
+ path = path.split(":", 1)[1]
+ if "@" in path:
+ path = path.split("@", 1)[1]
+ return path.lstrip("/")
+
+ def _path_to_object(self, path, ref):
+ comm, ref = self.repo.resolve_refish(ref or self.ref)
+ parts = path.split("/")
+ tree = comm.tree
+ for part in parts:
+ if part and isinstance(tree, pygit2.Tree):
+ if part not in tree:
+ raise FileNotFoundError(path)
+ tree = tree[part]
+ return tree
+
+ @staticmethod
+ def _get_kwargs_from_urls(path):
+ if path.startswith("git://"):
+ path = path[6:]
+ out = {}
+ if ":" in path:
+ out["path"], path = path.split(":", 1)
+ if "@" in path:
+ out["ref"], path = path.split("@", 1)
+ return out
+
+ @staticmethod
+ def _object_to_info(obj, path=None):
+ # obj.name and obj.filemode are None for the root tree!
+ is_dir = isinstance(obj, pygit2.Tree)
+ return {
+ "type": "directory" if is_dir else "file",
+ "name": (
+ "/".join([path, obj.name or ""]).lstrip("/") if path else obj.name
+ ),
+ "hex": str(obj.id),
+ "mode": "100644" if obj.filemode is None else f"{obj.filemode:o}",
+ "size": 0 if is_dir else obj.size,
+ }
+
+ def ls(self, path, detail=True, ref=None, **kwargs):
+ tree = self._path_to_object(self._strip_protocol(path), ref)
+ return [
+ GitFileSystem._object_to_info(obj, path)
+ if detail
+ else GitFileSystem._object_to_info(obj, path)["name"]
+ for obj in (tree if isinstance(tree, pygit2.Tree) else [tree])
+ ]
+
+ def info(self, path, ref=None, **kwargs):
+ tree = self._path_to_object(self._strip_protocol(path), ref)
+ return GitFileSystem._object_to_info(tree, path)
+
+ def ukey(self, path, ref=None):
+ return self.info(path, ref=ref)["hex"]
+
+ def _open(
+ self,
+ path,
+ mode="rb",
+ block_size=None,
+ autocommit=True,
+ cache_options=None,
+ ref=None,
+ **kwargs,
+ ):
+ obj = self._path_to_object(path, ref or self.ref)
+ return MemoryFile(data=obj.data)