diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/numpy/core/memmap.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/numpy/core/memmap.py | 338 |
1 files changed, 338 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/numpy/core/memmap.py b/.venv/lib/python3.12/site-packages/numpy/core/memmap.py new file mode 100644 index 00000000..79c69545 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/numpy/core/memmap.py @@ -0,0 +1,338 @@ +from contextlib import nullcontext + +import numpy as np +from .._utils import set_module +from .numeric import uint8, ndarray, dtype +from numpy.compat import os_fspath, is_pathlib_path + +__all__ = ['memmap'] + +dtypedescr = dtype +valid_filemodes = ["r", "c", "r+", "w+"] +writeable_filemodes = ["r+", "w+"] + +mode_equivalents = { + "readonly":"r", + "copyonwrite":"c", + "readwrite":"r+", + "write":"w+" + } + + +@set_module('numpy') +class memmap(ndarray): + """Create a memory-map to an array stored in a *binary* file on disk. + + Memory-mapped files are used for accessing small segments of large files + on disk, without reading the entire file into memory. NumPy's + memmap's are array-like objects. This differs from Python's ``mmap`` + module, which uses file-like objects. + + This subclass of ndarray has some unpleasant interactions with + some operations, because it doesn't quite fit properly as a subclass. + An alternative to using this subclass is to create the ``mmap`` + object yourself, then create an ndarray with ndarray.__new__ directly, + passing the object created in its 'buffer=' parameter. + + This class may at some point be turned into a factory function + which returns a view into an mmap buffer. + + Flush the memmap instance to write the changes to the file. Currently there + is no API to close the underlying ``mmap``. It is tricky to ensure the + resource is actually closed, since it may be shared between different + memmap instances. + + + Parameters + ---------- + filename : str, file-like object, or pathlib.Path instance + The file name or file object to be used as the array data buffer. + dtype : data-type, optional + The data-type used to interpret the file contents. + Default is `uint8`. + mode : {'r+', 'r', 'w+', 'c'}, optional + The file is opened in this mode: + + +------+-------------------------------------------------------------+ + | 'r' | Open existing file for reading only. | + +------+-------------------------------------------------------------+ + | 'r+' | Open existing file for reading and writing. | + +------+-------------------------------------------------------------+ + | 'w+' | Create or overwrite existing file for reading and writing. | + | | If ``mode == 'w+'`` then `shape` must also be specified. | + +------+-------------------------------------------------------------+ + | 'c' | Copy-on-write: assignments affect data in memory, but | + | | changes are not saved to disk. The file on disk is | + | | read-only. | + +------+-------------------------------------------------------------+ + + Default is 'r+'. + offset : int, optional + In the file, array data starts at this offset. Since `offset` is + measured in bytes, it should normally be a multiple of the byte-size + of `dtype`. When ``mode != 'r'``, even positive offsets beyond end of + file are valid; The file will be extended to accommodate the + additional data. By default, ``memmap`` will start at the beginning of + the file, even if ``filename`` is a file pointer ``fp`` and + ``fp.tell() != 0``. + shape : tuple, optional + The desired shape of the array. If ``mode == 'r'`` and the number + of remaining bytes after `offset` is not a multiple of the byte-size + of `dtype`, you must specify `shape`. By default, the returned array + will be 1-D with the number of elements determined by file size + and data-type. + order : {'C', 'F'}, optional + Specify the order of the ndarray memory layout: + :term:`row-major`, C-style or :term:`column-major`, + Fortran-style. This only has an effect if the shape is + greater than 1-D. The default order is 'C'. + + Attributes + ---------- + filename : str or pathlib.Path instance + Path to the mapped file. + offset : int + Offset position in the file. + mode : str + File mode. + + Methods + ------- + flush + Flush any changes in memory to file on disk. + When you delete a memmap object, flush is called first to write + changes to disk. + + + See also + -------- + lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file. + + Notes + ----- + The memmap object can be used anywhere an ndarray is accepted. + Given a memmap ``fp``, ``isinstance(fp, numpy.ndarray)`` returns + ``True``. + + Memory-mapped files cannot be larger than 2GB on 32-bit systems. + + When a memmap causes a file to be created or extended beyond its + current size in the filesystem, the contents of the new part are + unspecified. On systems with POSIX filesystem semantics, the extended + part will be filled with zero bytes. + + Examples + -------- + >>> data = np.arange(12, dtype='float32') + >>> data.resize((3,4)) + + This example uses a temporary file so that doctest doesn't write + files to your directory. You would use a 'normal' filename. + + >>> from tempfile import mkdtemp + >>> import os.path as path + >>> filename = path.join(mkdtemp(), 'newfile.dat') + + Create a memmap with dtype and shape that matches our data: + + >>> fp = np.memmap(filename, dtype='float32', mode='w+', shape=(3,4)) + >>> fp + memmap([[0., 0., 0., 0.], + [0., 0., 0., 0.], + [0., 0., 0., 0.]], dtype=float32) + + Write data to memmap array: + + >>> fp[:] = data[:] + >>> fp + memmap([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.]], dtype=float32) + + >>> fp.filename == path.abspath(filename) + True + + Flushes memory changes to disk in order to read them back + + >>> fp.flush() + + Load the memmap and verify data was stored: + + >>> newfp = np.memmap(filename, dtype='float32', mode='r', shape=(3,4)) + >>> newfp + memmap([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.]], dtype=float32) + + Read-only memmap: + + >>> fpr = np.memmap(filename, dtype='float32', mode='r', shape=(3,4)) + >>> fpr.flags.writeable + False + + Copy-on-write memmap: + + >>> fpc = np.memmap(filename, dtype='float32', mode='c', shape=(3,4)) + >>> fpc.flags.writeable + True + + It's possible to assign to copy-on-write array, but values are only + written into the memory copy of the array, and not written to disk: + + >>> fpc + memmap([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.]], dtype=float32) + >>> fpc[0,:] = 0 + >>> fpc + memmap([[ 0., 0., 0., 0.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.]], dtype=float32) + + File on disk is unchanged: + + >>> fpr + memmap([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.]], dtype=float32) + + Offset into a memmap: + + >>> fpo = np.memmap(filename, dtype='float32', mode='r', offset=16) + >>> fpo + memmap([ 4., 5., 6., 7., 8., 9., 10., 11.], dtype=float32) + + """ + + __array_priority__ = -100.0 + + def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0, + shape=None, order='C'): + # Import here to minimize 'import numpy' overhead + import mmap + import os.path + try: + mode = mode_equivalents[mode] + except KeyError as e: + if mode not in valid_filemodes: + raise ValueError( + "mode must be one of {!r} (got {!r})" + .format(valid_filemodes + list(mode_equivalents.keys()), mode) + ) from None + + if mode == 'w+' and shape is None: + raise ValueError("shape must be given if mode == 'w+'") + + if hasattr(filename, 'read'): + f_ctx = nullcontext(filename) + else: + f_ctx = open(os_fspath(filename), ('r' if mode == 'c' else mode)+'b') + + with f_ctx as fid: + fid.seek(0, 2) + flen = fid.tell() + descr = dtypedescr(dtype) + _dbytes = descr.itemsize + + if shape is None: + bytes = flen - offset + if bytes % _dbytes: + raise ValueError("Size of available data is not a " + "multiple of the data-type size.") + size = bytes // _dbytes + shape = (size,) + else: + if not isinstance(shape, tuple): + shape = (shape,) + size = np.intp(1) # avoid default choice of np.int_, which might overflow + for k in shape: + size *= k + + bytes = int(offset + size*_dbytes) + + if mode in ('w+', 'r+') and flen < bytes: + fid.seek(bytes - 1, 0) + fid.write(b'\0') + fid.flush() + + if mode == 'c': + acc = mmap.ACCESS_COPY + elif mode == 'r': + acc = mmap.ACCESS_READ + else: + acc = mmap.ACCESS_WRITE + + start = offset - offset % mmap.ALLOCATIONGRANULARITY + bytes -= start + array_offset = offset - start + mm = mmap.mmap(fid.fileno(), bytes, access=acc, offset=start) + + self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm, + offset=array_offset, order=order) + self._mmap = mm + self.offset = offset + self.mode = mode + + if is_pathlib_path(filename): + # special case - if we were constructed with a pathlib.path, + # then filename is a path object, not a string + self.filename = filename.resolve() + elif hasattr(fid, "name") and isinstance(fid.name, str): + # py3 returns int for TemporaryFile().name + self.filename = os.path.abspath(fid.name) + # same as memmap copies (e.g. memmap + 1) + else: + self.filename = None + + return self + + def __array_finalize__(self, obj): + if hasattr(obj, '_mmap') and np.may_share_memory(self, obj): + self._mmap = obj._mmap + self.filename = obj.filename + self.offset = obj.offset + self.mode = obj.mode + else: + self._mmap = None + self.filename = None + self.offset = None + self.mode = None + + def flush(self): + """ + Write any changes in the array to the file on disk. + + For further information, see `memmap`. + + Parameters + ---------- + None + + See Also + -------- + memmap + + """ + if self.base is not None and hasattr(self.base, 'flush'): + self.base.flush() + + def __array_wrap__(self, arr, context=None): + arr = super().__array_wrap__(arr, context) + + # Return a memmap if a memmap was given as the output of the + # ufunc. Leave the arr class unchanged if self is not a memmap + # to keep original memmap subclasses behavior + if self is arr or type(self) is not memmap: + return arr + # Return scalar instead of 0d memmap, e.g. for np.sum with + # axis=None + if arr.shape == (): + return arr[()] + # Return ndarray otherwise + return arr.view(np.ndarray) + + def __getitem__(self, index): + res = super().__getitem__(index) + if type(res) is memmap and res._mmap is None: + return res.view(type=ndarray) + return res |