diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/xlrd/__init__.py | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-master.tar.gz |
Diffstat (limited to '.venv/lib/python3.12/site-packages/xlrd/__init__.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/xlrd/__init__.py | 213 |
1 files changed, 213 insertions, 0 deletions
# Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd
# This module is part of the xlrd package, which is released under a
# BSD-style licence.
import os
import pprint
import sys
import zipfile

from . import timemachine
from .biffh import (
    XL_CELL_BLANK, XL_CELL_BOOLEAN, XL_CELL_DATE, XL_CELL_EMPTY, XL_CELL_ERROR,
    XL_CELL_NUMBER, XL_CELL_TEXT, XLRDError, biff_text_from_num,
    error_text_from_code,
)
from .book import Book, colname, open_workbook_xls
from .compdoc import SIGNATURE as XLS_SIGNATURE
from .formula import *  # is constrained by __all__
from .info import __VERSION__, __version__
from .sheet import empty_cell
from .xldate import XLDateError, xldate_as_datetime, xldate_as_tuple


#: descriptions of the file types :mod:`xlrd` can :func:`inspect <inspect_format>`.
FILE_FORMAT_DESCRIPTIONS = {
    'xls': 'Excel xls',
    'xlsb': 'Excel 2007 xlsb file',
    'xlsx': 'Excel xlsx file',
    'ods': 'Openoffice.org ODS file',
    'zip': 'Unknown ZIP file',
    None: 'Unknown file type',
}

ZIP_SIGNATURE = b"PK\x03\x04"

# Number of leading bytes needed to test for either known signature.
PEEK_SIZE = max(len(XLS_SIGNATURE), len(ZIP_SIGNATURE))


def inspect_format(path=None, content=None):
    """
    Inspect the content at the supplied path or the :class:`bytes` content provided
    and return the file's type as a :class:`str`, or ``None`` if it cannot
    be determined.

    :param path:
      A :class:`string <str>` path containing the content to inspect.
      ``~`` will be expanded. Only used when ``content`` is empty or ``None``.

    :param content:
      The :class:`bytes` content to inspect. When truthy it takes precedence
      over ``path``.

    :returns:
       A :class:`str`, or ``None`` if the format cannot be determined.
       The return value can always be looked up in :data:`FILE_FORMAT_DESCRIPTIONS`
       to return a human-readable description of the format found.
    """
    if content:
        peek = content[:PEEK_SIZE]
    else:
        path = os.path.expanduser(path)
        with open(path, "rb") as f:
            peek = f.read(PEEK_SIZE)

    if peek.startswith(XLS_SIGNATURE):
        return 'xls'

    if peek.startswith(ZIP_SIGNATURE):
        source = timemachine.BYTES_IO(content) if content else path
        # Use the archive as a context manager so the underlying file handle
        # is released as soon as the member list has been read.
        with zipfile.ZipFile(source) as zf:
            # Some third party files use backslash separators and/or
            # unexpected letter case in member names, so normalise both
            # before looking for the marker components.
            component_names = {
                name.replace('\\', '/').lower() for name in zf.namelist()
            }

        if 'xl/workbook.xml' in component_names:
            return 'xlsx'
        if 'xl/workbook.bin' in component_names:
            return 'xlsb'
        if 'content.xml' in component_names:
            return 'ods'
        return 'zip'

    # Neither signature matched: the format is unknown.
    return None


def open_workbook(filename=None,
                  logfile=sys.stdout,
                  verbosity=0,
                  use_mmap=True,
                  file_contents=None,
                  encoding_override=None,
                  formatting_info=False,
                  on_demand=False,
                  ragged_rows=False,
                  ignore_workbook_corruption=False
                  ):
    """
    Open a spreadsheet file for data extraction.

    :param filename: The path to the spreadsheet file to be opened.

    :param logfile: An open file to which messages and diagnostics are written.

    :param verbosity: Increases the volume of trace material written to the
                      logfile.

    :param use_mmap:

      Whether to use the mmap module is determined heuristically.
      Use this arg to override the result.

      Current heuristic: mmap is used if it exists.

    :param file_contents:

      A string or an :class:`mmap.mmap` object or some other behave-alike
      object. If ``file_contents`` is supplied, ``filename`` will not be used,
      except (possibly) in messages.

    :param encoding_override:

      Used to overcome missing or bad codepage information
      in older-version files. See :doc:`unicode`.

    :param formatting_info:

      The default is ``False``, which saves memory.
      In this case, "Blank" cells, which are those with their own formatting
      information but no data, are treated as empty by ignoring the file's
      ``BLANK`` and ``MULBLANK`` records.
      This cuts off any bottom or right "margin" of rows of empty or blank
      cells.
      Only :meth:`~xlrd.sheet.Sheet.cell_value` and
      :meth:`~xlrd.sheet.Sheet.cell_type` are available.

      When ``True``, formatting information will be read from the spreadsheet
      file. This provides all cells, including empty and blank cells.
      Formatting information is available for each cell.

      Note that files detected as xlsx are rejected by this function with
      :class:`~xlrd.biffh.XLRDError` regardless of this option.

    :param on_demand:

      Governs whether sheets are all loaded initially or when demanded
      by the caller. See :doc:`on_demand`.

    :param ragged_rows:

      The default of ``False`` means all rows are padded out with empty cells so
      that all rows have the same size as found in
      :attr:`~xlrd.sheet.Sheet.ncols`.

      ``True`` means that there are no empty cells at the ends of rows.
      This can result in substantial memory savings if rows are of widely
      varying sizes. See also the :meth:`~xlrd.sheet.Sheet.row_len` method.

    :param ignore_workbook_corruption:

      This option allows corrupted workbooks to be read.
      When ``False`` you may face CompDocError: Workbook corruption.
      When ``True`` that exception will be ignored.

    :raises XLRDError:
      If the content is recognised as a format other than ``xls``
      (xlsx, xlsb, ods or another ZIP file).

    :returns: An instance of the :class:`~xlrd.book.Book` class.
    """

    file_format = inspect_format(filename, file_contents)
    # We have to let unknown file formats pass through here, as some ancient
    # files that xlrd can parse don't start with the expected signature.
    if file_format and file_format != 'xls':
        raise XLRDError(FILE_FORMAT_DESCRIPTIONS[file_format]+'; not supported')

    bk = open_workbook_xls(
        filename=filename,
        logfile=logfile,
        verbosity=verbosity,
        use_mmap=use_mmap,
        file_contents=file_contents,
        encoding_override=encoding_override,
        formatting_info=formatting_info,
        on_demand=on_demand,
        ragged_rows=ragged_rows,
        ignore_workbook_corruption=ignore_workbook_corruption,
    )

    return bk


def dump(filename, outfile=sys.stdout, unnumbered=False):
    """
    For debugging: dump an XLS file's BIFF records in char & hex.

    :param filename: The path to the file to be dumped.
    :param outfile: An open file, to which the dump is written.
    :param unnumbered: If true, omit offsets (for meaningful diffs).
    """
    from .biffh import biff_dump
    bk = Book()
    bk.biff2_8_load(filename=filename, logfile=outfile, )
    biff_dump(bk.mem, bk.base, bk.stream_len, 0, outfile, unnumbered)


def count_records(filename, outfile=sys.stdout):
    """
    For debugging and analysis: summarise the file's BIFF records.
    ie: produce a sorted file of ``(record_name, count)``.

    :param filename: The path to the file to be summarised.
    :param outfile: An open file, to which the summary is written.
    """
    from .biffh import biff_count_records
    bk = Book()
    bk.biff2_8_load(filename=filename, logfile=outfile, )
    biff_count_records(bk.mem, bk.base, bk.stream_len, outfile)