aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/openpyxl/reader
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/openpyxl/reader')
-rw-r--r--.venv/lib/python3.12/site-packages/openpyxl/reader/__init__.py1
-rw-r--r--.venv/lib/python3.12/site-packages/openpyxl/reader/drawings.py71
-rw-r--r--.venv/lib/python3.12/site-packages/openpyxl/reader/excel.py349
-rw-r--r--.venv/lib/python3.12/site-packages/openpyxl/reader/strings.py44
-rw-r--r--.venv/lib/python3.12/site-packages/openpyxl/reader/workbook.py133
5 files changed, 598 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/openpyxl/reader/__init__.py b/.venv/lib/python3.12/site-packages/openpyxl/reader/__init__.py
new file mode 100644
index 00000000..ab6cdead
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openpyxl/reader/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) 2010-2024 openpyxl
diff --git a/.venv/lib/python3.12/site-packages/openpyxl/reader/drawings.py b/.venv/lib/python3.12/site-packages/openpyxl/reader/drawings.py
new file mode 100644
index 00000000..caaa8570
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openpyxl/reader/drawings.py
@@ -0,0 +1,71 @@
+
+# Copyright (c) 2010-2024 openpyxl
+
+
+from io import BytesIO
+from warnings import warn
+
+from openpyxl.xml.functions import fromstring
+from openpyxl.xml.constants import IMAGE_NS
+from openpyxl.packaging.relationship import (
+ get_rel,
+ get_rels_path,
+ get_dependents,
+)
+from openpyxl.drawing.spreadsheet_drawing import SpreadsheetDrawing
+from openpyxl.drawing.image import Image, PILImage
+from openpyxl.chart.chartspace import ChartSpace
+from openpyxl.chart.reader import read_chart
+
+
def find_images(archive, path):
    """
    Given the path to a drawing file extract charts and images

    Ignore errors due to unsupported parts of DrawingML

    :param archive: the opened package; ``read()`` and ``namelist()`` are
        called on it here (presumably a :class:`zipfile.ZipFile`)
    :param path: archive path of the drawing part
    :return: ``(charts, images)``, two lists. Each item has its ``anchor``
        set from the relationship in the drawing that referenced it.

    Parts that cannot be resolved or read are reported with a warning and
    skipped so that one broken reference does not abort the whole load.
    """

    src = archive.read(path)
    tree = fromstring(src)
    try:
        drawing = SpreadsheetDrawing.from_tree(tree)
    except TypeError:
        warn("DrawingML support is incomplete and limited to charts and images only. Shapes and drawings will be lost.")
        return [], []

    rels_path = get_rels_path(path)
    if rels_path not in archive.namelist():
        # Without a relationships part no chart or image reference can be
        # resolved to an archive member. Previously this fell through with a
        # plain list and crashed on ``deps.get``.
        if list(drawing._chart_rels) or list(drawing._blip_rels):
            warn(f"Unable to read charts or images from {path} because {rels_path} is missing")
        return [], []
    deps = get_dependents(archive, rels_path)

    charts = []
    for rel in drawing._chart_rels:
        try:
            cs = get_rel(archive, deps, rel.id, ChartSpace)
        except (TypeError, KeyError) as e:
            # KeyError: the id or the target part could not be located
            warn(f"Unable to read chart {rel.id} from {path} {e}")
            continue
        chart = read_chart(cs)
        chart.anchor = rel.anchor
        charts.append(chart)

    images = []
    if not PILImage: # Pillow not installed, drop images
        return charts, images

    for rel in drawing._blip_rels:
        try:
            dep = deps.get(rel.embed)
        except KeyError:
            dep = None
        if dep is None:
            warn(f"Unable to read image {rel.embed} from {path}: relationship not found")
            continue
        if dep.Type != IMAGE_NS:
            continue
        try:
            image = Image(BytesIO(archive.read(dep.target)))
        except (OSError, KeyError):
            # OSError: unreadable image data; KeyError: target missing from the archive
            msg = "The image {0} will be removed because it cannot be read".format(dep.target)
            warn(msg)
            continue
        if image.format.upper() == "WMF": # cannot save
            msg = "{0} image format is not supported so the image is being dropped".format(image.format)
            warn(msg)
            continue
        image.anchor = rel.anchor
        images.append(image)
    return charts, images
diff --git a/.venv/lib/python3.12/site-packages/openpyxl/reader/excel.py b/.venv/lib/python3.12/site-packages/openpyxl/reader/excel.py
new file mode 100644
index 00000000..dfd8eeac
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openpyxl/reader/excel.py
@@ -0,0 +1,349 @@
+# Copyright (c) 2010-2024 openpyxl
+
+
+"""Read an xlsx file into Python"""
+
+# Python stdlib imports
+from zipfile import ZipFile, ZIP_DEFLATED
+from io import BytesIO
+import os.path
+import warnings
+
+from openpyxl.pivot.table import TableDefinition
+
+# Allow blanket setting of KEEP_VBA for testing
+try:
+ from ..tests import KEEP_VBA
+except ImportError:
+ KEEP_VBA = False
+
+# package imports
+from openpyxl.utils.exceptions import InvalidFileException
+from openpyxl.xml.constants import (
+ ARC_CORE,
+ ARC_CUSTOM,
+ ARC_CONTENT_TYPES,
+ ARC_WORKBOOK,
+ ARC_THEME,
+ COMMENTS_NS,
+ SHARED_STRINGS,
+ XLTM,
+ XLTX,
+ XLSM,
+ XLSX,
+)
+from openpyxl.cell import MergedCell
+from openpyxl.comments.comment_sheet import CommentSheet
+
+from .strings import read_string_table, read_rich_text
+from .workbook import WorkbookParser
+from openpyxl.styles.stylesheet import apply_stylesheet
+
+from openpyxl.packaging.core import DocumentProperties
+from openpyxl.packaging.custom import CustomPropertyList
+from openpyxl.packaging.manifest import Manifest, Override
+
+from openpyxl.packaging.relationship import (
+ RelationshipList,
+ get_dependents,
+ get_rels_path,
+)
+
+from openpyxl.worksheet._read_only import ReadOnlyWorksheet
+from openpyxl.worksheet._reader import WorksheetReader
+from openpyxl.chartsheet import Chartsheet
+from openpyxl.worksheet.table import Table
+from openpyxl.drawing.spreadsheet_drawing import SpreadsheetDrawing
+
+from openpyxl.xml.functions import fromstring
+
+from .drawings import find_images
+
+
+SUPPORTED_FORMATS = ('.xlsx', '.xlsm', '.xltx', '.xltm')
+
+
def _validate_archive(filename):
    """
    Open *filename* as a zip archive after a basic sanity check.

    Objects that expose a ``read`` attribute are treated as file-like and
    are handed to :class:`zipfile.ZipFile` unchecked. Anything else is
    treated as a path: its extension (lower-cased) must be listed in
    SUPPORTED_FORMATS, otherwise an InvalidFileException is raised with a
    message tailored to ``.xls``, ``.xlsb`` or any other extension.

    Returns the ZipFile instance opened in read mode.
    """
    if hasattr(filename, 'read'):
        return ZipFile(filename, 'r')

    extension = os.path.splitext(filename)[-1].lower()
    if extension in SUPPORTED_FORMATS:
        return ZipFile(filename, 'r')

    if extension == '.xls':
        reason = ('openpyxl does not support the old .xls file format, '
                  'please use xlrd to read this file, or convert it to '
                  'the more recent .xlsx file format.')
    elif extension == '.xlsb':
        reason = ('openpyxl does not support binary format .xlsb, '
                  'please convert this file to .xlsx format if you want '
                  'to open it with openpyxl')
    else:
        template = ('openpyxl does not support %s file format, '
                    'please check you can open '
                    'it with Excel first. '
                    'Supported formats are: %s')
        reason = template % (extension, ','.join(SUPPORTED_FORMATS))
    raise InvalidFileException(reason)
+
+
def _find_workbook_part(package):
    """
    Locate the manifest entry describing the main workbook part.

    Each known workbook content type is looked up in *package* in turn and
    the first truthy result is returned. Failing that, the content types of
    the package defaults are checked and, if one of them is a workbook type,
    an Override pointing at ARC_WORKBOOK is synthesised with that type.

    Raises IOError when neither lookup succeeds.
    """
    candidates = [XLTM, XLTX, XLSM, XLSX]
    for content_type in candidates:
        found = package.find(content_type)
        if found:
            return found

    # some applications reassign the default for application/xml
    default_types = {entry.ContentType for entry in package.Default}
    matching = default_types & set(candidates)
    if not matching:
        raise IOError("File contains no valid workbook part")
    return Override("/" + ARC_WORKBOOK, matching.pop())
+
+
class ExcelReader:

    """
    Read an Excel package and dispatch the contents to the relevant modules

    Typical use is to construct the reader and call :meth:`read`; the
    resulting workbook is then available as ``self.wb``.
    """

    def __init__(self, fn, read_only=False, keep_vba=KEEP_VBA,
                 data_only=False, keep_links=True, rich_text=False):
        """
        :param fn: path or file-like object, validated and opened by
            ``_validate_archive``
        :param read_only: create ReadOnlyWorksheet objects and keep the
            archive open instead of parsing the sheets eagerly
        :param keep_vba: attach an in-memory copy of the archive to the workbook
        :param data_only: passed on to the worksheet reader and stored on the workbook
        :param keep_links: passed on to the workbook parser
        :param rich_text: read shared strings with ``read_rich_text`` and
            pass the flag on to the worksheet reader
        """
        self.archive = _validate_archive(fn)
        self.valid_files = self.archive.namelist()
        self.read_only = read_only
        self.keep_vba = keep_vba
        self.data_only = data_only
        self.keep_links = keep_links
        self.rich_text = rich_text
        self.shared_strings = []


    def read_manifest(self):
        """Parse the content types part into ``self.package``."""
        src = self.archive.read(ARC_CONTENT_TYPES)
        root = fromstring(src)
        self.package = Manifest.from_tree(root)


    def read_strings(self):
        """
        Load the shared strings table, if the manifest lists one.

        ``self.shared_strings`` stays an empty list otherwise.
        """
        ct = self.package.find(SHARED_STRINGS)
        reader = read_string_table
        if self.rich_text:
            reader = read_rich_text
        if ct is not None:
            # PartName carries a leading "/" which archive member names lack
            strings_path = ct.PartName[1:]
            with self.archive.open(strings_path,) as src:
                self.shared_strings = reader(src)


    def read_workbook(self):
        """Parse the workbook part and prepare ``self.wb`` for the sheets."""
        wb_part = _find_workbook_part(self.package)
        self.parser = WorkbookParser(self.archive, wb_part.PartName[1:], keep_links=self.keep_links)
        self.parser.parse()
        wb = self.parser.wb
        wb._sheets = []
        wb._data_only = self.data_only
        wb._read_only = self.read_only
        wb.template = wb_part.ContentType in (XLTX, XLTM)

        # If are going to preserve the vba then attach a copy of the archive to the
        # workbook so that is available for the save.
        if self.keep_vba:
            wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED)
            for name in self.valid_files:
                wb.vba_archive.writestr(name, self.archive.read(name))

        if self.read_only:
            wb._archive = self.archive

        self.wb = wb


    def read_properties(self):
        """Read the core document properties when the part is present."""
        if ARC_CORE in self.valid_files:
            src = fromstring(self.archive.read(ARC_CORE))
            self.wb.properties = DocumentProperties.from_tree(src)


    def read_custom(self):
        """Read the custom document properties when the part is present."""
        if ARC_CUSTOM in self.valid_files:
            src = fromstring(self.archive.read(ARC_CUSTOM))
            self.wb.custom_doc_props = CustomPropertyList.from_tree(src)


    def read_theme(self):
        """Keep the raw theme part on the workbook when it is present."""
        if ARC_THEME in self.valid_files:
            self.wb.loaded_theme = self.archive.read(ARC_THEME)


    def read_chartsheet(self, sheet, rel):
        """
        Read a chartsheet and the charts of its drawings.

        :param sheet: the sheet entry from the workbook; only ``name`` is used
        :param rel: the relationship whose ``target`` is the chartsheet part
        """
        sheet_path = rel.target
        rels_path = get_rels_path(sheet_path)
        # Must offer ``find()`` even when the sheet has no relationships part;
        # a plain list would raise AttributeError below.
        rels = RelationshipList()
        if rels_path in self.valid_files:
            rels = get_dependents(self.archive, rels_path)

        with self.archive.open(sheet_path, "r") as src:
            xml = src.read()
        node = fromstring(xml)
        cs = Chartsheet.from_tree(node)
        cs._parent = self.wb
        cs.title = sheet.name
        self.wb._add_sheet(cs)

        drawings = rels.find(SpreadsheetDrawing._rel_type)
        for drawing_rel in drawings:
            # images found in the drawing are not added to chartsheets
            charts, images = find_images(self.archive, drawing_rel.target)
            for c in charts:
                cs.add_chart(c)


    def read_worksheets(self):
        """
        Create a sheet in the workbook for every sheet the parser reports.

        Sheets whose target is not in the archive are skipped and
        chartsheets are delegated to :meth:`read_chartsheet`. In read-only
        mode only a ReadOnlyWorksheet is registered; otherwise the sheet is
        parsed and its comments, tables, drawings and pivot tables attached.
        """
        comment_warning = """Cell '{0}':{1} is part of a merged range but has a comment which will be removed because merged cells cannot contain any data."""
        for sheet, rel in self.parser.find_sheets():
            if rel.target not in self.valid_files:
                continue

            if "chartsheet" in rel.Type:
                self.read_chartsheet(sheet, rel)
                continue

            rels_path = get_rels_path(rel.target)
            rels = RelationshipList()
            if rels_path in self.valid_files:
                rels = get_dependents(self.archive, rels_path)

            if self.read_only:
                ws = ReadOnlyWorksheet(self.wb, sheet.name, rel.target, self.shared_strings)
                ws.sheet_state = sheet.state
                self.wb._sheets.append(ws)
                continue

            # the context manager releases the handle even if parsing fails
            with self.archive.open(rel.target) as fh:
                ws = self.wb.create_sheet(sheet.name)
                ws._rels = rels
                ws_parser = WorksheetReader(ws, fh, self.shared_strings, self.data_only, self.rich_text)
                ws_parser.bind_all()

            # assign any comments to cells
            for r in rels.find(COMMENTS_NS):
                src = self.archive.read(r.target)
                comment_sheet = CommentSheet.from_tree(fromstring(src))
                for ref, comment in comment_sheet.comments:
                    try:
                        ws[ref].comment = comment
                    except AttributeError:
                        c = ws[ref]
                        if isinstance(c, MergedCell):
                            warnings.warn(comment_warning.format(ws.title, c.coordinate))

            # preserve link to VML file if VBA
            if self.wb.vba_archive and ws.legacy_drawing:
                ws.legacy_drawing = rels.get(ws.legacy_drawing).target
            else:
                ws.legacy_drawing = None

            for t in ws_parser.tables:
                src = self.archive.read(t)
                xml = fromstring(src)
                table = Table.from_tree(xml)
                ws.add_table(table)

            drawings = rels.find(SpreadsheetDrawing._rel_type)
            for drawing_rel in drawings:
                charts, images = find_images(self.archive, drawing_rel.target)
                for c in charts:
                    ws.add_chart(c, c.anchor)
                for im in images:
                    ws.add_image(im, im.anchor)

            pivot_rel = rels.find(TableDefinition.rel_type)
            pivot_caches = self.parser.pivot_caches
            for r in pivot_rel:
                pivot_path = r.Target
                src = self.archive.read(pivot_path)
                tree = fromstring(src)
                pivot = TableDefinition.from_tree(tree)
                pivot.cache = pivot_caches[pivot.cacheId]
                ws.add_pivot(pivot)

            ws.sheet_state = sheet.state


    def read(self):
        """
        Run every reading step in order and close the archive afterwards,
        unless in read-only mode where it stays open.

        :raises ValueError: when a step raises ValueError; the new error
            names the step that failed and chains the original exception.
        """
        # updated before each step so the error message can name the failing one
        action = "read manifest"
        try:
            self.read_manifest()
            action = "read strings"
            self.read_strings()
            action = "read workbook"
            self.read_workbook()
            action = "read properties"
            self.read_properties()
            action = "read custom properties"
            self.read_custom()
            action = "read theme"
            self.read_theme()
            action = "read stylesheet"
            apply_stylesheet(self.archive, self.wb)
            action = "read worksheets"
            self.read_worksheets()
            action = "assign names"
            self.parser.assign_names()
            if not self.read_only:
                self.archive.close()
        except ValueError as e:
            raise ValueError(
                f"Unable to read workbook: could not {action} from {self.archive.filename}.\n"
                "This is most probably because the workbook source files contain some invalid XML.\n"
                "Please see the exception for more details."
            ) from e
+
+
def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA,
                  data_only=False, keep_links=True, rich_text=False):
    """Read *filename* with an :class:`ExcelReader` and return its workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :param keep_links: whether links to external workbooks should be preserved. The default is True
    :type keep_links: bool

    :param rich_text: if set to True openpyxl will preserve any rich text formatting in cells. The default is False
    :type rich_text: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        With ``read_only=True`` the worksheets are created as
        :class:`openpyxl.worksheet._read_only.ReadOnlyWorksheet` objects
        and the returned workbook will be read-only.

    """
    excel_reader = ExcelReader(
        filename,
        read_only=read_only,
        keep_vba=keep_vba,
        data_only=data_only,
        keep_links=keep_links,
        rich_text=rich_text,
    )
    excel_reader.read()
    workbook = excel_reader.wb
    return workbook
diff --git a/.venv/lib/python3.12/site-packages/openpyxl/reader/strings.py b/.venv/lib/python3.12/site-packages/openpyxl/reader/strings.py
new file mode 100644
index 00000000..5168f201
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openpyxl/reader/strings.py
@@ -0,0 +1,44 @@
+# Copyright (c) 2010-2024 openpyxl
+
+from openpyxl.cell.text import Text
+
+from openpyxl.xml.functions import iterparse
+from openpyxl.xml.constants import SHEET_MAIN_NS
+from openpyxl.cell.rich_text import CellRichText
+
+
def read_string_table(xml_source):
    """
    Return the plain-text content of every shared string item.

    Items are collected in the order the parser yields them; every
    occurrence of 'x005F_' is removed from the text.
    """
    si_tag = '{%s}si' % SHEET_MAIN_NS
    collected = []

    for _event, element in iterparse(xml_source):
        if element.tag != si_tag:
            continue
        content = Text.from_tree(element).content
        content = content.replace('x005F_', '')
        # the element is no longer needed once its text has been extracted
        element.clear()
        collected.append(content)

    return collected
+
+
def read_rich_text(xml_source):
    """
    Return every shared string item, keeping rich text where present.

    An item that parses to an empty CellRichText becomes '' and one that
    holds a single plain ``str`` is unwrapped to that string; everything
    else is kept as the CellRichText object.
    """
    si_tag = '{%s}si' % SHEET_MAIN_NS
    collected = []

    for _event, element in iterparse(xml_source):
        if element.tag != si_tag:
            continue
        value = CellRichText.from_tree(element)
        size = len(value)
        if size == 0:
            value = ''
        elif size == 1 and isinstance(value[0], str):
            value = value[0]
        # the element is no longer needed once it has been converted
        element.clear()
        collected.append(value)

    return collected
diff --git a/.venv/lib/python3.12/site-packages/openpyxl/reader/workbook.py b/.venv/lib/python3.12/site-packages/openpyxl/reader/workbook.py
new file mode 100644
index 00000000..2afbfddb
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/openpyxl/reader/workbook.py
@@ -0,0 +1,133 @@
+# Copyright (c) 2010-2024 openpyxl
+
+from warnings import warn
+
+from openpyxl.xml.functions import fromstring
+
+from openpyxl.packaging.relationship import (
+ get_dependents,
+ get_rels_path,
+ get_rel,
+)
+from openpyxl.packaging.workbook import WorkbookPackage
+from openpyxl.workbook import Workbook
+from openpyxl.workbook.defined_name import DefinedNameList
+from openpyxl.workbook.external_link.external import read_external_link
+from openpyxl.pivot.cache import CacheDefinition
+from openpyxl.pivot.record import RecordList
+from openpyxl.worksheet.print_settings import PrintTitles, PrintArea
+
+from openpyxl.utils.datetime import CALENDAR_MAC_1904
+
+
class WorkbookParser:
    """
    Parse the workbook part of a package into a new Workbook (``self.wb``)
    and keep the information needed to locate the individual sheets.
    """

    # cache for the ``rels`` property; filled on first access
    _rels = None

    def __init__(self, archive, workbook_part_name, keep_links=True):
        """
        :param archive: the opened package to read parts from
        :param workbook_part_name: archive path of the workbook part
        :param keep_links: when False external references are discarded while parsing
        """
        self.archive = archive
        self.workbook_part_name = workbook_part_name
        self.defined_names = DefinedNameList()
        self.wb = Workbook()
        self.keep_links = keep_links
        self.sheets = []


    @property
    def rels(self):
        """Relationships of the workbook part as a dict, loaded once on demand."""
        if self._rels is None:
            self._rels = get_dependents(self.archive, get_rels_path(self.workbook_part_name)).to_dict()
        return self._rels


    def parse(self):
        """Read the workbook part and copy its settings onto ``self.wb``."""
        src = self.archive.read(self.workbook_part_name)
        node = fromstring(src)
        package = WorkbookPackage.from_tree(node)
        if package.properties.date1904:
            self.wb.epoch = CALENDAR_MAC_1904

        self.wb.code_name = package.properties.codeName
        self.wb.active = package.active
        self.wb.views = package.bookViews
        self.sheets = package.sheets
        self.wb.calculation = package.calcPr
        self.caches = package.pivotCaches

        # external links contain cached worksheets and can be very big
        if not self.keep_links:
            package.externalReferences = []

        for ext_ref in package.externalReferences:
            rel = self.rels.get(ext_ref.id)
            self.wb._external_links.append(
                read_external_link(self.archive, rel.Target)
            )

        if package.definedNames:
            self.defined_names = package.definedNames

        self.wb.security = package.workbookProtection


    def find_sheets(self):
        """
        Find all sheets in the workbook and return the link to the source file.

        Older XLSM files sometimes contain invalid sheet elements.
        Warn user when these are removed.

        Yields ``(sheet, relationship)`` pairs; sheets without an id are
        skipped with a warning that names the sheet.
        """

        for sheet in self.sheets:
            if not sheet.id:
                # Interpolate the name directly: the previous mix of an
                # f-string with "{0}".format() always rendered a literal "0".
                msg = f"File contains an invalid specification for {sheet.name}. This will be removed"
                warn(msg)
                continue
            yield sheet, self.rels[sheet.id]


    def assign_names(self):
        """
        Bind defined names and other definitions to worksheets or the workbook
        """

        for idx, names in self.defined_names.by_sheet().items():
            if idx == "global":
                self.wb.defined_names = names
                continue

            try:
                sheet = self.wb._sheets[idx]
            except IndexError:
                warn(f"Defined names for sheet index {idx} cannot be located")
                continue

            for name, defn in names.items():
                reserved = defn.is_reserved
                if reserved is None:
                    sheet.defined_names[name] = defn

                elif reserved == "Print_Titles":
                    titles = PrintTitles.from_string(defn.value)
                    sheet._print_rows = titles.rows
                    sheet._print_cols = titles.cols
                elif reserved == "Print_Area":
                    try:
                        sheet._print_area = PrintArea.from_string(defn.value)
                    except TypeError:
                        warn(f"Print area cannot be set to Defined name: {defn.value}.")
                        continue

    @property
    def pivot_caches(self):
        """
        Get PivotCache objects

        Returns a dict keyed by ``cacheId``. The dict is rebuilt from the
        archive on every access.
        """
        d = {}
        for c in self.caches:
            cache = get_rel(self.archive, self.rels, id=c.id, cls=CacheDefinition)
            if cache.deps:
                records = get_rel(self.archive, cache.deps, cache.id, RecordList)
                cache.records = records
            d[c.cacheId] = cache
        return d