diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/docutils/utils/math/mathml_elements.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/docutils/utils/math/mathml_elements.py | 478 |
1 files changed, 478 insertions, 0 deletions
# :Id: $Id: mathml_elements.py 9561 2024-03-14 16:34:48Z milde $
# :Copyright: 2024 Günter Milde.
#
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
#    Copying and distribution of this file, with or without modification,
#    are permitted in any medium without royalty provided the copyright
#    notice and this notice are preserved.
#    This file is offered as-is, without any warranty.
#
# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause

"""MathML element classes based on `xml.etree`.

The module is intended for programmatic generation of MathML
and covers the part of `MathML Core`_ that is required by
Docutils' *TeX math to MathML* converter.

This module is PROVISIONAL:
the API is not settled and may change with any minor Docutils version.

.. _MathML Core: https://www.w3.org/TR/mathml-core/
"""

# Usage:
#
# >>> from mathml_elements import *

import numbers
import xml.etree.ElementTree as ET


GLOBAL_ATTRIBUTES = (
    'class',         # space-separated list of element classes
    # 'data-*',      # custom data attributes (see HTML)
    'dir',           # directionality ('ltr', 'rtl')
    'displaystyle',  # True: normal, False: compact
    'id',            # unique identifier
    # 'mathbackground',  # color definition, deprecated
    # 'mathcolor',   # color definition, deprecated
    # 'mathsize',    # font-size, deprecated
    'nonce',         # cryptographic nonce ("number used once")
    'scriptlevel',   # math-depth for the element
    'style',         # CSS styling declarations
    'tabindex',      # indicate if the element takes input focus
    )
"""Global MathML attributes

https://w3c.github.io/mathml-core/#global-attributes
"""


# Base classes
# ------------

class MathElement(ET.Element):
    """Base class for MathML elements.

    The XML tag name is the name of the (sub)class.
    In addition to the `xml.etree.ElementTree.Element` API, instances
    keep a reference to their `parent` and know how many children they
    expect (`nchildren`), so that a tree can be built sequentially via
    `append()`, which returns the next insertion point.
    """

    nchildren = None
    """Expected number of children or None"""
    # cf. https://www.w3.org/TR/MathML3/chapter3.html#id.3.1.3.2
    parent = None
    """Parent node in MathML element tree."""

    def __init__(self, *children, **attributes):
        """Set up node with `children` and `attributes`.

        Attribute names are normalised to lowercase.
        You may use "CLASS" to set a "class" attribute.
        Attribute values are converted to strings
        (with True -> "true" and False -> "false").

        >>> math(CLASS='test', level=3, split=True)
        math(class='test', level='3', split='true')
        >>> math(CLASS='test', level=3, split=True).toxml()
        '<math class="test" level="3" split="true"></math>'

        """
        attrib = {k.lower(): self.a_str(v) for k, v in attributes.items()}
        super().__init__(self.__class__.__name__, **attrib)
        self.extend(children)

    @staticmethod
    def a_str(v):
        """Return string representation for attribute value `v`.

        Booleans are written in lowercase ("true"/"false"),
        everything else is passed through `str()`.
        """
        if isinstance(v, bool):
            return str(v).lower()
        return str(v)

    def __repr__(self):
        """Return full string representation.

        Lists children, text, a per-instance `nchildren` override,
        the `switch` flag (if set), and all attributes.
        """
        args = [repr(child) for child in self]
        if self.text:
            args.append(repr(self.text))
        if self.nchildren != self.__class__.nchildren:
            args.append(f'nchildren={self.nchildren}')
        if getattr(self, 'switch', None):
            args.append('switch=True')
        args += [f'{k}={v!r}' for k, v in self.items() if v is not None]
        return f'{self.tag}({", ".join(args)})'

    def __str__(self):
        """Return concise, informal string representation.

        Shows either the text or the children, but no attributes.
        """
        if self.text:
            args = repr(self.text)
        else:
            args = ', '.join(f'{child}' for child in self)
        return f'{self.tag}({args})'

    def set(self, key, value):
        """Set attribute `key` to the string representation of `value`."""
        super().set(key, self.a_str(value))

    def __setitem__(self, key, value):
        """Replace the child (or slice of children) at `key` with `value`.

        Set the `parent` of the new child(ren) to `self`.

        Raise TypeError if the element does not take children or if
        the assignment would leave it with more than `nchildren` children.
        """
        if self.nchildren == 0:
            raise TypeError(f'Element "{self}" does not take children.')
        if isinstance(value, MathElement):
            value.parent = self
        else:  # value may be an iterable
            if self.nchildren:
                # The children addressed by `key` are replaced, so they
                # must not be counted towards the resulting length.
                replaced = len(self[key]) if isinstance(key, slice) else 1
                if len(self) - replaced + len(value) > self.nchildren:
                    raise TypeError(
                        f'Element "{self}" takes only {self.nchildren}'
                        ' children')
            for e in value:
                e.parent = self
        super().__setitem__(key, value)

    def is_full(self):
        """Return boolean indicating whether the element is full.

        An element is full (no more children may be appended) if
        `nchildren` is set and that number of children is reached.
        """
        return self.nchildren is not None and len(self) >= self.nchildren

    def close(self):
        """Close element and return first non-full ancestor or None."""
        self.nchildren = len(self)  # mark node as full
        parent = self.parent
        while parent is not None and parent.is_full():
            parent = parent.parent
        return parent

    def append(self, element):
        """Append `element` and return new "current node" (insertion point).

        Append as child element and set the internal `parent` attribute.

        If self is already full, raise TypeError.

        If self is full after appending, call `self.close()`
        (returns first non-full ancestor or None) else return `self`.
        """
        if self.is_full():
            if self.nchildren:
                status = f'takes only {self.nchildren} children'
            else:
                status = 'does not take children'
            raise TypeError(f'Element "{self}" {status}.')
        super().append(element)
        element.parent = self
        if self.is_full():
            return self.close()
        return self

    def extend(self, elements):
        """Sequentially append `elements`. Return new "current node".

        All `elements` become children of `self`; the return value is
        the result of the last `append()` call (or `self` if `elements`
        is empty).

        Raise TypeError if overfull.
        """
        current_node = self
        for element in elements:
            current_node = self.append(element)
        return current_node

    def pop(self, index=-1):
        """Remove and return the child at `index` (default: last child)."""
        element = self[index]
        del self[index]
        return element

    def in_block(self):
        """Return True, if `self` or an ancestor has ``display='block'``.

        Used to find out whether we are in inline vs. displayed maths.
        """
        if self.get('display') is None:
            try:
                return self.parent.in_block()
            except AttributeError:  # no parent (or parent without method)
                return False
        return self.get('display') == 'block'

    # XML output:

    # NOTE(review): the default for `space` was reconstructed from a
    # whitespace-collapsed source; two spaces (the `ET.indent()` default)
    # is assumed -- confirm against the upstream file.
    def indent_xml(self, space='  ', level=0):
        """Format XML output with indents.

        Use with care:
          Formatting whitespace is permanently added to the
          `text` and `tail` attributes of `self` and descendants!
        """
        ET.indent(self, space, level)

    def unindent_xml(self):
        """Strip whitespace at the end of `text` and `tail` attributes...

        to revert changes made by the `indent_xml()` method.
        Use with care, trailing whitespace from the original may be lost.
        The text of token elements is left untouched.
        """
        for e in self.iter():
            if not isinstance(e, MathToken) and e.text:
                e.text = e.text.rstrip()
            if e.tail:
                e.tail = e.tail.rstrip()

    def toxml(self, encoding=None):
        """Return an XML representation of the element.

        By default, the return value is a `str` instance. With an explicit
        `encoding` argument, the result is a `bytes` instance in the
        specified encoding. The XML default encoding is UTF-8, any other
        encoding must be specified in an XML document header.

        The invisible "Apply Function" character (U+2061) is written as
        the named entity ``&ApplyFunction;``.

        Name and encoding handling match `xml.dom.minidom.Node.toxml()`
        while `etree.Element.tostring()` returns `bytes` by default.
        """
        xml = ET.tostring(self, encoding or 'unicode',
                          short_empty_elements=False)
        # Visible representation for "Apply Function" character:
        if isinstance(xml, str):
            return xml.replace('\u2061', '&ApplyFunction;')
        try:
            apply_function = '\u2061'.encode(encoding)
        except UnicodeEncodeError:
            # `encoding` cannot represent U+2061 (e.g. "ascii"):
            # the serializer wrote a numeric character reference instead.
            apply_function = b'&#8289;'
        return xml.replace(apply_function, b'&ApplyFunction;')


# Group sub-expressions in a horizontal row
#
# The elements <msqrt>, <mstyle>, <merror>, <mpadded>, <mphantom>,
# <menclose>, <mtd>, <mscarry>, and <math> treat their contents
# as a single inferred mrow formed from all their children.
# (https://www.w3.org/TR/mathml4/#presm_inferredmrow)
#
# MathML Core uses the term "anonymous mrow element".

class MathRow(MathElement):
    """Base class for elements treating content as a single mrow."""


# 2d Schemata

class MathSchema(MathElement):
    """Base class for schemata expecting 2 or more children.

    The special attribute `switch` indicates that the last two child
    elements are in reversed order and must be switched before XML-export.
    See `msub` for an example.
    """
    nchildren = 2

    def __init__(self, *children, **kwargs):
        """Set up node; the keyword `switch` is stored, not exported."""
        # Must be set before the base class appends the children.
        self.switch = kwargs.pop('switch', False)
        super().__init__(*children, **kwargs)

    def append(self, element):
        """Append element. Normalize order and close if full."""
        current_node = super().append(element)
        if self.switch and self.is_full():
            self[-1], self[-2] = self[-2], self[-1]
            self.switch = False
        return current_node


# Token elements represent the smallest units of mathematical notation which
# carry meaning.

class MathToken(MathElement):
    """Token Element: contains textual data instead of children.

    Expect text data on initialisation.
    """
    nchildren = 0

    def __init__(self, text, **attributes):
        """Set up token with `text` (a `str` or a number).

        Raise ValueError for other `text` types.
        """
        super().__init__(**attributes)
        if not isinstance(text, (str, numbers.Number)):
            raise ValueError('MathToken element expects `str` or number,'
                             f' not "{text}".')
        self.text = str(text)


# MathML element classes
# ----------------------

class math(MathRow):
    """Top-level MathML element, a single mathematical formula."""


# Token elements
# ~~~~~~~~~~~~~~

class mtext(MathToken):
    """Arbitrary text with no notational meaning."""


class mi(MathToken):
    """Identifier, such as a function name, variable or symbolic constant."""


class mn(MathToken):
    """Numeric literal.

    >>> mn(3.41).toxml()
    '<mn>3.41</mn>'

    Normally a sequence of digits with a possible separator (a dot or a comma).
    (Values with comma must be specified as `str`.)
    """


class mo(MathToken):
    """Operator, Fence, Separator, or Accent.

    >>> mo('<').toxml()
    '<mo>&lt;</mo>'

    Besides operators in strict mathematical meaning, this element also
    includes "operators" like parentheses, separators like comma and
    semicolon, or "absolute value" bars.
    """


class mspace(MathElement):
    """Blank space, whose size is set by its attributes.

    Takes additional attributes `depth`, `height`, `width`.
    Takes no children and no text.

    See also `mphantom`.
    """
    nchildren = 0


# General Layout Schemata
# ~~~~~~~~~~~~~~~~~~~~~~~

class mrow(MathRow):
    """Generic element to group children as a horizontal row.

    Removed on closing if not required (see `mrow.close()`).
    """

    def transfer_attributes(self, other):
        """Transfer attributes from self to other.

        "List values" (class, style) are appended to existing values,
        other values replace existing values.
        """
        delimiters = {'class': ' ', 'style': '; '}
        for k, v in self.items():
            if k in ('class', 'style') and v:
                if other.get(k):
                    v = delimiters[k].join(
                        (other.get(k).rstrip(delimiters[k]), v))
            other.set(k, v)

    def close(self):
        """Close element and return first non-full ancestor or None.

        Remove <mrow> if it has only one child element.
        """
        parent = self.parent
        # replace `self` with single child
        if parent is not None and len(self) == 1:
            child = self[0]
            try:
                parent[list(parent).index(self)] = child
                child.parent = parent
            except (AttributeError, ValueError):
                return None
            self.transfer_attributes(child)
        return super().close()


class mfrac(MathSchema):
    """Fractions or fraction-like objects such as binomial coefficients."""


class msqrt(MathRow):
    """Square root. See also `mroot`."""
    nchildren = 1  # \sqrt expects one argument or a group


class mroot(MathSchema):
    """Roots with an explicit index. See also `msqrt`."""


class mstyle(MathRow):
    """Style Change.

    In modern browsers, <mstyle> is equivalent to an <mrow> element.
    However, <mstyle> may still be relevant for compatibility with
    MathML implementations outside browsers.
    """


class merror(MathRow):
    """Display contents as error messages."""


class menclose(MathRow):
    """Renders content inside an enclosing notation...

    ... specified by the notation attribute.

    Non-standard but still required by Firefox for boxed expressions.
    """
    nchildren = 1  # \boxed expects one argument or a group


class mpadded(MathRow):
    """Adjust space around content."""
    # nchildren = 1  # currently not used by latex2mathml


class mphantom(MathRow):
    """Placeholder: Rendered invisibly but dimensions are kept."""
    nchildren = 1  # \phantom expects one argument or a group


# Script and Limit Schemata
# ~~~~~~~~~~~~~~~~~~~~~~~~~

class msub(MathSchema):
    """Attach a subscript to an expression."""


class msup(MathSchema):
    """Attach a superscript to an expression."""


class msubsup(MathSchema):
    """Attach both a subscript and a superscript to an expression."""
    nchildren = 3

# Examples:
#
# The `switch` attribute reverses the order of the last two children:
# >>> msub(mn(1), mn(2)).toxml()
# '<msub><mn>1</mn><mn>2</mn></msub>'
# >>> msub(mn(1), mn(2), switch=True).toxml()
# '<msub><mn>2</mn><mn>1</mn></msub>'
#
# >>> msubsup(mi('base'), mn(1), mn(2)).toxml()
# '<msubsup><mi>base</mi><mn>1</mn><mn>2</mn></msubsup>'
# >>> msubsup(mi('base'), mn(1), mn(2), switch=True).toxml()
# '<msubsup><mi>base</mi><mn>2</mn><mn>1</mn></msubsup>'


class munder(msub):
    """Attach an accent or a limit under an expression."""


class mover(msup):
    """Attach an accent or a limit over an expression."""


class munderover(msubsup):
    """Attach accents or limits both under and over an expression."""


# Tabular Math
# ~~~~~~~~~~~~

class mtable(MathElement):
    """Table or matrix element."""


class mtr(MathRow):
    """Row in a table or a matrix."""


class mtd(MathRow):
    """Cell in a table or a matrix"""