diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/markdown/treeprocessors.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/markdown/treeprocessors.py | 476 |
1 files changed, 476 insertions, 0 deletions
# Reconstructed from the diff of
# .venv/lib/python3.12/site-packages/markdown/treeprocessors.py
# (new file mode 100644, index 00000000..83630999).

# Python Markdown

# A Python implementation of John Gruber's Markdown.

# Documentation: https://python-markdown.github.io/
# GitHub: https://github.com/Python-Markdown/markdown/
# PyPI: https://pypi.org/project/Markdown/

# Started by Manfred Stienstra (http://www.dwerg.net/).
# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
# Currently maintained by Waylan Limberg (https://github.com/waylan),
# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).

# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
# Copyright 2004 Manfred Stienstra (the original version)

# License: BSD (see LICENSE.md for details).

"""
Tree processors manipulate the tree created by block processors. They can even create an entirely
new `ElementTree` object. This is an excellent place for creating summaries, adding collected
references, or last minute adjustments.

"""

from __future__ import annotations

import re
import xml.etree.ElementTree as etree
from typing import TYPE_CHECKING, Any
from . import util
from . import inlinepatterns

if TYPE_CHECKING:  # pragma: no cover
    from markdown import Markdown


def build_treeprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Treeprocessor]:
    """ Build the default `treeprocessors` for Markdown.

    Registers, in this order of priority, the `inline` (20), `prettify` (10)
    and `unescape` (0) processors. `kwargs` is accepted but not used here.
    """
    treeprocessors = util.Registry()
    treeprocessors.register(InlineProcessor(md), 'inline', 20)
    treeprocessors.register(PrettifyTreeprocessor(md), 'prettify', 10)
    treeprocessors.register(UnescapeTreeprocessor(md), 'unescape', 0)
    return treeprocessors


def isString(s: object) -> bool:
    """ Return `True` if object is a string but not an [`AtomicString`][markdown.util.AtomicString]. """
    return isinstance(s, str) and not isinstance(s, util.AtomicString)


class Treeprocessor(util.Processor):
    """
    `Treeprocessor`s are run on the `ElementTree` object before serialization.

    Each `Treeprocessor` implements a `run` method that takes a pointer to an
    `Element` and modifies it as necessary.

    `Treeprocessors` must extend `markdown.Treeprocessor`.

    """
    def run(self, root: etree.Element) -> etree.Element | None:
        """
        Subclasses of `Treeprocessor` should implement a `run` method, which
        takes a root `Element`. This method can return another `Element`
        object, and the existing root `Element` will be replaced, or it can
        modify the current tree and return `None`.
        """
        pass  # pragma: no cover


class InlineProcessor(Treeprocessor):
    """
    A `Treeprocessor` that traverses a tree, applying inline patterns.
    """

    def __init__(self, md: Markdown):
        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
        self.__placeholder_suffix = util.ETX
        self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
            + len(self.__placeholder_suffix)
        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
        self.md = md
        self.inlinePatterns = md.inlinePatterns
        self.ancestors: list[str] = []
        # Both containers are rebuilt at the start of every `run`; creating
        # them here as well means the helper methods never hit a missing
        # attribute if they are reached before the first `run`.
        self.stashed_nodes: dict[str, etree.Element | str] = {}
        self.parent_map: dict[etree.Element, etree.Element] = {}

    def __makePlaceholder(self, type: str) -> tuple[str, str]:
        """
        Generate a placeholder.

        Arguments:
            type: Pattern type; accepted but not used when building the placeholder.

        Returns:
            The placeholder string and the zero-padded, four-digit id derived
            from the current number of stashed nodes.

        """
        node_id = "%04d" % len(self.stashed_nodes)
        placeholder = util.INLINE_PLACEHOLDER % node_id
        return placeholder, node_id

    def __findPlaceholder(self, data: str, index: int) -> tuple[str | None, int]:
        """
        Extract id from data string, start from index.

        Arguments:
            data: String.
            index: Index, from which we start search.

        Returns:
            Placeholder id and string index, after the found placeholder.
            When nothing matches, `None` and `index + 1`.

        """
        m = self.__placeholder_re.search(data, index)
        if m:
            return m.group(1), m.end()
        else:
            return None, index + 1

    def __stashNode(self, node: etree.Element | str, type: str) -> str:
        """ Add node to stash and return the placeholder that stands in for it. """
        placeholder, node_id = self.__makePlaceholder(type)
        self.stashed_nodes[node_id] = node
        return placeholder

    def __handleInline(self, data: str, patternIndex: int = 0) -> str:
        """
        Process string with inline patterns and replace it with placeholders.

        Arguments:
            data: A line of Markdown text.
            patternIndex: The index of the `inlinePattern` to start with.

        Returns:
            String with placeholders. An `AtomicString` is returned untouched.

        """
        if not isinstance(data, util.AtomicString):
            startIndex = 0
            count = len(self.inlinePatterns)
            while patternIndex < count:
                data, matched, startIndex = self.__applyPattern(
                    self.inlinePatterns[patternIndex], data, patternIndex, startIndex
                )
                # Keep re-applying the same pattern until it stops matching.
                if not matched:
                    patternIndex += 1
        return data

    def __processElementText(self, node: etree.Element, subnode: etree.Element, isText: bool = True) -> None:
        """
        Process placeholders in `Element.text` or `Element.tail`
        of Elements popped from `self.stashed_nodes`.

        Arguments:
            node: Parent node.
            subnode: Processing node.
            isText: Boolean variable, True - it's text, False - it's a tail.

        """
        if isText:
            text = subnode.text
            subnode.text = None
        else:
            text = subnode.tail
            subnode.tail = None

        childResult = self.__processPlaceholders(text, subnode, isText)

        # Elements recovered from a tail go right after `subnode` inside
        # `node`; everything else is inserted at the front of `node`.
        if not isText and node is not subnode:
            pos = list(node).index(subnode) + 1
        else:
            pos = 0

        # Inserting in reverse order at a fixed position keeps document order.
        childResult.reverse()
        for newChild in childResult:
            node.insert(pos, newChild[0])

    def __processPlaceholders(
        self,
        data: str | None,
        parent: etree.Element,
        isText: bool = True
    ) -> list[tuple[etree.Element, list[str]]]:
        """
        Process string with placeholders and generate `ElementTree` tree.

        Arguments:
            data: String with placeholders instead of `ElementTree` elements.
            parent: Element, which contains processing inline data.
            isText: Boolean variable, True - it's text, False - it's a tail.

        Returns:
            List with `ElementTree` elements with applied inline patterns.
            Each entry pairs the element with a copy of the current ancestors.

        """
        result: list[tuple[etree.Element, list[str]]] = []

        def linkText(text: str | None) -> None:
            # Attach plain text to the tail of the last collected element,
            # or, when nothing has been collected yet, to the parent's
            # tail/text depending on `isText`.
            if text:
                if result:
                    if result[-1][0].tail:
                        result[-1][0].tail += text
                    else:
                        result[-1][0].tail = text
                elif not isText:
                    if parent.tail:
                        parent.tail += text
                    else:
                        parent.tail = text
                else:
                    if parent.text:
                        parent.text += text
                    else:
                        parent.text = text

        startIndex = 0
        while data:
            index = data.find(self.__placeholder_prefix, startIndex)
            if index != -1:
                node_id, phEndIndex = self.__findPlaceholder(data, index)

                if node_id in self.stashed_nodes:
                    node = self.stashed_nodes.get(node_id)

                    if index > 0:
                        text = data[startIndex:index]
                        linkText(text)

                    if not isinstance(node, str):  # it's Element
                        for child in [node] + list(node):
                            if child.tail and child.tail.strip():
                                self.__processElementText(node, child, False)
                            if child.text and child.text.strip():
                                self.__processElementText(child, child)
                    else:  # it's just a string
                        linkText(node)
                        startIndex = phEndIndex
                        continue

                    startIndex = phEndIndex
                    result.append((node, self.ancestors[:]))

                else:  # wrong placeholder
                    # Emit the prefix as literal text and resume right after it.
                    end = index + len(self.__placeholder_prefix)
                    linkText(data[startIndex:end])
                    startIndex = end
            else:
                text = data[startIndex:]
                if isinstance(data, util.AtomicString):
                    # We don't want to lose the `AtomicString`
                    text = util.AtomicString(text)
                linkText(text)
                data = ""

        return result

    def __applyPattern(
        self,
        pattern: inlinepatterns.Pattern,
        data: str,
        patternIndex: int,
        startIndex: int = 0
    ) -> tuple[str, bool, int]:
        """
        Check if the line fits the pattern, create the necessary
        elements, add it to `stashed_nodes`.

        Arguments:
            data: The text to be processed.
            pattern: The pattern to be checked.
            patternIndex: Index of current pattern.
            startIndex: String index, from which we start searching.

        Returns:
            A tuple of the string with placeholders instead of `ElementTree`
            elements, a flag telling whether the pattern matched, and the
            index from which the next search should start.

        """
        new_style = isinstance(pattern, inlinepatterns.InlineProcessor)

        if any(exclude.lower() in self.ancestors for exclude in pattern.ANCESTOR_EXCLUDES):
            return data, False, 0

        if new_style:
            match = None
            # Since `handleMatch` may reject our first match,
            # we iterate over the buffer looking for matches
            # until we can't find any more.
            for match in pattern.getCompiledRegExp().finditer(data, startIndex):
                node, start, end = pattern.handleMatch(match, data)
                if start is None or end is None:
                    # Rejected: forget this match and try the next one.
                    match = None
                    continue
                break
        else:  # pragma: no cover
            match = pattern.getCompiledRegExp().match(data[startIndex:])
            leftData = data[:startIndex]

        if not match:
            return data, False, 0

        if not new_style:  # pragma: no cover
            node = pattern.handleMatch(match)
            start = match.start(0)
            end = match.end(0)

        if node is None:
            return data, True, end

        if not isinstance(node, str):
            if not isinstance(node.text, util.AtomicString):
                # We need to process current node too
                for child in [node] + list(node):
                    if child.text:
                        # Text is handled with the child's own tag on the
                        # ancestor stack, starting from the next pattern.
                        self.ancestors.append(child.tag.lower())
                        child.text = self.__handleInline(
                            child.text, patternIndex + 1
                        )
                        self.ancestors.pop()
                    if child.tail:
                        child.tail = self.__handleInline(
                            child.tail, patternIndex
                        )

        placeholder = self.__stashNode(node, pattern.type())

        if new_style:
            return "{}{}{}".format(data[:start],
                                   placeholder, data[end:]), True, 0
        else:  # pragma: no cover
            return "{}{}{}{}".format(leftData,
                                     match.group(1),
                                     placeholder, match.groups()[-1]), True, 0

    def __build_ancestors(self, parent: etree.Element | None, parents: list[str]) -> None:
        """
        Build the ancestor list.

        Walks `self.parent_map` upwards starting at `parent` (inclusive) and
        appends the lower-cased tag names, outermost first, to `parents`.
        """
        ancestors = []
        while parent is not None:
            ancestors.append(parent.tag.lower())
            parent = self.parent_map.get(parent)
        ancestors.reverse()
        parents.extend(ancestors)

    def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree.Element:
        """Apply inline patterns to a parsed Markdown tree.

        Iterate over `Element`, find elements with inline tag, apply inline
        patterns and append newly created Elements to tree. To avoid further
        processing of string with inline patterns, instead of normal string,
        use subclass [`AtomicString`][markdown.util.AtomicString]:

            node.text = markdown.util.AtomicString("This will not be processed.")

        Arguments:
            tree: `Element` object, representing Markdown tree.
            ancestors: List of parent tag names that precede the tree node (if needed).

        Returns:
            An element tree object with applied inline patterns.

        """
        self.stashed_nodes = {}

        # Ensure a valid parent list, but copy passed in lists
        # to ensure we don't have the user accidentally change it on us.
        tree_parents = [] if ancestors is None else ancestors[:]

        self.parent_map = {c: p for p in tree.iter() for c in p}
        stack = [(tree, tree_parents)]

        while stack:
            currElement, parents = stack.pop()

            self.ancestors = parents
            self.__build_ancestors(currElement, self.ancestors)

            insertQueue = []
            for child in currElement:
                if child.text and not isinstance(
                    child.text, util.AtomicString
                ):
                    self.ancestors.append(child.tag.lower())
                    text = child.text
                    child.text = None
                    lst = self.__processPlaceholders(
                        self.__handleInline(text), child
                    )
                    for item in lst:
                        self.parent_map[item[0]] = child
                    stack += lst
                    # Actual insertion is deferred until the loop over
                    # `currElement` has finished.
                    insertQueue.append((child, lst))
                    self.ancestors.pop()
                if child.tail:
                    tail = self.__handleInline(child.tail)
                    # Throw-away element that only collects the leading
                    # plain text of the processed tail.
                    dummy = etree.Element('d')
                    child.tail = None
                    tailResult = self.__processPlaceholders(tail, dummy, False)
                    if dummy.tail:
                        child.tail = dummy.tail
                    pos = list(currElement).index(child) + 1
                    tailResult.reverse()
                    for newChild in tailResult:
                        self.parent_map[newChild[0]] = currElement
                        currElement.insert(pos, newChild[0])
                if len(child):
                    self.parent_map[child] = currElement
                    stack.append((child, self.ancestors[:]))

            for element, lst in insertQueue:
                for i, obj in enumerate(lst):
                    newChild = obj[0]
                    element.insert(i, newChild)
        return tree


class PrettifyTreeprocessor(Treeprocessor):
    """ Add line breaks to the html document. """

    def _prettifyETree(self, elem: etree.Element) -> None:
        """ Recursively add line breaks to `ElementTree` children. """

        i = "\n"
        if self.md.is_block_level(elem.tag) and elem.tag not in ['code', 'pre']:
            if (not elem.text or not elem.text.strip()) \
                    and len(elem) and self.md.is_block_level(elem[0].tag):
                elem.text = i
            for e in elem:
                if self.md.is_block_level(e.tag):
                    self._prettifyETree(e)
        if not elem.tail or not elem.tail.strip():
            elem.tail = i

    def run(self, root: etree.Element) -> None:
        """ Add line breaks to `Element` object and its children. """

        self._prettifyETree(root)
        # Do `<br />`'s separately as they are often in the middle of
        # inline content and missed by `_prettifyETree`.
        brs = root.iter('br')
        for br in brs:
            if not br.tail or not br.tail.strip():
                br.tail = '\n'
            else:
                br.tail = '\n%s' % br.tail
        # Clean up extra empty lines at end of code blocks.
        pres = root.iter('pre')
        for pre in pres:
            if len(pre) and pre[0].tag == 'code':
                code = pre[0]
                # Only prettify code containing text only
                if not len(code) and code.text is not None:
                    code.text = util.AtomicString(code.text.rstrip() + '\n')


class UnescapeTreeprocessor(Treeprocessor):
    """ Restore escaped chars """

    RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))

    def _unescape(self, m: re.Match[str]) -> str:
        """ Return the character whose code point is the matched number. """
        return chr(int(m.group(1)))

    def unescape(self, text: str) -> str:
        """ Replace every escape sequence matched by `RE` in `text`. """
        return self.RE.sub(self._unescape, text)

    def run(self, root: etree.Element) -> None:
        """ Loop over all elements and unescape all text. """
        for elem in root.iter():
            # Unescape text content
            if elem.text and elem.tag != 'code':
                elem.text = self.unescape(elem.text)
            # Unescape tail content
            if elem.tail:
                elem.tail = self.unescape(elem.tail)
            # Unescape attribute values
            for key, value in elem.items():
                elem.set(key, self.unescape(value))