aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/markdown/treeprocessors.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/markdown/treeprocessors.py')
-rw-r--r--.venv/lib/python3.12/site-packages/markdown/treeprocessors.py476
1 files changed, 476 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/markdown/treeprocessors.py b/.venv/lib/python3.12/site-packages/markdown/treeprocessors.py
new file mode 100644
index 00000000..83630999
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/markdown/treeprocessors.py
@@ -0,0 +1,476 @@
+# Python Markdown
+
+# A Python implementation of John Gruber's Markdown.
+
+# Documentation: https://python-markdown.github.io/
+# GitHub: https://github.com/Python-Markdown/markdown/
+# PyPI: https://pypi.org/project/Markdown/
+
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+
+# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
+
+# License: BSD (see LICENSE.md for details).
+
+"""
+Tree processors manipulate the tree created by block processors. They can even create an entirely
+new `ElementTree` object. This is an excellent place for creating summaries, adding collected
+references, or last minute adjustments.
+
+"""
+
+from __future__ import annotations
+
+import re
+import xml.etree.ElementTree as etree
+from typing import TYPE_CHECKING, Any
+from . import util
+from . import inlinepatterns
+
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
+
+
+def build_treeprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Treeprocessor]:
+ """ Build the default `treeprocessors` for Markdown. """
+ treeprocessors = util.Registry()
+ treeprocessors.register(InlineProcessor(md), 'inline', 20)
+ treeprocessors.register(PrettifyTreeprocessor(md), 'prettify', 10)
+ treeprocessors.register(UnescapeTreeprocessor(md), 'unescape', 0)
+ return treeprocessors
+
+
+def isString(s: object) -> bool:
+ """ Return `True` if object is a string but not an [`AtomicString`][markdown.util.AtomicString]. """
+ if not isinstance(s, util.AtomicString):
+ return isinstance(s, str)
+ return False
+
+
+class Treeprocessor(util.Processor):
+ """
+ `Treeprocessor`s are run on the `ElementTree` object before serialization.
+
+ Each `Treeprocessor` implements a `run` method that takes a pointer to an
+ `Element` and modifies it as necessary.
+
+ `Treeprocessors` must extend `markdown.Treeprocessor`.
+
+ """
+ def run(self, root: etree.Element) -> etree.Element | None:
+ """
+ Subclasses of `Treeprocessor` should implement a `run` method, which
+ takes a root `Element`. This method can return another `Element`
+ object, and the existing root `Element` will be replaced, or it can
+ modify the current tree and return `None`.
+ """
+ pass # pragma: no cover
+
+
+class InlineProcessor(Treeprocessor):
+ """
+ A `Treeprocessor` that traverses a tree, applying inline patterns.
+ """
+
+ def __init__(self, md: Markdown):
+ self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
+ self.__placeholder_suffix = util.ETX
+ self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
+ + len(self.__placeholder_suffix)
+ self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
+ self.md = md
+ self.inlinePatterns = md.inlinePatterns
+ self.ancestors: list[str] = []
+
+ def __makePlaceholder(self, type: str) -> tuple[str, str]:
+ """ Generate a placeholder """
+ id = "%04d" % len(self.stashed_nodes)
+ hash = util.INLINE_PLACEHOLDER % id
+ return hash, id
+
+ def __findPlaceholder(self, data: str, index: int) -> tuple[str | None, int]:
+ """
+ Extract id from data string, start from index.
+
+ Arguments:
+ data: String.
+ index: Index, from which we start search.
+
+ Returns:
+ Placeholder id and string index, after the found placeholder.
+
+ """
+ m = self.__placeholder_re.search(data, index)
+ if m:
+ return m.group(1), m.end()
+ else:
+ return None, index + 1
+
+ def __stashNode(self, node: etree.Element | str, type: str) -> str:
+ """ Add node to stash. """
+ placeholder, id = self.__makePlaceholder(type)
+ self.stashed_nodes[id] = node
+ return placeholder
+
+ def __handleInline(self, data: str, patternIndex: int = 0) -> str:
+ """
+ Process string with inline patterns and replace it with placeholders.
+
+ Arguments:
+ data: A line of Markdown text.
+ patternIndex: The index of the `inlinePattern` to start with.
+
+ Returns:
+ String with placeholders.
+
+ """
+ if not isinstance(data, util.AtomicString):
+ startIndex = 0
+ count = len(self.inlinePatterns)
+ while patternIndex < count:
+ data, matched, startIndex = self.__applyPattern(
+ self.inlinePatterns[patternIndex], data, patternIndex, startIndex
+ )
+ if not matched:
+ patternIndex += 1
+ return data
+
+ def __processElementText(self, node: etree.Element, subnode: etree.Element, isText: bool = True) -> None:
+ """
+ Process placeholders in `Element.text` or `Element.tail`
+ of Elements popped from `self.stashed_nodes`.
+
+ Arguments:
+ node: Parent node.
+ subnode: Processing node.
+ isText: Boolean variable, True - it's text, False - it's a tail.
+
+ """
+ if isText:
+ text = subnode.text
+ subnode.text = None
+ else:
+ text = subnode.tail
+ subnode.tail = None
+
+ childResult = self.__processPlaceholders(text, subnode, isText)
+
+ if not isText and node is not subnode:
+ pos = list(node).index(subnode) + 1
+ else:
+ pos = 0
+
+ childResult.reverse()
+ for newChild in childResult:
+ node.insert(pos, newChild[0])
+
+ def __processPlaceholders(
+ self,
+ data: str | None,
+ parent: etree.Element,
+ isText: bool = True
+ ) -> list[tuple[etree.Element, list[str]]]:
+ """
+ Process string with placeholders and generate `ElementTree` tree.
+
+ Arguments:
+ data: String with placeholders instead of `ElementTree` elements.
+ parent: Element, which contains processing inline data.
+ isText: Boolean variable, True - it's text, False - it's a tail.
+
+ Returns:
+ List with `ElementTree` elements with applied inline patterns.
+
+ """
+ def linkText(text: str | None) -> None:
+ if text:
+ if result:
+ if result[-1][0].tail:
+ result[-1][0].tail += text
+ else:
+ result[-1][0].tail = text
+ elif not isText:
+ if parent.tail:
+ parent.tail += text
+ else:
+ parent.tail = text
+ else:
+ if parent.text:
+ parent.text += text
+ else:
+ parent.text = text
+ result = []
+ strartIndex = 0
+ while data:
+ index = data.find(self.__placeholder_prefix, strartIndex)
+ if index != -1:
+ id, phEndIndex = self.__findPlaceholder(data, index)
+
+ if id in self.stashed_nodes:
+ node = self.stashed_nodes.get(id)
+
+ if index > 0:
+ text = data[strartIndex:index]
+ linkText(text)
+
+ if not isinstance(node, str): # it's Element
+ for child in [node] + list(node):
+ if child.tail:
+ if child.tail.strip():
+ self.__processElementText(
+ node, child, False
+ )
+ if child.text:
+ if child.text.strip():
+ self.__processElementText(child, child)
+ else: # it's just a string
+ linkText(node)
+ strartIndex = phEndIndex
+ continue
+
+ strartIndex = phEndIndex
+ result.append((node, self.ancestors[:]))
+
+ else: # wrong placeholder
+ end = index + len(self.__placeholder_prefix)
+ linkText(data[strartIndex:end])
+ strartIndex = end
+ else:
+ text = data[strartIndex:]
+ if isinstance(data, util.AtomicString):
+ # We don't want to loose the `AtomicString`
+ text = util.AtomicString(text)
+ linkText(text)
+ data = ""
+
+ return result
+
+ def __applyPattern(
+ self,
+ pattern: inlinepatterns.Pattern,
+ data: str,
+ patternIndex: int,
+ startIndex: int = 0
+ ) -> tuple[str, bool, int]:
+ """
+ Check if the line fits the pattern, create the necessary
+ elements, add it to `stashed_nodes`.
+
+ Arguments:
+ data: The text to be processed.
+ pattern: The pattern to be checked.
+ patternIndex: Index of current pattern.
+ startIndex: String index, from which we start searching.
+
+ Returns:
+ String with placeholders instead of `ElementTree` elements.
+
+ """
+ new_style = isinstance(pattern, inlinepatterns.InlineProcessor)
+
+ for exclude in pattern.ANCESTOR_EXCLUDES:
+ if exclude.lower() in self.ancestors:
+ return data, False, 0
+
+ if new_style:
+ match = None
+ # Since `handleMatch` may reject our first match,
+ # we iterate over the buffer looking for matches
+ # until we can't find any more.
+ for match in pattern.getCompiledRegExp().finditer(data, startIndex):
+ node, start, end = pattern.handleMatch(match, data)
+ if start is None or end is None:
+ startIndex += match.end(0)
+ match = None
+ continue
+ break
+ else: # pragma: no cover
+ match = pattern.getCompiledRegExp().match(data[startIndex:])
+ leftData = data[:startIndex]
+
+ if not match:
+ return data, False, 0
+
+ if not new_style: # pragma: no cover
+ node = pattern.handleMatch(match)
+ start = match.start(0)
+ end = match.end(0)
+
+ if node is None:
+ return data, True, end
+
+ if not isinstance(node, str):
+ if not isinstance(node.text, util.AtomicString):
+ # We need to process current node too
+ for child in [node] + list(node):
+ if not isString(node):
+ if child.text:
+ self.ancestors.append(child.tag.lower())
+ child.text = self.__handleInline(
+ child.text, patternIndex + 1
+ )
+ self.ancestors.pop()
+ if child.tail:
+ child.tail = self.__handleInline(
+ child.tail, patternIndex
+ )
+
+ placeholder = self.__stashNode(node, pattern.type())
+
+ if new_style:
+ return "{}{}{}".format(data[:start],
+ placeholder, data[end:]), True, 0
+ else: # pragma: no cover
+ return "{}{}{}{}".format(leftData,
+ match.group(1),
+ placeholder, match.groups()[-1]), True, 0
+
+ def __build_ancestors(self, parent: etree.Element | None, parents: list[str]) -> None:
+ """Build the ancestor list."""
+ ancestors = []
+ while parent is not None:
+ if parent is not None:
+ ancestors.append(parent.tag.lower())
+ parent = self.parent_map.get(parent)
+ ancestors.reverse()
+ parents.extend(ancestors)
+
+ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree.Element:
+ """Apply inline patterns to a parsed Markdown tree.
+
+ Iterate over `Element`, find elements with inline tag, apply inline
+ patterns and append newly created Elements to tree. To avoid further
+ processing of string with inline patterns, instead of normal string,
+ use subclass [`AtomicString`][markdown.util.AtomicString]:
+
+ node.text = markdown.util.AtomicString("This will not be processed.")
+
+ Arguments:
+ tree: `Element` object, representing Markdown tree.
+ ancestors: List of parent tag names that precede the tree node (if needed).
+
+ Returns:
+ An element tree object with applied inline patterns.
+
+ """
+ self.stashed_nodes: dict[str, etree.Element | str] = {}
+
+ # Ensure a valid parent list, but copy passed in lists
+ # to ensure we don't have the user accidentally change it on us.
+ tree_parents = [] if ancestors is None else ancestors[:]
+
+ self.parent_map = {c: p for p in tree.iter() for c in p}
+ stack = [(tree, tree_parents)]
+
+ while stack:
+ currElement, parents = stack.pop()
+
+ self.ancestors = parents
+ self.__build_ancestors(currElement, self.ancestors)
+
+ insertQueue = []
+ for child in currElement:
+ if child.text and not isinstance(
+ child.text, util.AtomicString
+ ):
+ self.ancestors.append(child.tag.lower())
+ text = child.text
+ child.text = None
+ lst = self.__processPlaceholders(
+ self.__handleInline(text), child
+ )
+ for item in lst:
+ self.parent_map[item[0]] = child
+ stack += lst
+ insertQueue.append((child, lst))
+ self.ancestors.pop()
+ if child.tail:
+ tail = self.__handleInline(child.tail)
+ dumby = etree.Element('d')
+ child.tail = None
+ tailResult = self.__processPlaceholders(tail, dumby, False)
+ if dumby.tail:
+ child.tail = dumby.tail
+ pos = list(currElement).index(child) + 1
+ tailResult.reverse()
+ for newChild in tailResult:
+ self.parent_map[newChild[0]] = currElement
+ currElement.insert(pos, newChild[0])
+ if len(child):
+ self.parent_map[child] = currElement
+ stack.append((child, self.ancestors[:]))
+
+ for element, lst in insertQueue:
+ for i, obj in enumerate(lst):
+ newChild = obj[0]
+ element.insert(i, newChild)
+ return tree
+
+
+class PrettifyTreeprocessor(Treeprocessor):
+ """ Add line breaks to the html document. """
+
+ def _prettifyETree(self, elem: etree.Element) -> None:
+ """ Recursively add line breaks to `ElementTree` children. """
+
+ i = "\n"
+ if self.md.is_block_level(elem.tag) and elem.tag not in ['code', 'pre']:
+ if (not elem.text or not elem.text.strip()) \
+ and len(elem) and self.md.is_block_level(elem[0].tag):
+ elem.text = i
+ for e in elem:
+ if self.md.is_block_level(e.tag):
+ self._prettifyETree(e)
+ if not elem.tail or not elem.tail.strip():
+ elem.tail = i
+
+ def run(self, root: etree.Element) -> None:
+ """ Add line breaks to `Element` object and its children. """
+
+ self._prettifyETree(root)
+ # Do `<br />`'s separately as they are often in the middle of
+ # inline content and missed by `_prettifyETree`.
+ brs = root.iter('br')
+ for br in brs:
+ if not br.tail or not br.tail.strip():
+ br.tail = '\n'
+ else:
+ br.tail = '\n%s' % br.tail
+ # Clean up extra empty lines at end of code blocks.
+ pres = root.iter('pre')
+ for pre in pres:
+ if len(pre) and pre[0].tag == 'code':
+ code = pre[0]
+ # Only prettify code containing text only
+ if not len(code) and code.text is not None:
+ code.text = util.AtomicString(code.text.rstrip() + '\n')
+
+
+class UnescapeTreeprocessor(Treeprocessor):
+ """ Restore escaped chars """
+
+ RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))
+
+ def _unescape(self, m: re.Match[str]) -> str:
+ return chr(int(m.group(1)))
+
+ def unescape(self, text: str) -> str:
+ return self.RE.sub(self._unescape, text)
+
+ def run(self, root: etree.Element) -> None:
+ """ Loop over all elements and unescape all text. """
+ for elem in root.iter():
+ # Unescape text content
+ if elem.text and not elem.tag == 'code':
+ elem.text = self.unescape(elem.text)
+ # Unescape tail content
+ if elem.tail:
+ elem.tail = self.unescape(elem.tail)
+ # Unescape attribute values
+ for key, value in elem.items():
+ elem.set(key, self.unescape(value))