diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/markdown/extensions/attr_list.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/markdown/extensions/attr_list.py | 203 |
1 files changed, 203 insertions, 0 deletions
# Attribute List Extension for Python-Markdown
# ============================================

# Adds attribute list syntax. Inspired by
# [Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
# feature of the same name.

# See https://Python-Markdown.github.io/extensions/attr_list
# for documentation.

# Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).

# All changes Copyright 2011-2014 The Python Markdown Project

# License: [BSD](https://opensource.org/licenses/bsd-license.php)

"""
Adds attribute list syntax. Inspired by
[Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
feature of the same name.

See the [documentation](https://Python-Markdown.github.io/extensions/attr_list)
for details.
"""

from __future__ import annotations
from typing import TYPE_CHECKING

from . import Extension
from ..treeprocessors import Treeprocessor
import re

if TYPE_CHECKING:  # pragma: no cover
    from xml.etree.ElementTree import Element


# The four `_handle_*` functions are `re.Scanner` callbacks: `s` is the scanner
# instance (unused) and `t` is the text matched by the associated pattern.
# Each returns a `(key, value)` tuple.

def _handle_double_quote(s, t):
    """ Handle `key="value"`: split on the first `=` and drop the double quotes. """
    k, v = t.split('=', 1)
    return k, v.strip('"')


def _handle_single_quote(s, t):
    """ Handle `key='value'`: split on the first `=` and drop the single quotes. """
    k, v = t.split('=', 1)
    return k, v.strip("'")


def _handle_key_value(s, t):
    """ Handle an unquoted `key=value` token.

    Returns a tuple (not the list produced by `str.split`) so that every handler
    yields the same type, matching the `list[tuple[str, str]]` annotation of
    `get_attrs_and_remainder`.
    """
    k, v = t.split('=', 1)
    return k, v


def _handle_word(s, t):
    """ Handle a bare word.

    `.name` yields `('.', name)` (a class to append), `#name` yields `('id', name)`,
    and any other word yields `(word, word)`.
    """
    if t.startswith('.'):
        return '.', t[1:]
    if t.startswith('#'):
        return 'id', t[1:]
    return t, t


# Patterns are tried in order, so the quoted forms take precedence over the
# unquoted `key=value` form, which takes precedence over a bare word.
# Single spaces between tokens are consumed and discarded.
_scanner = re.Scanner([
    (r'[^ =}]+=".*?"', _handle_double_quote),
    (r"[^ =}]+='.*?'", _handle_single_quote),
    (r'[^ =}]+=[^ =}]+', _handle_key_value),
    (r'[^ =}]+', _handle_word),
    (r' ', None)
])


def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]:
    """ Parse attribute list and return a list of attribute tuples.

    Additionally, return any text that remained after a curly brace. In typical cases, its presence
    should mean that the input does not match the intended attribute list syntax.

    The returned remainder is either empty or starts with the `}` at which parsing stopped.
    """
    attrs, remainder = _scanner.scan(attrs_string)
    # To keep historic behavior, discard all unparsable text prior to '}'.
    index = remainder.find('}')
    remainder = remainder[index:] if index != -1 else ''
    return attrs, remainder


def get_attrs(str: str) -> list[tuple[str, str]]:  # pragma: no cover
    """ Soft-deprecated. Prefer `get_attrs_and_remainder`. """
    # NOTE: the parameter name shadows the builtin `str`; it is kept unchanged
    # because it is part of the public signature.
    return get_attrs_and_remainder(str)[0]


def isheader(elem: Element) -> bool:
    """ Return `True` if `elem` is an `h1` through `h6` element. """
    return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']


class AttrListTreeprocessor(Treeprocessor):
    """ Tree processor which finds attribute lists in element text and applies them. """

    # `{` with an optional `:`, optional spaces, then the attribute text (group 1),
    # which may not start with `}`, a newline or a space, up to a closing `}`.
    # Group 1 is greedy, so the match runs to the LAST `}` on the line.
    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'
    # Attribute list at the very end of the text, preceded by at least one space.
    HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
    # Attribute list alone on the last line of the text.
    BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
    # Attribute list at the very start of the text.
    INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
    # Matches runs of characters that `sanitize_name` replaces with `_`.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc: Element) -> None:
        """ Walk `doc` and apply any attribute lists found to their elements.

        For block-level elements the attribute list is looked for at the end of the
        element's content and, when it is valid, removed from that content. For all
        other elements it is looked for at the start of the element's tail.
        """
        for elem in doc.iter():
            if self.md.is_block_level(elem.tag):
                # Block level: check for `attrs` on last line of text
                RE = self.BLOCK_RE
                if isheader(elem) or elem.tag in ['dt', 'td', 'th']:
                    # header, def-term, or table cell: check for attributes at end of element
                    RE = self.HEADER_RE
                if len(elem) and elem.tag == 'li':
                    # special case list items. children may include a `ul` or `ol`.
                    pos = None
                    # find the `ul` or `ol` position
                    for i, child in enumerate(elem):
                        if child.tag in ['ul', 'ol']:
                            pos = i
                            break
                    if pos is None and elem[-1].tail:
                        # use tail of last child. no `ul` or `ol`.
                        m = RE.search(elem[-1].tail)
                        if m:
                            if not self.assign_attrs(elem, m.group(1), strict=True):
                                elem[-1].tail = elem[-1].tail[:m.start()]
                    elif pos is not None and pos > 0 and elem[pos-1].tail:
                        # use tail of last child before `ul` or `ol`
                        m = RE.search(elem[pos-1].tail)
                        if m:
                            if not self.assign_attrs(elem, m.group(1), strict=True):
                                elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                    elif elem.text:
                        # use text. `ul` is first child.
                        m = RE.search(elem.text)
                        if m:
                            if not self.assign_attrs(elem, m.group(1), strict=True):
                                elem.text = elem.text[:m.start()]
                elif len(elem) and elem[-1].tail:
                    # has children. Get from tail of last child
                    m = RE.search(elem[-1].tail)
                    if m:
                        if not self.assign_attrs(elem, m.group(1), strict=True):
                            elem[-1].tail = elem[-1].tail[:m.start()]
                            if isheader(elem):
                                # clean up trailing #s
                                elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                elif elem.text:
                    # no children. Get from text.
                    m = RE.search(elem.text)
                    if m:
                        if not self.assign_attrs(elem, m.group(1), strict=True):
                            elem.text = elem.text[:m.start()]
                            if isheader(elem):
                                # clean up trailing #s
                                elem.text = elem.text.rstrip('#').rstrip()
            elif elem.tail:
                # inline: check for `attrs` at start of tail
                m = self.INLINE_RE.match(elem.tail)
                if m:
                    remainder = self.assign_attrs(elem, m.group(1))
                    if remainder.startswith('}'):
                        # `INLINE_RE` over-matched up to a later `}`. `remainder` begins
                        # with the brace that really closed the attribute list, so drop
                        # that brace and put the rest back *in document order*: the
                        # over-matched text first, then everything from the end of
                        # group 1 onwards (which includes the brace the regex consumed).
                        elem.tail = remainder[1:] + elem.tail[m.end(1):]
                    else:
                        elem.tail = elem.tail[m.end():] + remainder

    def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:
        """ Assign `attrs` to element.

        If the `attrs_string` has an extra closing curly brace, the remaining text is returned.

        The `strict` argument controls whether to still assign `attrs` if there is a remaining `}`.
        When `strict` is true and there is remaining text, nothing is assigned.
        """
        attrs, remainder = get_attrs_and_remainder(attrs_string)
        if strict and remainder:
            return remainder

        for k, v in attrs:
            if k == '.':
                # add to class
                cls = elem.get('class')
                if cls:
                    elem.set('class', '{} {}'.format(cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attribute `k` with `v`
                elem.set(self.sanitize_name(k), v)
        # The text that we initially over-matched will be put back.
        return remainder

    def sanitize_name(self, name: str) -> str:
        """
        Sanitize name as 'an XML Name, minus the `:`.'
        See <https://www.w3.org/TR/REC-xml-names/#NT-NCName>.

        Each run of characters matched by `NAME_RE` is replaced with a single `_`.
        """
        return self.NAME_RE.sub('_', name)


class AttrListExtension(Extension):
    """ Attribute List extension for Python-Markdown """
    def extendMarkdown(self, md):
        """ Register `AttrListTreeprocessor` as `attr_list` with priority 8. """
        md.treeprocessors.register(AttrListTreeprocessor(md), 'attr_list', 8)
        md.registerExtension(self)


def makeExtension(**kwargs):  # pragma: no cover
    """ Return an `AttrListExtension` built from the given keyword arguments. """
    return AttrListExtension(**kwargs)