about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/markdown/extensions/attr_list.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/markdown/extensions/attr_list.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/markdown/extensions/attr_list.py')
-rw-r--r--.venv/lib/python3.12/site-packages/markdown/extensions/attr_list.py203
1 files changed, 203 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/markdown/extensions/attr_list.py b/.venv/lib/python3.12/site-packages/markdown/extensions/attr_list.py
new file mode 100644
index 00000000..9206d11e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/markdown/extensions/attr_list.py
@@ -0,0 +1,203 @@
+# Attribute List Extension for Python-Markdown
+# ============================================
+
+# Adds attribute list syntax. Inspired by
+# [Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
+# feature of the same name.
+
+# See https://Python-Markdown.github.io/extensions/attr_list
+# for documentation.
+
+# Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).
+
+# All changes Copyright 2011-2014 The Python Markdown Project
+
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
+
+"""
+Adds attribute list syntax. Inspired by
+[Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
+feature of the same name.
+
+See the [documentation](https://Python-Markdown.github.io/extensions/attr_list)
+for details.
+"""
+
+from __future__ import annotations
+from typing import TYPE_CHECKING
+
+from . import Extension
+from ..treeprocessors import Treeprocessor
+import re
+
+if TYPE_CHECKING:  # pragma: no cover
+    from xml.etree.ElementTree import Element
+
+
+def _handle_double_quote(s, t):
+    """ Scanner action for a `key="value"` token.
+
+    `s` is the first argument supplied by the scanner and is not used; `t` is the matched
+    token text. Returns `(key, value)` with the double quotes around the value removed.
+    """
+    key, quoted = t.split('=', 1)
+    value = quoted.strip('"')
+    return key, value
+
+
+def _handle_single_quote(s, t):
+    """ Scanner action for a `key='value'` token.
+
+    `s` is the first argument supplied by the scanner and is not used; `t` is the matched
+    token text. Returns `(key, value)` with the single quotes around the value removed.
+    """
+    key, quoted = t.split('=', 1)
+    value = quoted.strip("'")
+    return key, value
+
+
+def _handle_key_value(s, t):
+    """ Scanner action for an unquoted `key=value` token.
+
+    `s` is not used. Returns the two-item list produced by splitting `t` on its first `=`.
+    """
+    pair = t.split('=', 1)
+    return pair
+
+
+def _handle_word(s, t):
+    """ Scanner action for a bare word.
+
+    `.name` becomes `('.', 'name')`, `#name` becomes `('id', 'name')`, and any other word
+    is returned as both key and value. `s` is not used.
+    """
+    marker, rest = t[:1], t[1:]
+    if marker == '.':
+        return '.', rest
+    if marker == '#':
+        return 'id', rest
+    return t, t
+
+
+# Tokenizer for the text inside an attribute list. Patterns are listed from most to least
+# specific, so quoted `key="value"` / `key='value'` forms are tried before the unquoted
+# `key=value` form and the bare-word form.
+_scanner = re.Scanner([
+    # key="value" (the quoted value may contain spaces, `=` and `}`)
+    (r'[^ =}]+=".*?"', _handle_double_quote),
+    # key='value'
+    (r"[^ =}]+='.*?'", _handle_single_quote),
+    # key=value with no quotes, spaces or `}` in the value
+    (r'[^ =}]+=[^ =}]+', _handle_key_value),
+    # bare word: `.class`, `#id`, or a name used as both key and value
+    (r'[^ =}]+', _handle_word),
+    # a single space separates tokens and produces no output
+    (r' ', None)
+])
+
+
+def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]:
+    """ Parse an attribute list string into key/value pairs plus leftover text.
+
+    The first item returned is the list of attributes recognised by the scanner. The second
+    is the unscanned text starting at its first `}`, or an empty string when the unscanned
+    text holds no `}`. A non-empty remainder usually means the input was not a well-formed
+    attribute list.
+    """
+    parsed, leftover = _scanner.scan(attrs_string)
+    brace_at = leftover.find('}')
+    if brace_at == -1:
+        # No stray closing brace: whatever could not be parsed is dropped.
+        return parsed, ''
+    # To keep historic behavior, unparsable text in front of the `}` is discarded.
+    return parsed, leftover[brace_at:]
+
+
+def get_attrs(str: str) -> list[tuple[str, str]]:  # pragma: no cover
+    """ Soft-deprecated. Prefer `get_attrs_and_remainder`.
+
+    Returns only the parsed attributes; any remainder is ignored.
+    """
+    attrs, _remainder = get_attrs_and_remainder(str)
+    return attrs
+
+
+def isheader(elem: Element) -> bool:
+    """ Return `True` when the tag of `elem` is one of `h1` through `h6`. """
+    header_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
+    return elem.tag in header_tags
+
+
+class AttrListTreeprocessor(Treeprocessor):
+    """ Find attribute lists in element text and apply them as element attributes. """
+
+    # Core pattern: `{` or `{:`, optional spaces, then the attribute text (group 1) and a
+    # closing `}`. The capture is greedy, so on a line holding several `}` it runs to the last
+    # one; such an over-match is detected afterwards by `get_attrs_and_remainder`.
+    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'
+    # Attribute list at the end of the text, preceded by at least one space.
+    HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
+    # Attribute list on a line of its own at the end of the text.
+    BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
+    # Attribute list at the very start of the text.
+    INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
+    # Runs of characters that `sanitize_name` replaces with `_`.
+    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
+                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
+                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
+                         r'\uf900-\ufdcf\ufdf0-\ufffd'
+                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')
+
+    def run(self, doc: Element) -> None:
+        """ Walk every element under `doc` and apply any attribute list found for it.
+
+        Block-level elements (as decided by `self.md.is_block_level`) take their list from the
+        end of their content; all other elements take it from the start of their `tail`.
+        """
+        for elem in doc.iter():
+            if self.md.is_block_level(elem.tag):
+                self._run_block(elem)
+            elif elem.tail:
+                # inline: check for `attrs` at start of tail
+                m = self.INLINE_RE.match(elem.tail)
+                if m:
+                    remainder = self.assign_attrs(elem, m.group(1))
+                    if remainder:
+                        # The pattern over-matched. `remainder` begins with the `}` that
+                        # really closed the list; the text after it, the `}` the pattern
+                        # consumed and the rest of the tail go back in their original order.
+                        elem.tail = remainder[1:] + elem.tail[m.end(1):]
+                    else:
+                        elem.tail = elem.tail[m.end():]
+
+    def _run_block(self, elem: Element) -> None:
+        """ Locate the text that may end with an attribute list for block-level `elem`. """
+        if isheader(elem) or elem.tag in ['dt', 'td', 'th']:
+            # header, def-term, or table cell: check for attributes at end of element
+            pattern = self.HEADER_RE
+        else:
+            # Block level: check for `attrs` on last line of text
+            pattern = self.BLOCK_RE
+
+        if len(elem) and elem.tag == 'li':
+            # special case list items. children may include a `ul` or `ol`.
+            pos = next((i for i, child in enumerate(elem) if child.tag in ['ul', 'ol']), None)
+            if pos is None and elem[-1].tail:
+                # use tail of last child. no `ul` or `ol`.
+                self._take_trailing_attrs(elem, elem[-1], 'tail', pattern)
+            elif pos is not None and pos > 0 and elem[pos-1].tail:
+                # use tail of last child before `ul` or `ol`
+                self._take_trailing_attrs(elem, elem[pos-1], 'tail', pattern)
+            elif elem.text:
+                # use text. `ul` is first child.
+                self._take_trailing_attrs(elem, elem, 'text', pattern)
+        elif len(elem) and elem[-1].tail:
+            # has children. Get from tail of last child
+            self._take_trailing_attrs(elem, elem[-1], 'tail', pattern)
+        elif elem.text:
+            # no children. Get from text.
+            self._take_trailing_attrs(elem, elem, 'text', pattern)
+
+    def _take_trailing_attrs(self, elem: Element, holder: Element, field: str, pattern: re.Pattern[str]) -> None:
+        """ Apply an attribute list found in `holder.<field>` to `elem` and cut it from the text.
+
+        `field` is `'text'` or `'tail'`. Nothing changes when `pattern` does not match, or when
+        strict parsing leaves a remainder (the text is then not a valid attribute list).
+        """
+        content = getattr(holder, field)
+        m = pattern.search(content)
+        if not m:
+            return
+        if self.assign_attrs(elem, m.group(1), strict=True):
+            return
+        content = content[:m.start()]
+        if isheader(elem):
+            # clean up trailing #s
+            content = content.rstrip('#').rstrip()
+        setattr(holder, field, content)
+
+    def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:
+        """ Assign `attrs` to element.
+
+        If the `attrs_string` has an extra closing curly brace, the remaining text is returned.
+
+        The `strict` argument controls whether to still assign `attrs` if there is a remaining `}`.
+        With `strict=True` and a non-empty remainder, `elem` is left untouched.
+        """
+        attrs, remainder = get_attrs_and_remainder(attrs_string)
+        if strict and remainder:
+            return remainder
+
+        for k, v in attrs:
+            if k == '.':
+                # add to class
+                cls = elem.get('class')
+                elem.set('class', '{} {}'.format(cls, v) if cls else v)
+            else:
+                # assign attribute `k` with `v`
+                elem.set(self.sanitize_name(k), v)
+        # The text that we initially over-matched will be put back.
+        return remainder
+
+    def sanitize_name(self, name: str) -> str:
+        """
+        Sanitize name as 'an XML Name, minus the `:`.'
+        See <https://www.w3.org/TR/REC-xml-names/#NT-NCName>.
+
+        Each run of characters matched by `NAME_RE` is replaced with a single `_`.
+        """
+        return self.NAME_RE.sub('_', name)
+
+
+class AttrListExtension(Extension):
+    """ Attribute List extension for Python-Markdown """
+
+    def extendMarkdown(self, md):
+        """ Register the attribute list tree processor and this extension on `md`. """
+        processor = AttrListTreeprocessor(md)
+        md.treeprocessors.register(processor, 'attr_list', 8)
+        md.registerExtension(self)
+
+
+def makeExtension(**kwargs):  # pragma: no cover
+    """ Build an `AttrListExtension`, forwarding all keyword arguments to it. """
+    extension = AttrListExtension(**kwargs)
+    return extension