1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
|
# Abbreviation Extension for Python-Markdown
# ==========================================
# This extension adds abbreviation handling to Python-Markdown.
# See https://Python-Markdown.github.io/extensions/abbreviations
# for documentation.
# Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/)
# and [Seemant Kulleen](http://www.kulleen.org/)
# All changes Copyright 2008-2014 The Python Markdown Project
# License: [BSD](https://opensource.org/licenses/bsd-license.php)
"""
This extension adds abbreviation handling to Python-Markdown.
See the [documentation](https://Python-Markdown.github.io/extensions/abbreviations)
for details.
"""
from __future__ import annotations
from . import Extension
from ..blockprocessors import BlockProcessor
from ..inlinepatterns import InlineProcessor
from ..treeprocessors import Treeprocessor
from ..util import AtomicString, deprecated
from typing import TYPE_CHECKING
import re
import xml.etree.ElementTree as etree
if TYPE_CHECKING: # pragma: no cover
from .. import Markdown
from ..blockparsers import BlockParser
class AbbrExtension(Extension):
    """ Abbreviation Extension for Python-Markdown. """

    def __init__(self, **kwargs):
        """ Initiate Extension and set up configs. """
        # Each config entry is a two-item list: the default value followed by
        # its help text.
        self.config = {
            'glossary': [
                {},
                'A dictionary where the `key` is the abbreviation and the `value` is the definition. '
                'Default: `{}`'
            ],
        }
        """ Default configuration options. """
        super().__init__(**kwargs)
        # `abbrs` is the live mapping shared with the processors registered in
        # `extendMarkdown`; `glossary` holds the entries that survive `reset`.
        self.abbrs = {}
        self.glossary = {}

    def reset(self):
        """ Clear all previously defined abbreviations, then restore the glossary entries. """
        # Mutate in place: the registered processors hold a reference to this dict.
        self.abbrs.clear()
        self.abbrs.update(self.glossary)

    def reset_glossary(self):
        """ Clear all abbreviations from the glossary. """
        self.glossary.clear()

    def load_glossary(self, dictionary: dict[str, str]):
        """Adds `dictionary` to our glossary. Any abbreviations that already exist will be overwritten."""
        if dictionary:
            # Later keys win in a dict display, so `dictionary` must come last
            # for its entries to replace existing ones.
            self.glossary = {**self.glossary, **dictionary}

    def extendMarkdown(self, md):
        """ Insert `AbbrTreeprocessor` and `AbbrBlockprocessor`. """
        configured = self.config['glossary'][0]
        if configured:
            # The configured glossary is the base layer: entries already added
            # through `load_glossary` take precedence over it.
            self.glossary = {**configured, **self.glossary}
        self.abbrs.update(self.glossary)
        # NOTE(review): presumably registering lets the Markdown instance call
        # `reset` on this extension - confirm against `Markdown.registerExtension`.
        md.registerExtension(self)
        md.treeprocessors.register(AbbrTreeprocessor(md, self.abbrs), 'abbr', 7)
        md.parser.blockprocessors.register(AbbrBlockprocessor(md.parser, self.abbrs), 'abbr', 16)
class AbbrTreeprocessor(Treeprocessor):
    """ Replace abbreviation text with `<abbr>` elements. """

    def __init__(self, md: Markdown | None = None, abbrs: dict | None = None):
        """
        Store the abbreviation mapping.

        Arguments:
            md: The Markdown instance passed on to the base class.
            abbrs: Mapping of abbreviation to title. The same dict object is kept
                (not copied), so later changes made by the caller are seen here.
        """
        self.abbrs: dict = abbrs if abbrs is not None else {}
        # Compiled in `run` from the abbreviations known at that time.
        self.RE: re.Pattern[str] | None = None
        super().__init__(md)

    def _wrap_matches(self, text: str, container: etree.Element, index: int) -> str:
        """
        Wrap each abbreviation found in `text` in an `abbr` element.

        Matches are handled from last to first and every new element is inserted
        at the same `index` of `container`, so the elements end up in document
        order. Each element's tail receives the text that followed its match.

        Returns:
            The text that precedes the first wrapped match.
        """
        for m in reversed(list(self.RE.finditer(text))):
            title = self.abbrs[m.group(0)]
            if not title:
                # An empty title means there is nothing to show; leave the text as is.
                continue
            abbr = etree.Element('abbr', {'title': title})
            abbr.text = AtomicString(m.group(0))
            abbr.tail = text[m.end():]
            container.insert(index, abbr)
            text = text[:m.start()]
        return text

    def iter_element(self, el: etree.Element, parent: etree.Element | None = None) -> None:
        ''' Recursively iterate over elements, run regex on text and wrap matches in `abbr` tags. '''
        # Children first, in reverse, so that siblings inserted after a child
        # do not shift the positions still to be visited.
        for child in reversed(el):
            self.iter_element(child, el)
        # `AtomicString` marks text that must not be processed further, so it is
        # left untouched (slicing it would also turn it back into a plain `str`).
        if el.text and not isinstance(el.text, AtomicString):
            el.text = self._wrap_matches(el.text, el, 0)
        if parent is not None and el.tail and not isinstance(el.tail, AtomicString):
            # New elements for the tail become siblings placed right after `el`.
            index = list(parent).index(el) + 1
            el.tail = self._wrap_matches(el.tail, parent, index)

    def run(self, root: etree.Element) -> etree.Element | None:
        ''' Step through tree to find known abbreviations. '''
        if not self.abbrs:
            # No abbreviations defined. Skip running processor.
            return
        # Build and compile regex. Longest keys come first so that a longer
        # abbreviation is preferred over a shorter one it starts with.
        abbr_list = sorted(self.abbrs, key=len, reverse=True)
        alternation = '|'.join(re.escape(key) for key in abbr_list)
        self.RE = re.compile(f"\\b(?:{alternation})\\b")
        # Step through tree and modify on matches
        self.iter_element(root)
class AbbrBlockprocessor(BlockProcessor):
    """ Parse text for abbreviation references. """

    # Matches a line of the form `*[ABBR]: title`; the title may start on the
    # following line.
    RE = re.compile(r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)

    def __init__(self, parser: BlockParser, abbrs: dict):
        """
        Store the abbreviation mapping.

        Arguments:
            parser: The block parser passed on to the base class.
            abbrs: Mapping of abbreviation to title; updated in place by `run`.
        """
        self.abbrs: dict = abbrs
        super().__init__(parser)

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Accept every block; `run` returns `False` when the block holds no reference. """
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
        """
        Find and remove all abbreviation references from the text.
        Each reference is added to the abbreviation collection.

        Only the first reference in the block is consumed per call; the text
        around it is pushed back onto `blocks` as separate blocks.

        Returns:
            `True` if a reference with a non-empty abbreviation and title was
            consumed, otherwise `False` with the block restored to `blocks`.
        """
        block = blocks.pop(0)
        m = self.RE.search(block)
        if m:
            abbr = m.group('abbr').strip()
            title = m.group('title').strip()
            if title and abbr:
                if title == "''" or title == '""':
                    # An empty quoted title removes the abbreviation. It may
                    # never have been defined, so do not fail when it is absent.
                    self.abbrs.pop(abbr, None)
                else:
                    self.abbrs[abbr] = title
                if block[m.end():].strip():
                    # Add any content after match back to blocks as separate block
                    blocks.insert(0, block[m.end():].lstrip('\n'))
                if block[:m.start()].strip():
                    # Add any content before match back to blocks as separate block
                    blocks.insert(0, block[:m.start()].rstrip('\n'))
                return True
        # No match. Restore block.
        blocks.insert(0, block)
        return False
# Alias that keeps the old `AbbrPreprocessor` name available. It is
# `AbbrBlockprocessor` wrapped by `deprecated` with a message pointing to the
# new name.
# NOTE(review): presumably the wrapper emits a deprecation warning on use -
# confirm against `util.deprecated`.
AbbrPreprocessor = deprecated("This class has been renamed to `AbbrBlockprocessor`.")(AbbrBlockprocessor)
@deprecated("This class will be removed in the future; use `AbbrTreeprocessor` instead.")
class AbbrInlineProcessor(InlineProcessor):
    """ Inline pattern which turns a matched abbreviation into an `<abbr>` element. """

    def __init__(self, pattern: str, title: str):
        """ Keep the `title` to put on every element created for `pattern`. """
        super().__init__(pattern)
        self.title = title

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]:
        """ Return an `abbr` element for the match together with the span it replaces. """
        element = etree.Element('abbr')
        # The matched text is stored as an `AtomicString`.
        element.text = AtomicString(m.group('abbr'))
        element.set('title', self.title)
        span_start, span_end = m.span(0)
        return element, span_start, span_end
def makeExtension(**kwargs):  # pragma: no cover
    """ Build an `AbbrExtension`, forwarding all keyword arguments to it. """
    extension = AbbrExtension(**kwargs)
    return extension
|