Diffstat (limited to '.venv/lib/python3.12/site-packages/docutils/utils/code_analyzer.py')
-rw-r--r--  .venv/lib/python3.12/site-packages/docutils/utils/code_analyzer.py  136
1 file changed, 136 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/docutils/utils/code_analyzer.py b/.venv/lib/python3.12/site-packages/docutils/utils/code_analyzer.py
new file mode 100644
index 00000000..cd020bb1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docutils/utils/code_analyzer.py
@@ -0,0 +1,136 @@
+# :Author: Georg Brandl; Lea Wiemann; Günter Milde
+# :Date: $Date: 2022-11-16 15:01:31 +0100 (Mi, 16. Nov 2022) $
+# :Copyright: This module has been placed in the public domain.
+
+"""Lexical analysis of formal languages (i.e. code) using Pygments."""
+
+from docutils import ApplicationError
+try:
+    import pygments
+    from pygments.lexers import get_lexer_by_name
+    from pygments.formatters.html import _get_ttype_class
+    with_pygments = True
+except ImportError:
+    with_pygments = False
+
+# Filter the following token types from the list of class arguments:
+unstyled_tokens = ['token',  # Token (base token type)
+                   'text',   # Token.Text
+                   '']       # short name for Token and Text
+# (Add, e.g., Token.Punctuation with ``unstyled_tokens += ['punctuation']``.)
+
+
+class LexerError(ApplicationError):
+    pass
+
+
+class Lexer:
+    """Parse `code` lines and yield "classified" tokens.
+
+    Arguments
+
+      code       -- string of source code to parse,
+      language   -- formal language the code is written in,
+      tokennames -- either 'long', 'short', or 'none' (see below).
+
+    Merge subsequent tokens of the same token-type.
+
+    Iterating over an instance yields the tokens as ``(tokentype, value)``
+    tuples. The value of `tokennames` configures the naming of the tokentype:
+
+      'long':  downcased full token type name,
+      'short': short name defined by pygments.token.STANDARD_TYPES
+               (= class argument used in pygments html output),
+      'none':  skip lexical analysis.
+    """
+
+    def __init__(self, code, language, tokennames='short'):
+        """
+        Set up a lexical analyzer for `code` in `language`.
+        """
+        self.code = code
+        self.language = language
+        self.tokennames = tokennames
+        self.lexer = None
+        # get lexical analyzer for `language`:
+        if language in ('', 'text') or tokennames == 'none':
+            return
+        if not with_pygments:
+            raise LexerError('Cannot analyze code. '
+                             'Pygments package not found.')
+        try:
+            self.lexer = get_lexer_by_name(self.language)
+        except pygments.util.ClassNotFound:
+            raise LexerError('Cannot analyze code. '
+                             'No Pygments lexer found for "%s".' % language)
+        # Since version 1.2 (released Jan 01, 2010) Pygments has a
+        # TokenMergeFilter: ``self.merge(tokens)`` in __iter__ could be
+        # replaced by ``self.lexer.add_filter('tokenmerge')`` here.
+        # However, `merge` below also strips a final newline added by pygments.
+        #
+        # self.lexer.add_filter('tokenmerge')
+
+    def merge(self, tokens):
+        """Merge subsequent tokens of same token-type.
+
+        Also strip the final newline (added by pygments).
+        """
+        tokens = iter(tokens)
+        (lasttype, lastval) = next(tokens)
+        for ttype, value in tokens:
+            if ttype is lasttype:
+                lastval += value
+            else:
+                yield lasttype, lastval
+                (lasttype, lastval) = (ttype, value)
+        if lastval.endswith('\n'):
+            lastval = lastval[:-1]
+        if lastval:
+            yield lasttype, lastval
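+
+    # Illustrative sketch (an editorial example with hypothetical token
+    # types, not part of the original source) of the `merge` semantics:
+    #   [(Name, 'foo'), (Name, 'bar'), (Text, 'baz\n')]
+    # is merged into
+    #   [(Name, 'foobar'), (Text, 'baz')]
+    # -- equal-typed neighbours are joined, the trailing newline dropped.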
+
+    def __iter__(self):
+        """Parse self.code and yield "classified" tokens.
+        """
+        if self.lexer is None:
+            yield [], self.code
+            return
+        tokens = pygments.lex(self.code, self.lexer)
+        for tokentype, value in self.merge(tokens):
+            if self.tokennames == 'long':  # long CSS class args
+                classes = str(tokentype).lower().split('.')
+            else:  # short CSS class args
+                classes = [_get_ttype_class(tokentype)]
+            classes = [cls for cls in classes if cls not in unstyled_tokens]
+            yield classes, value
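+
+# Illustrative usage sketch (assumes Pygments is installed; not part of
+# the module): lex a one-line Python snippet with short CSS class names.
+# The exact token stream depends on the installed Pygments version.
+#
+#     for classes, value in Lexer('x = 1\n', 'python', 'short'):
+#         print(classes, repr(value))
+#
+# With ``tokennames='none'`` (or an empty `language`) lexical analysis is
+# skipped and the code is yielded unanalyzed as a single ``([], code)`` item.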
+
+
+class NumberLines:
+    """Insert linenumber-tokens at the start of every code line.
+
+    Arguments
+
+      tokens    -- iterable of ``(classes, value)`` tuples
+      startline -- first line number
+      endline   -- last line number
+
+    Iterating over an instance yields the tokens with a
+    ``(['ln'], '<the line number>')`` token added for every code line.
+    Multi-line tokens are split."""
+
+    def __init__(self, tokens, startline, endline):
+        self.tokens = tokens
+        self.startline = startline
+        # pad linenumbers, e.g. endline == 100 -> fmt_str = '%3d '
+        self.fmt_str = '%%%dd ' % len(str(endline))
+
+    def __iter__(self):
+        lineno = self.startline
+        yield ['ln'], self.fmt_str % lineno
+        for ttype, value in self.tokens:
+            lines = value.split('\n')
+            for line in lines[:-1]:
+                yield ttype, line + '\n'
+                lineno += 1
+                yield ['ln'], self.fmt_str % lineno
+            yield ttype, lines[-1]
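+
+# Illustrative sketch of the intended pipeline (not part of the module):
+# number the lines of a lexed snippet, as docutils' "code" directive does
+# for its "number-lines" option.
+#
+#     tokens = Lexer('a = 1\nb = 2\n', 'python', 'short')
+#     for classes, value in NumberLines(tokens, startline=1, endline=2):
+#         print(classes, repr(value))
+#
+# Every code line is preceded by a ``(['ln'], '<padded number> ')`` token;
+# multi-line tokens are split at the newlines.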