about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/docutils/utils/__init__.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/docutils/utils/__init__.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/docutils/utils/__init__.py')
-rw-r--r--.venv/lib/python3.12/site-packages/docutils/utils/__init__.py861
1 files changed, 861 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/docutils/utils/__init__.py b/.venv/lib/python3.12/site-packages/docutils/utils/__init__.py
new file mode 100644
index 00000000..777f8013
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docutils/utils/__init__.py
@@ -0,0 +1,861 @@
+# $Id: __init__.py 9544 2024-02-17 10:37:45Z milde $
+# Author: David Goodger <goodger@python.org>
+# Copyright: This module has been placed in the public domain.
+
+"""
+Miscellaneous utilities for the documentation utilities.
+"""
+
+__docformat__ = 'reStructuredText'
+
+import sys
+import os
+import os.path
+from pathlib import PurePath, Path
+import re
+import itertools
+import warnings
+import unicodedata
+
+from docutils import ApplicationError, DataError, __version_info__
+from docutils import io, nodes
+# for backwards compatibility
+from docutils.nodes import unescape  # noqa: F401
+
+
+class SystemMessage(ApplicationError):
+
+    def __init__(self, system_message, level):
+        Exception.__init__(self, system_message.astext())
+        self.level = level
+
+
+class SystemMessagePropagation(ApplicationError):
+    pass
+
+
+class Reporter:
+
+    """
+    Info/warning/error reporter and ``system_message`` element generator.
+
+    Five levels of system messages are defined, along with corresponding
+    methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.
+
+    There is typically one Reporter object per process.  A Reporter object is
+    instantiated with thresholds for reporting (generating warnings) and
+    halting processing (raising exceptions), a switch to turn debug output on
+    or off, and an I/O stream for warnings.  These are stored as instance
+    attributes.
+
+    When a system message is generated, its level is compared to the stored
+    thresholds, and a warning or error is generated as appropriate.  Debug
+    messages are produced if the stored debug switch is on, independently of
+    other thresholds.  Message output is sent to the stored warning stream if
+    not set to ''.
+
+    The Reporter class also employs a modified form of the "Observer" pattern
+    [GoF95]_ to track system messages generated.  The `attach_observer` method
+    should be called before parsing, with a bound method or function which
+    accepts system messages.  The observer can be removed with
+    `detach_observer`, and another added in its place.
+
+    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
+       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
+       1995.
+    """
+
+    levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
+    """List of names for system message levels, indexed by level."""
+
+    # system message level constants:
+    (DEBUG_LEVEL,
+     INFO_LEVEL,
+     WARNING_LEVEL,
+     ERROR_LEVEL,
+     SEVERE_LEVEL) = range(5)
+
+    def __init__(self, source, report_level, halt_level, stream=None,
+                 debug=False, encoding=None, error_handler='backslashreplace'):
+        """
+        :Parameters:
+            - `source`: The path to or description of the source data.
+            - `report_level`: The level at or above which warning output will
+              be sent to `stream`.
+            - `halt_level`: The level at or above which `SystemMessage`
+              exceptions will be raised, halting execution.
+            - `debug`: Show debug (level=0) system messages?
+            - `stream`: Where warning output is sent.  Can be file-like (has a
+              ``.write`` method), a string (file name, opened for writing),
+              '' (empty string) or `False` (for discarding all stream messages)
+              or `None` (implies `sys.stderr`; default).
+            - `encoding`: The output encoding.
+            - `error_handler`: The error handler for stderr output encoding.
+        """
+
+        self.source = source
+        """The path to or description of the source data."""
+
+        self.error_handler = error_handler
+        """The character encoding error handler."""
+
+        self.debug_flag = debug
+        """Show debug (level=0) system messages?"""
+
+        self.report_level = report_level
+        """The level at or above which warning output will be sent
+        to `self.stream`."""
+
+        self.halt_level = halt_level
+        """The level at or above which `SystemMessage` exceptions
+        will be raised, halting execution."""
+
+        if not isinstance(stream, io.ErrorOutput):
+            stream = io.ErrorOutput(stream, encoding, error_handler)
+
+        self.stream = stream
+        """Where warning output is sent."""
+
+        self.encoding = encoding or getattr(stream, 'encoding', 'ascii')
+        """The output character encoding."""
+
+        self.observers = []
+        """List of bound methods or functions to call with each system_message
+        created."""
+
+        self.max_level = -1
+        """The highest level system message generated so far."""
+
+    def set_conditions(self, category, report_level, halt_level,
+                       stream=None, debug=False):
+        warnings.warn('docutils.utils.Reporter.set_conditions() deprecated; '
+                      'Will be removed in Docutils 0.21 or later. '
+                      'Set attributes via configuration settings or directly.',
+                      DeprecationWarning, stacklevel=2)
+        self.report_level = report_level
+        self.halt_level = halt_level
+        if not isinstance(stream, io.ErrorOutput):
+            stream = io.ErrorOutput(stream, self.encoding, self.error_handler)
+        self.stream = stream
+        self.debug_flag = debug
+
+    def attach_observer(self, observer):
+        """
+        The `observer` parameter is a function or bound method which takes one
+        argument, a `nodes.system_message` instance.
+        """
+        self.observers.append(observer)
+
+    def detach_observer(self, observer):
+        self.observers.remove(observer)
+
+    def notify_observers(self, message):
+        for observer in self.observers:
+            observer(message)
+
+    def system_message(self, level, message, *children, **kwargs):
+        """
+        Return a system_message object.
+
+        Raise an exception or generate a warning if appropriate.
+        """
+        # `message` can be a `str` or `Exception` instance.
+        if isinstance(message, Exception):
+            message = str(message)
+
+        attributes = kwargs.copy()
+        if 'base_node' in kwargs:
+            source, line = get_source_line(kwargs['base_node'])
+            del attributes['base_node']
+            if source is not None:
+                attributes.setdefault('source', source)
+            if line is not None:
+                attributes.setdefault('line', line)
+                # assert source is not None, "line- but no source-argument"
+        if 'source' not in attributes:
+            # 'line' is absolute line number
+            try:
+                source, line = self.get_source_and_line(attributes.get('line'))
+            except AttributeError:
+                source, line = None, None
+            if source is not None:
+                attributes['source'] = source
+            if line is not None:
+                attributes['line'] = line
+        # assert attributes['line'] is not None, (message, kwargs)
+        # assert attributes['source'] is not None, (message, kwargs)
+        attributes.setdefault('source', self.source)
+
+        msg = nodes.system_message(message, level=level,
+                                   type=self.levels[level],
+                                   *children, **attributes)
+        if self.stream and (level >= self.report_level
+                            or self.debug_flag and level == self.DEBUG_LEVEL
+                            or level >= self.halt_level):
+            self.stream.write(msg.astext() + '\n')
+        if level >= self.halt_level:
+            raise SystemMessage(msg, level)
+        if level > self.DEBUG_LEVEL or self.debug_flag:
+            self.notify_observers(msg)
+        self.max_level = max(level, self.max_level)
+        return msg
+
+    def debug(self, *args, **kwargs):
+        """
+        Level-0, "DEBUG": an internal reporting issue. Typically, there is no
+        effect on the processing. Level-0 system messages are handled
+        separately from the others.
+        """
+        if self.debug_flag:
+            return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)
+
+    def info(self, *args, **kwargs):
+        """
+        Level-1, "INFO": a minor issue that can be ignored. Typically there is
+        no effect on processing, and level-1 system messages are not reported.
+        """
+        return self.system_message(self.INFO_LEVEL, *args, **kwargs)
+
+    def warning(self, *args, **kwargs):
+        """
+        Level-2, "WARNING": an issue that should be addressed. If ignored,
+        there may be unpredictable problems with the output.
+        """
+        return self.system_message(self.WARNING_LEVEL, *args, **kwargs)
+
+    def error(self, *args, **kwargs):
+        """
+        Level-3, "ERROR": an error that should be addressed. If ignored, the
+        output will contain errors.
+        """
+        return self.system_message(self.ERROR_LEVEL, *args, **kwargs)
+
+    def severe(self, *args, **kwargs):
+        """
+        Level-4, "SEVERE": a severe error that must be addressed. If ignored,
+        the output will contain severe errors. Typically level-4 system
+        messages are turned into exceptions which halt processing.
+        """
+        return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)
+
+
+class ExtensionOptionError(DataError): pass
+class BadOptionError(ExtensionOptionError): pass
+class BadOptionDataError(ExtensionOptionError): pass
+class DuplicateOptionError(ExtensionOptionError): pass
+
+
+def extract_extension_options(field_list, options_spec):
+    """
+    Return a dictionary mapping extension option names to converted values.
+
+    :Parameters:
+        - `field_list`: A flat field list without field arguments, where each
+          field body consists of a single paragraph only.
+        - `options_spec`: Dictionary mapping known option names to a
+          conversion function such as `int` or `float`.
+
+    :Exceptions:
+        - `KeyError` for unknown option names.
+        - `ValueError` for invalid option values (raised by the conversion
+           function).
+        - `TypeError` for invalid option value types (raised by conversion
+           function).
+        - `DuplicateOptionError` for duplicate options.
+        - `BadOptionError` for invalid fields.
+        - `BadOptionDataError` for invalid option data (missing name,
+          missing data, bad quotes, etc.).
+    """
+    option_list = extract_options(field_list)
+    return assemble_option_dict(option_list, options_spec)
+
+
+def extract_options(field_list):
+    """
+    Return a list of option (name, value) pairs from field names & bodies.
+
+    :Parameter:
+        `field_list`: A flat field list, where each field name is a single
+        word and each field body consists of a single paragraph only.
+
+    :Exceptions:
+        - `BadOptionError` for invalid fields.
+        - `BadOptionDataError` for invalid option data (missing name,
+          missing data, bad quotes, etc.).
+    """
+    option_list = []
+    for field in field_list:
+        if len(field[0].astext().split()) != 1:
+            raise BadOptionError(
+                'extension option field name may not contain multiple words')
+        name = str(field[0].astext().lower())
+        body = field[1]
+        if len(body) == 0:
+            data = None
+        elif (len(body) > 1
+              or not isinstance(body[0], nodes.paragraph)
+              or len(body[0]) != 1
+              or not isinstance(body[0][0], nodes.Text)):
+            raise BadOptionDataError(
+                  'extension option field body may contain\n'
+                  'a single paragraph only (option "%s")' % name)
+        else:
+            data = body[0][0].astext()
+        option_list.append((name, data))
+    return option_list
+
+
+def assemble_option_dict(option_list, options_spec):
+    """
+    Return a mapping of option names to values.
+
+    :Parameters:
+        - `option_list`: A list of (name, value) pairs (the output of
+          `extract_options()`).
+        - `options_spec`: Dictionary mapping known option names to a
+          conversion function such as `int` or `float`.
+
+    :Exceptions:
+        - `KeyError` for unknown option names.
+        - `DuplicateOptionError` for duplicate options.
+        - `ValueError` for invalid option values (raised by conversion
+           function).
+        - `TypeError` for invalid option value types (raised by conversion
+           function).
+    """
+    options = {}
+    for name, value in option_list:
+        convertor = options_spec[name]  # raises KeyError if unknown
+        if convertor is None:
+            raise KeyError(name)        # or if explicitly disabled
+        if name in options:
+            raise DuplicateOptionError('duplicate option "%s"' % name)
+        try:
+            options[name] = convertor(value)
+        except (ValueError, TypeError) as detail:
+            raise detail.__class__('(option: "%s"; value: %r)\n%s'
+                                   % (name, value, ' '.join(detail.args)))
+    return options
+
+
+class NameValueError(DataError): pass
+
+
+def decode_path(path):
+    """
+    Ensure `path` is Unicode. Return `str` instance.
+
+    Decode file/path string in a failsafe manner if not already done.
+    """
+    # TODO: is this still required with Python 3?
+    if isinstance(path, str):
+        return path
+    try:
+        path = path.decode(sys.getfilesystemencoding(), 'strict')
+    except AttributeError:  # default value None has no decode method
+        if not path:
+            return ''
+        raise ValueError('`path` value must be a String or ``None``, '
+                         f'not {path!r}')
+    except UnicodeDecodeError:
+        try:
+            path = path.decode('utf-8', 'strict')
+        except UnicodeDecodeError:
+            path = path.decode('ascii', 'replace')
+    return path
+
+
+def extract_name_value(line):
+    """
+    Return a list of (name, value) from a line of the form "name=value ...".
+
+    :Exception:
+        `NameValueError` for invalid input (missing name, missing data, bad
+        quotes, etc.).
+    """
+    attlist = []
+    while line:
+        equals = line.find('=')
+        if equals == -1:
+            raise NameValueError('missing "="')
+        attname = line[:equals].strip()
+        if equals == 0 or not attname:
+            raise NameValueError(
+                  'missing attribute name before "="')
+        line = line[equals+1:].lstrip()
+        if not line:
+            raise NameValueError(
+                  'missing value after "%s="' % attname)
+        if line[0] in '\'"':
+            endquote = line.find(line[0], 1)
+            if endquote == -1:
+                raise NameValueError(
+                      'attribute "%s" missing end quote (%s)'
+                      % (attname, line[0]))
+            if len(line) > endquote + 1 and line[endquote + 1].strip():
+                raise NameValueError(
+                      'attribute "%s" end quote (%s) not followed by '
+                      'whitespace' % (attname, line[0]))
+            data = line[1:endquote]
+            line = line[endquote+1:].lstrip()
+        else:
+            space = line.find(' ')
+            if space == -1:
+                data = line
+                line = ''
+            else:
+                data = line[:space]
+                line = line[space+1:].lstrip()
+        attlist.append((attname.lower(), data))
+    return attlist
+
+
+def new_reporter(source_path, settings):
+    """
+    Return a new Reporter object.
+
+    :Parameters:
+        `source` : string
+            The path to or description of the source text of the document.
+        `settings` : optparse.Values object
+            Runtime settings.
+    """
+    reporter = Reporter(
+        source_path, settings.report_level, settings.halt_level,
+        stream=settings.warning_stream, debug=settings.debug,
+        encoding=settings.error_encoding,
+        error_handler=settings.error_encoding_error_handler)
+    return reporter
+
+
+def new_document(source_path, settings=None):
+    """
+    Return a new empty document object.
+
+    :Parameters:
+        `source_path` : string
+            The path to or description of the source text of the document.
+        `settings` : optparse.Values object
+            Runtime settings.  If none are provided, a default core set will
+            be used.  If you will use the document object with any Docutils
+            components, you must provide their default settings as well.
+
+            For example, if parsing rST, at least provide the rst-parser
+            settings, obtainable as follows:
+
+            Defaults for parser component::
+
+                settings = docutils.frontend.get_default_settings(
+                               docutils.parsers.rst.Parser)
+
+            Defaults and configuration file customizations::
+
+                settings = docutils.core.Publisher(
+                    parser=docutils.parsers.rst.Parser).get_settings()
+
+    """
+    # Import at top of module would lead to circular dependency!
+    from docutils import frontend
+    if settings is None:
+        settings = frontend.get_default_settings()
+    source_path = decode_path(source_path)
+    reporter = new_reporter(source_path, settings)
+    document = nodes.document(settings, reporter, source=source_path)
+    document.note_source(source_path, -1)
+    return document
+
+
+def clean_rcs_keywords(paragraph, keyword_substitutions):
+    if len(paragraph) == 1 and isinstance(paragraph[0], nodes.Text):
+        textnode = paragraph[0]
+        for pattern, substitution in keyword_substitutions:
+            match = pattern.search(textnode)
+            if match:
+                paragraph[0] = nodes.Text(pattern.sub(substitution, textnode))
+                return
+
+
+def relative_path(source, target):
+    """
+    Build and return a path to `target`, relative to `source` (both files).
+
+    The return value is a `str` suitable to be included in `source`
+    as a reference to `target`.
+
+    :Parameters:
+        `source` : path-like object or None
+            Path of a file in the start directory for the relative path
+            (the file does not need to exist).
+            The value ``None`` is replaced with "<cwd>/dummy_file".
+        `target` : path-like object
+            End point of the returned relative path.
+
+    Differences to `os.path.relpath()`:
+
+    * Inverse argument order.
+    * `source` is assumed to be a FILE in the start directory (add a "dummy"
+      file name to obtain the path relative from a directory)
+      while `os.path.relpath()` expects a DIRECTORY as `start` argument.
+    * Always use Posix path separator ("/") for the output.
+    * Use `os.sep` for parsing the input
+      (changing the value of `os.sep` is ignored by `os.relpath()`).
+    * If there is no common prefix, return the absolute path to `target`.
+
+    Differences to `pathlib.PurePath.relative_to(other)`:
+
+    * pathlib offers an object oriented interface.
+    * `source` expects path to a FILE while `other` expects a DIRECTORY.
+    * `target` defaults to the cwd, no default value for `other`.
+    * `relative_path()` always returns a path (relative or absolute),
+      while `PurePath.relative_to()` raises a ValueError
+      if `target` is not a subpath of `other` (no ".." inserted).
+    """
+    source_parts = os.path.abspath(source or type(target)('dummy_file')
+                                   ).split(os.sep)
+    target_parts = os.path.abspath(target).split(os.sep)
+    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
+    if source_parts[:2] != target_parts[:2]:
+        # Nothing in common between paths.
+        # Return absolute path, using '/' for URLs:
+        return '/'.join(target_parts)
+    source_parts.reverse()
+    target_parts.reverse()
+    while (source_parts and target_parts
+           and source_parts[-1] == target_parts[-1]):
+        # Remove path components in common:
+        source_parts.pop()
+        target_parts.pop()
+    target_parts.reverse()
+    parts = ['..'] * (len(source_parts) - 1) + target_parts
+    return '/'.join(parts)
+
+
+def get_stylesheet_reference(settings, relative_to=None):
+    """
+    Retrieve a stylesheet reference from the settings object.
+
+    Deprecated. Use get_stylesheet_list() instead to
+    enable specification of multiple stylesheets as a comma-separated
+    list.
+    """
+    warnings.warn('utils.get_stylesheet_reference()'
+                  ' is obsoleted by utils.get_stylesheet_list()'
+                  ' and will be removed in Docutils 2.0.',
+                  DeprecationWarning, stacklevel=2)
+    if settings.stylesheet_path:
+        assert not settings.stylesheet, (
+            'stylesheet and stylesheet_path are mutually exclusive.')
+        if relative_to is None:
+            relative_to = settings._destination
+        return relative_path(relative_to, settings.stylesheet_path)
+    else:
+        return settings.stylesheet
+
+
+# Return 'stylesheet' or 'stylesheet_path' arguments as list.
+#
+# The original settings arguments are kept unchanged: you can test
+# with e.g. ``if settings.stylesheet_path: ...``.
+#
+# Differences to the depracated `get_stylesheet_reference()`:
+# * return value is a list
+# * no re-writing of the path (and therefore no optional argument)
+#   (if required, use ``utils.relative_path(source, target)``
+#   in the calling script)
+def get_stylesheet_list(settings):
+    """
+    Retrieve list of stylesheet references from the settings object.
+    """
+    assert not (settings.stylesheet and settings.stylesheet_path), (
+            'stylesheet and stylesheet_path are mutually exclusive.')
+    stylesheets = settings.stylesheet_path or settings.stylesheet or []
+    # programmatically set default may be string with comma separated list:
+    if not isinstance(stylesheets, list):
+        stylesheets = [path.strip() for path in stylesheets.split(',')]
+    if settings.stylesheet_path:
+        # expand relative paths if found in stylesheet-dirs:
+        stylesheets = [find_file_in_dirs(path, settings.stylesheet_dirs)
+                       for path in stylesheets]
+    return stylesheets
+
+
+def find_file_in_dirs(path, dirs):
+    """
+    Search for `path` in the list of directories `dirs`.
+
+    Return the first expansion that matches an existing file.
+    """
+    path = Path(path)
+    if path.is_absolute():
+        return path.as_posix()
+    for d in dirs:
+        f = Path(d).expanduser() / path
+        if f.exists():
+            return f.as_posix()
+    return path.as_posix()
+
+
+def get_trim_footnote_ref_space(settings):
+    """
+    Return whether or not to trim footnote space.
+
+    If trim_footnote_reference_space is not None, return it.
+
+    If trim_footnote_reference_space is None, return False unless the
+    footnote reference style is 'superscript'.
+    """
+    if settings.setdefault('trim_footnote_reference_space', None) is None:
+        return getattr(settings, 'footnote_references', None) == 'superscript'
+    else:
+        return settings.trim_footnote_reference_space
+
+
+def get_source_line(node):
+    """
+    Return the "source" and "line" attributes from the `node` given or from
+    its closest ancestor.
+    """
+    while node:
+        if node.source or node.line:
+            return node.source, node.line
+        node = node.parent
+    return None, None
+
+
+def escape2null(text):
+    """Return a string with escape-backslashes converted to nulls."""
+    parts = []
+    start = 0
+    while True:
+        found = text.find('\\', start)
+        if found == -1:
+            parts.append(text[start:])
+            return ''.join(parts)
+        parts.append(text[start:found])
+        parts.append('\x00' + text[found+1:found+2])
+        start = found + 2               # skip character after escape
+
+
+def split_escaped_whitespace(text):
+    """
+    Split `text` on escaped whitespace (null+space or null+newline).
+    Return a list of strings.
+    """
+    strings = text.split('\x00 ')
+    strings = [string.split('\x00\n') for string in strings]
+    # flatten list of lists of strings to list of strings:
+    return list(itertools.chain(*strings))
+
+
+def strip_combining_chars(text):
+    return ''.join(c for c in text if not unicodedata.combining(c))
+
+
+def find_combining_chars(text):
+    """Return indices of all combining chars in  Unicode string `text`.
+
+    >>> from docutils.utils import find_combining_chars
+    >>> find_combining_chars('A t̆ab̆lĕ')
+    [3, 6, 9]
+
+    """
+    return [i for i, c in enumerate(text) if unicodedata.combining(c)]
+
+
+def column_indices(text):
+    """Indices of Unicode string `text` when skipping combining characters.
+
+    >>> from docutils.utils import column_indices
+    >>> column_indices('A t̆ab̆lĕ')
+    [0, 1, 2, 4, 5, 7, 8]
+
+    """
+    # TODO: account for asian wide chars here instead of using dummy
+    # replacements in the tableparser?
+    string_indices = list(range(len(text)))
+    for index in find_combining_chars(text):
+        string_indices[index] = None
+    return [i for i in string_indices if i is not None]
+
+
+east_asian_widths = {'W': 2,   # Wide
+                     'F': 2,   # Full-width (wide)
+                     'Na': 1,  # Narrow
+                     'H': 1,   # Half-width (narrow)
+                     'N': 1,   # Neutral (not East Asian, treated as narrow)
+                     'A': 1,   # Ambiguous (s/b wide in East Asian context,
+                     }         # narrow otherwise, but that doesn't work)
+"""Mapping of result codes from `unicodedata.east_asian_widt()` to character
+column widths."""
+
+
+def column_width(text):
+    """Return the column width of text.
+
+    Correct ``len(text)`` for wide East Asian and combining Unicode chars.
+    """
+    width = sum(east_asian_widths[unicodedata.east_asian_width(c)]
+                for c in text)
+    # correction for combining chars:
+    width -= len(find_combining_chars(text))
+    return width
+
+
+def uniq(L):
+    r = []
+    for item in L:
+        if item not in r:
+            r.append(item)
+    return r
+
+
+def normalize_language_tag(tag):
+    """Return a list of normalized combinations for a `BCP 47` language tag.
+
+    Example:
+
+    >>> from docutils.utils import normalize_language_tag
+    >>> normalize_language_tag('de_AT-1901')
+    ['de-at-1901', 'de-at', 'de-1901', 'de']
+    >>> normalize_language_tag('de-CH-x_altquot')
+    ['de-ch-x-altquot', 'de-ch', 'de-x-altquot', 'de']
+
+    """
+    # normalize:
+    tag = tag.lower().replace('-', '_')
+    # split (except singletons, which mark the following tag as non-standard):
+    tag = re.sub(r'_([a-zA-Z0-9])_', r'_\1-', tag)
+    subtags = [subtag for subtag in tag.split('_')]
+    base_tag = (subtags.pop(0),)
+    # find all combinations of subtags
+    taglist = []
+    for n in range(len(subtags), 0, -1):
+        for tags in itertools.combinations(subtags, n):
+            taglist.append('-'.join(base_tag+tags))
+    taglist += base_tag
+    return taglist
+
+
+def xml_declaration(encoding=None):
+    """Return an XML text declaration.
+
+    Include an encoding declaration, if `encoding`
+    is not 'unicode', '', or None.
+    """
+    if encoding and encoding.lower() != 'unicode':
+        encoding_declaration = f' encoding="{encoding}"'
+    else:
+        encoding_declaration = ''
+    return f'<?xml version="1.0"{encoding_declaration}?>\n'
+
+
+class DependencyList:
+
+    """
+    List of dependencies, with file recording support.
+
+    Note that the output file is not automatically closed.  You have
+    to explicitly call the close() method.
+    """
+
+    def __init__(self, output_file=None, dependencies=()):
+        """
+        Initialize the dependency list, automatically setting the
+        output file to `output_file` (see `set_output()`) and adding
+        all supplied dependencies.
+
+        If output_file is None, no file output is done when calling add().
+        """
+        self.list = []
+        self.file = None
+        if output_file:
+            self.set_output(output_file)
+        self.add(*dependencies)
+
+    def set_output(self, output_file):
+        """
+        Set the output file and clear the list of already added
+        dependencies.
+
+        `output_file` must be a string.  The specified file is
+        immediately overwritten.
+
+        If output_file is '-', the output will be written to stdout.
+        """
+        if output_file:
+            if output_file == '-':
+                self.file = sys.stdout
+            else:
+                self.file = open(output_file, 'w', encoding='utf-8')
+
+    def add(self, *paths):
+        """
+        Append `path` to `self.list` unless it is already there.
+
+        Also append to `self.file` unless it is already there
+        or `self.file is `None`.
+        """
+        for path in paths:
+            if isinstance(path, PurePath):
+                path = path.as_posix()  # use '/' as separator
+            if path not in self.list:
+                self.list.append(path)
+                if self.file is not None:
+                    self.file.write(path+'\n')
+
+    def close(self):
+        """
+        Close the output file.
+        """
+        if self.file is not sys.stdout:
+            self.file.close()
+        self.file = None
+
+    def __repr__(self):
+        try:
+            output_file = self.file.name
+        except AttributeError:
+            output_file = None
+        return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)
+
+
+release_level_abbreviations = {
+    'alpha': 'a',
+    'beta': 'b',
+    'candidate': 'rc',
+    'final': ''}
+
+
+def version_identifier(version_info=None):
+    """
+    Return a version identifier string built from `version_info`, a
+    `docutils.VersionInfo` namedtuple instance or compatible tuple. If
+    `version_info` is not provided, by default return a version identifier
+    string based on `docutils.__version_info__` (i.e. the current Docutils
+    version).
+    """
+    if version_info is None:
+        version_info = __version_info__
+    if version_info.micro:
+        micro = '.%s' % version_info.micro
+    else:
+        # 0 is omitted:
+        micro = ''
+    releaselevel = release_level_abbreviations[version_info.releaselevel]
+    if version_info.serial:
+        serial = version_info.serial
+    else:
+        # 0 is omitted:
+        serial = ''
+    if version_info.release:
+        dev = ''
+    else:
+        dev = '.dev'
+    version = '%s.%s%s%s%s%s' % (
+        version_info.major,
+        version_info.minor,
+        micro,
+        releaselevel,
+        serial,
+        dev)
+    return version