aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/docutils/io.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/docutils/io.py')
-rw-r--r--.venv/lib/python3.12/site-packages/docutils/io.py637
1 files changed, 637 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/docutils/io.py b/.venv/lib/python3.12/site-packages/docutils/io.py
new file mode 100644
index 00000000..6237c66a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docutils/io.py
@@ -0,0 +1,637 @@
+# $Id: io.py 9427 2023-07-07 06:50:09Z milde $
+# Author: David Goodger <goodger@python.org>
+# Copyright: This module has been placed in the public domain.
+
+"""
+I/O classes provide a uniform API for low-level input and output. Subclasses
+exist for a variety of input/output mechanisms.
+"""
+
+__docformat__ = 'reStructuredText'
+
+import codecs
+import locale
+import os
+import re
+import sys
+import warnings
+
+from docutils import TransformSpec
+
+
+# Guess the locale's preferred encoding.
+# If no valid guess can be made, _locale_encoding is set to `None`:
+#
+# TODO: check whether this is set correctly with every OS and Python version
+# or whether front-end tools need to call `locale.setlocale()`
+# before importing this module
# Module-level guess of the locale's preferred encoding.
# The result is a lower-case codec name known to `codecs`, or None.
try:
    # Return locale encoding also in UTF-8 mode
    with warnings.catch_warnings():
        # silence any warning emitted by the `locale` lookups below
        warnings.simplefilter("ignore")
        _locale_encoding = (locale.getlocale()[1]
                            or locale.getdefaultlocale()[1])
        # raises AttributeError if both lookups returned None
        _locale_encoding = _locale_encoding.lower()
except Exception:
    # Any problem determining the locale -> use None.
    # `Exception` (not a bare ``except``) so that KeyboardInterrupt and
    # SystemExit raised during import are not swallowed.
    _locale_encoding = None
try:
    # Reject names that Python has no codec for (TypeError: name is None).
    codecs.lookup(_locale_encoding)
except (LookupError, TypeError):
    _locale_encoding = None
+
+
class InputError(OSError):
    """`OSError` subclass for input errors."""
class OutputError(OSError):
    """`OSError` subclass for output errors."""
+
+
def check_encoding(stream, encoding):
    """Test whether the encoding of `stream` matches `encoding`.

    Both ``stream.encoding`` and `encoding` are resolved with
    `codecs.lookup()` and the resulting codec entries are compared.

    Returns

    :None:  if `encoding` or ``stream.encoding`` is not a valid encoding
            argument (e.g. ``None`` or an unknown codec name) or if
            ``stream.encoding`` is missing,
    :True:  if both values resolve to the same codec,
    :False: if the encodings differ.
    """
    try:
        stream_codec = codecs.lookup(stream.encoding)
        wanted_codec = codecs.lookup(encoding)
    except (LookupError, AttributeError, TypeError):
        # unknown codec name / no `encoding` attribute / not a string
        return None
    return stream_codec == wanted_codec
+
+
def error_string(err):
    """Format Exception `err` as "<class name>: <message>"."""
    class_name = err.__class__.__name__
    return '%s: %s' % (class_name, err)
+
+
class Input(TransformSpec):
    """
    Abstract base class for input wrappers.

    Docutils input objects must provide a `read()` method that
    returns the source, typically as `str` instance.

    Inheriting `TransformSpec` allows input objects to add
    "transforms" and "unknown_reference_resolvers" to the "Transformer".
    (Optional for custom input objects since Docutils 0.19.)
    """

    component_type = 'input'

    default_source_path = None

    def __init__(self, source=None, source_path=None, encoding=None,
                 error_handler='strict'):
        """
        :Parameters:
            - `source`: the source of input data (stored unchanged).
            - `source_path`: a text reference to the source; if empty,
              `default_source_path` is used instead.
            - `encoding`: text encoding for the input source. If empty,
              `decode()` tries to detect the encoding.
            - `error_handler`: text decoding error handler.
        """
        self.encoding = encoding
        """Text encoding for the input source."""

        self.error_handler = error_handler
        """Text decoding error handler."""

        self.source = source
        """The source of input data."""

        self.source_path = source_path
        """A text reference to the source."""

        if not source_path:
            self.source_path = self.default_source_path

        self.successful_encoding = None
        """The encoding that successfully decoded the source data."""

    def __repr__(self):
        return '%s: source=%r, source_path=%r' % (self.__class__, self.source,
                                                  self.source_path)

    def read(self):
        """Return input as `str`. Define in subclasses."""
        raise NotImplementedError

    def decode(self, data):
        """
        Decode `data` if required.

        Return Unicode `str` instances unchanged (nothing to decode).

        If `self.encoding` is None, determine encoding from data
        or try UTF-8 and the locale's preferred encoding.
        The client application should call ``locale.setlocale()`` at the
        beginning of processing::

            locale.setlocale(locale.LC_ALL, '')

        On success, the encoding used is stored in
        `self.successful_encoding`.

        Raise UnicodeError if unsuccessful. The error raised by the last
        decoding attempt is attached as ``__cause__``.

        Provisional: encoding detection will be removed in Docutils 1.0.
        """
        if self.encoding and self.encoding.lower() == 'unicode':
            assert isinstance(data, str), ('input encoding is "unicode" '
                                           'but `data` is no `str` instance')
        if isinstance(data, str):
            # nothing to decode
            return data
        if self.encoding:
            # We believe the user/application when the encoding is
            # explicitly given.
            encoding_candidates = [self.encoding]
        else:
            data_encoding = self.determine_encoding_from_data(data)
            if data_encoding:
                # `data` declares its encoding with "magic comment" or BOM,
                encoding_candidates = [data_encoding]
            else:
                # Apply heuristics if the encoding is not specified.
                # Start with UTF-8, because that only matches
                # data that *IS* UTF-8:
                encoding_candidates = ['utf-8']
                # If UTF-8 fails, fall back to the locale's preferred encoding:
                fallback = locale.getpreferredencoding(do_setlocale=False)
                if fallback and fallback.lower() != 'utf-8':
                    encoding_candidates.append(fallback)
        # `encoding_candidates` is never empty, so `error` is always
        # rebound before the final ``raise`` is reached.
        error = None
        for enc in encoding_candidates:
            try:
                decoded = str(data, enc, self.error_handler)
            except (UnicodeError, LookupError) as err:
                # keep exception instance for use outside of the "for" loop.
                error = err
            else:
                self.successful_encoding = enc
                return decoded
        # Chain the last failure explicitly: this statement is outside the
        # "except" clause, so there is no implicit exception context.
        raise UnicodeError(
            'Unable to decode input data. Tried the following encodings: '
            f'{", ".join(repr(enc) for enc in encoding_candidates)}.\n'
            f'({error_string(error)})') from error

    coding_slug = re.compile(br"coding[:=]\s*([-\w.]+)")
    """Encoding declaration pattern."""

    byte_order_marks = ((codecs.BOM_UTF32_BE, 'utf-32'),
                        (codecs.BOM_UTF32_LE, 'utf-32'),
                        (codecs.BOM_UTF8, 'utf-8-sig'),
                        (codecs.BOM_UTF16_BE, 'utf-16'),
                        (codecs.BOM_UTF16_LE, 'utf-16'),
                        )
    """Sequence of (start_bytes, encoding) tuples for encoding detection.
    The first bytes of input data are checked against the start_bytes strings.
    A match indicates the given encoding."""

    def determine_encoding_from_data(self, data):
        """
        Try to determine the encoding of `data` by looking *in* `data`.
        Check for a byte order mark (BOM) or an encoding declaration.

        Return the encoding name (`str`) or None if nothing was found.
        """
        # check for a byte order mark:
        for start_bytes, encoding in self.byte_order_marks:
            if data.startswith(start_bytes):
                return encoding
        # check for an encoding declaration pattern in first 2 lines of file:
        for line in data.splitlines()[:2]:
            match = self.coding_slug.search(line)
            if match:
                return match.group(1).decode('ascii')
        return None

    def isatty(self):
        """Return True, if the input source is connected to a TTY device."""
        try:
            return self.source.isatty()
        except AttributeError:
            # source has no `isatty()` method (e.g. a `str` or None)
            return False
+
+
class Output(TransformSpec):
    """
    Abstract base class for output wrappers.

    Docutils output objects must provide a `write()` method that
    expects and handles one argument (the output).

    Inheriting `TransformSpec` allows output objects to add
    "transforms" and "unknown_reference_resolvers" to the "Transformer".
    (Optional for custom output objects since Docutils 0.19.)
    """

    component_type = 'output'

    default_destination_path = None

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict'):
        self.encoding = encoding
        """Text encoding for the output destination."""

        self.error_handler = error_handler or 'strict'
        """Text encoding error handler."""

        self.destination = destination
        """The destination for output data."""

        # an empty path falls back to the class default
        self.destination_path = (destination_path
                                 or self.default_destination_path)
        """A text reference to the destination."""

    def __repr__(self):
        details = (self.__class__, self.destination, self.destination_path)
        return '%s: destination=%r, destination_path=%r' % details

    def write(self, data):
        """Write `data`. Define in subclasses."""
        raise NotImplementedError

    def encode(self, data):
        """
        Return `data`, encoded if required.

        `str` instances are encoded with `self.encoding` and
        `self.error_handler`; anything else (e.g. `bytes`) is
        returned unchanged.

        Provisional: If `self.encoding` is set to the pseudo encoding name
        "unicode", `data` must be a `str` instance and is returned unchanged.
        """
        if self.encoding and self.encoding.lower() == 'unicode':
            assert isinstance(data, str), ('output encoding is "unicode" '
                                           'but `data` is no `str` instance')
            return data
        if isinstance(data, str):
            return data.encode(self.encoding, self.error_handler)
        # Non-unicode (e.g. bytes) output.
        return data
+
+
class ErrorOutput:
    """
    Wrapper class for file-like error streams with
    failsafe de- and encoding of `str`, `bytes`, `unicode` and
    `Exception` instances.
    """

    def __init__(self, destination=None, encoding=None,
                 encoding_errors='backslashreplace',
                 decoding_errors='replace'):
        """
        :Parameters:
            - `destination`: a file-like object,
                        a string (path to a file),
                        `None` (write to `sys.stderr`, default), or
                        evaluating to `False` (write() requests are ignored).
            - `encoding`: `destination` text encoding. Guessed if None.
            - `encoding_errors`: how to treat encoding errors.
            - `decoding_errors`: how to treat decoding errors.
        """
        if destination is None:
            destination = sys.stderr
        elif not destination:
            # any other false value switches output off
            destination = False
        elif isinstance(destination, str):
            # `destination` is a file name -> open it
            destination = open(destination, 'w')

        self.destination = destination
        """Where warning output is sent."""

        if not encoding:
            # guess: stream encoding, then locale encoding, then ASCII
            encoding = (getattr(destination, 'encoding', None)
                        or _locale_encoding or 'ascii')
        self.encoding = encoding
        """The output character encoding."""

        self.encoding_errors = encoding_errors
        """Encoding error handler."""

        self.decoding_errors = decoding_errors
        """Decoding error handler."""

    def write(self, data):
        """
        Write `data` to self.destination. Ignore, if self.destination is False.

        `data` can be a `bytes`, `str`, or `Exception` instance.
        """
        stream = self.destination
        if not stream:
            return
        if isinstance(data, Exception):
            data = str(data)
        try:
            stream.write(data)
        except UnicodeEncodeError:
            encoded = data.encode(self.encoding, self.encoding_errors)
            stream.write(encoded)
        except TypeError:
            if isinstance(data, str):
                # destination may expect bytes
                stream.write(data.encode(self.encoding,
                                         self.encoding_errors))
            elif stream in (sys.stderr, sys.stdout):
                # write bytes to raw stream
                stream.buffer.write(data)
            else:
                stream.write(str(data, self.encoding,
                                 self.decoding_errors))

    def close(self):
        """
        Close the error-output stream.

        Ignored if the destination is `sys.stderr` or `sys.stdout` or has no
        close() method.
        """
        if self.destination in (sys.stdout, sys.stderr):
            return
        try:
            self.destination.close()
        except AttributeError:
            pass

    def isatty(self):
        """Return True, if the destination is connected to a TTY device."""
        try:
            connected = self.destination.isatty()
        except AttributeError:
            connected = False
        return connected
+
+
class FileInput(Input):

    """
    Input for single, simple file-like objects.
    """
    def __init__(self, source=None, source_path=None,
                 encoding=None, error_handler='strict',
                 autoclose=True, mode='r'):
        """
        :Parameters:
            - `source`: either a file-like object (which is read directly), or
              `None` (which implies `sys.stdin` if no `source_path` given).
            - `source_path`: a path to a file, which is opened for reading.
            - `encoding`: the expected text encoding of the input file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after read (except when
              `sys.stdin` is the source).
            - `mode`: how the file is to be opened (see standard function
              `open`). The default is read only ('r').

        Raise `InputError` if the file at `source_path` cannot be opened and
        `UnicodeError` if `source` is opened with an encoding that differs
        from `encoding`.
        """
        Input.__init__(self, source, source_path, encoding, error_handler)
        self.autoclose = autoclose
        self._stderr = ErrorOutput()

        if source is None:
            if source_path:
                # `open()` raises ValueError if `encoding` or `errors` is
                # passed together with a binary mode, so pass them for text
                # modes only (same approach as `FileOutput.open()`).
                if 'b' in mode:
                    kwargs = {}
                else:
                    kwargs = {'encoding': self.encoding,
                              'errors': self.error_handler}
                try:
                    self.source = open(source_path, mode, **kwargs)
                except OSError as error:
                    raise InputError(error.errno, error.strerror,
                                     source_path) from error
            else:
                self.source = sys.stdin
        elif check_encoding(self.source, self.encoding) is False:
            # TODO: re-open, warn or raise error?
            raise UnicodeError('Encoding clash: encoding given is "%s" '
                               'but source is opened with encoding "%s".' %
                               (self.encoding, self.source.encoding))
        if not source_path:
            # use the name of the file object (if it has one)
            try:
                self.source_path = self.source.name
            except AttributeError:
                pass

    def read(self):
        """
        Read a single file and return its content.

        If no encoding is specified and the source provides a ``buffer``
        attribute, the binary data is read from it, decoded with
        `Input.decode()`, and newlines are normalized to "\\n".
        Otherwise, the result of ``self.source.read()`` is returned as-is.

        The source is closed afterwards if `self.autoclose` is true.
        """
        try:
            if not self.encoding and hasattr(self.source, 'buffer'):
                # read as binary data
                data = self.source.buffer.read()
                # decode with heuristics
                data = self.decode(data)
                # normalize newlines
                data = '\n'.join(data.splitlines()+[''])
            else:
                data = self.source.read()
        finally:
            if self.autoclose:
                self.close()
        return data

    def readlines(self):
        """
        Return lines of a single file as list of strings.

        Line endings are kept.
        """
        return self.read().splitlines(True)

    def close(self):
        """Close the source unless it is `sys.stdin`."""
        if self.source is not sys.stdin:
            self.source.close()
+
+
class FileOutput(Output):

    """Output for single, simple file-like objects."""

    default_destination_path = '<file>'

    mode = 'w'
    """The mode argument for `open()`."""
    # 'wb' for binary (e.g. OpenOffice) files (see also `BinaryFileOutput`).
    # (Do not use binary mode ('wb') for text files, as this prevents the
    # conversion of newlines to the system specific default.)

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict', autoclose=True,
                 handle_io_errors=None, mode=None):
        """
        :Parameters:
            - `destination`: either a file-like object (which is written
              directly) or `None` (which implies `sys.stdout` if no
              `destination_path` given).
            - `destination_path`: a path to a file, which is opened and then
              written.
            - `encoding`: the text encoding of the output file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after write (except when
              `sys.stdout` or `sys.stderr` is the destination).
            - `handle_io_errors`: ignored, deprecated, will be removed.
            - `mode`: how the file is to be opened (see standard function
              `open`). The default is 'w', providing universal newline
              support for text files.
        """
        Output.__init__(self, destination, destination_path,
                        encoding, error_handler)
        self.opened = True
        self.autoclose = autoclose
        if handle_io_errors is not None:
            warnings.warn('io.FileOutput: init argument "handle_io_errors" '
                          'is ignored and will be removed in '
                          'Docutils 2.0.', DeprecationWarning, stacklevel=2)
        if mode is not None:
            self.mode = mode
        self._stderr = ErrorOutput()
        if destination is None:
            if destination_path:
                # the file is opened on the first call to `write()`
                self.opened = False
            else:
                self.destination = sys.stdout
        elif (  # destination is file-type object -> check mode:
              mode and hasattr(self.destination, 'mode')
              and mode != self.destination.mode):
            print('Warning: Destination mode "%s" differs from specified '
                  'mode "%s"' % (self.destination.mode, mode),
                  file=self._stderr)
        if not destination_path:
            # use the name of the file object (if it has one)
            try:
                self.destination_path = self.destination.name
            except AttributeError:
                pass

    def open(self):
        """Open the file at `self.destination_path` with `self.mode`.

        Raise `OutputError` if the file cannot be opened.
        """
        # Specify encoding
        if 'b' not in self.mode:
            kwargs = {'encoding': self.encoding,
                      'errors': self.error_handler}
        else:
            kwargs = {}
        try:
            self.destination = open(self.destination_path, self.mode, **kwargs)
        except OSError as error:
            raise OutputError(error.errno, error.strerror,
                              self.destination_path) from error
        self.opened = True

    def write(self, data):
        """Write `data` to a single file, also return it.

        `data` can be a `str` or `bytes` instance.
        If writing `bytes` fails, an attempt is made to write to
        the low-level interface ``self.destination.buffer``.

        If `data` is a `str` instance and `self.encoding` and
        `self.destination.encoding` are set to different values, `data`
        is encoded to a `bytes` instance using `self.encoding`.

        A `TypeError` raised by the destination for data that is not a
        `bytes` instance (e.g. `str` written to a binary stream) is
        propagated instead of being silently dropped.

        The destination is closed afterwards if `self.autoclose` is true.

        Provisional: future versions may raise an error if `self.encoding`
        and `self.destination.encoding` are set to different values.
        """
        if not self.opened:
            self.open()
        if (isinstance(data, str)
            and check_encoding(self.destination, self.encoding) is False):
            if os.linesep != '\n':
                data = data.replace('\n', os.linesep)  # fix endings
            data = self.encode(data)

        try:
            self.destination.write(data)
        except TypeError as err:
            if not isinstance(data, bytes):
                # Nothing was written and there is no fallback for
                # non-bytes data: report the failure to the caller.
                raise
            try:
                self.destination.buffer.write(data)
            except AttributeError:
                if check_encoding(self.destination,
                                  self.encoding) is False:
                    raise ValueError(
                        f'Encoding of {self.destination_path} '
                        f'({self.destination.encoding}) differs \n'
                        f' from specified encoding ({self.encoding})'
                        ) from err
                else:
                    raise err
        except (UnicodeError, LookupError) as err:
            raise UnicodeError(
                'Unable to encode output data. output-encoding is: '
                f'{self.encoding}.\n({error_string(err)})') from err
        finally:
            if self.autoclose:
                self.close()
        return data

    def close(self):
        """Close the destination unless it is `sys.stdout` or `sys.stderr`."""
        if self.destination not in (sys.stdout, sys.stderr):
            self.destination.close()
            self.opened = False
+
+
class BinaryFileOutput(FileOutput):
    """
    `FileOutput` variant that opens its destination file in binary mode.
    """
    # Used by core.publish_cmdline_to_binary() which in turn is used by
    # tools/rst2odt.py but not by core.rst2odt().

    # class-level default for the `open()` mode argument
    mode = 'wb'
+
+
class StringInput(Input):
    """Input wrapper for a `str` or `bytes` source."""

    default_source_path = '<string>'

    def read(self):
        """Return `self.source` as `str` instance.

        The source is passed through `Input.decode`, which decodes it
        if required.
        """
        text = self.decode(self.source)
        return text
+
+
class StringOutput(Output):
    """Output wrapper that keeps the result as a `bytes` or `str` instance.

    Provisional.
    """

    default_destination_path = '<string>'

    def write(self, data):
        """Store `data` in `self.destination`, and return it.

        `data` is passed through `self.encode()` first:

        * If `self.encoding` is set to the pseudo encoding name "unicode",
          `data` must be a `str` instance and is stored/returned unchanged
          (cf. `Output.encode`).

        * Otherwise, `data` can be a `bytes` or `str` instance and is
          stored/returned as a `bytes` instance
          (`str` data is encoded with `self.encode()`).

        Attention: the `output_encoding`_ setting may affect the content
        of the output (e.g. an encoding declaration in HTML or XML or the
        representation of characters as LaTeX macro vs. literal character).
        """
        result = self.encode(data)
        self.destination = result
        return result
+
+
class NullInput(Input):

    """Degenerate input: there is nothing to read."""

    default_source_path = 'null input'

    def read(self):
        """Return an empty `str`; `self.source` is not consulted."""
        nothing = ''
        return nothing
+
+
class NullOutput(Output):

    """Degenerate output: everything written is discarded."""

    default_destination_path = 'null output'

    def write(self, data):
        """Ignore `data` and return None."""
        return None
+
+
class DocTreeInput(Input):

    """
    Adapter for document tree input.

    Pass the document tree in the ``source`` parameter;
    `read()` hands it back unchanged.
    """

    default_source_path = 'doctree input'

    def read(self):
        """Return the document tree stored in `self.source`."""
        doctree = self.source
        return doctree