From 4a52a71956a8d46fcb7294ac71734504bb09bcc2 Mon Sep 17 00:00:00 2001 From: S. Solomon Darnell Date: Fri, 28 Mar 2025 21:52:21 -0500 Subject: two version of R2R are here --- .../lib/python3.12/site-packages/bs4/formatter.py | 276 +++++++++++++++++++++ 1 file changed, 276 insertions(+) create mode 100644 .venv/lib/python3.12/site-packages/bs4/formatter.py (limited to '.venv/lib/python3.12/site-packages/bs4/formatter.py') diff --git a/.venv/lib/python3.12/site-packages/bs4/formatter.py b/.venv/lib/python3.12/site-packages/bs4/formatter.py new file mode 100644 index 00000000..bfa08764 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/bs4/formatter.py @@ -0,0 +1,276 @@ +from __future__ import annotations +from typing import Callable, Dict, Iterable, Optional, Set, Tuple, TYPE_CHECKING, Union +from typing_extensions import TypeAlias +from bs4.dammit import EntitySubstitution + +if TYPE_CHECKING: + from bs4._typing import _AttributeValue + + +class Formatter(EntitySubstitution): + """Describes a strategy to use when outputting a parse tree to a string. + + Some parts of this strategy come from the distinction between + HTML4, HTML5, and XML. Others are configurable by the user. + + Formatters are passed in as the `formatter` argument to methods + like `bs4.element.Tag.encode`. Most people won't need to + think about formatters, and most people who need to think about + them can pass in one of these predefined strings as `formatter` + rather than making a new Formatter object: + + For HTML documents: + * 'html' - HTML entity substitution for generic HTML documents. (default) + * 'html5' - HTML entity substitution for HTML5 documents, as + well as some optimizations in the way tags are rendered. + * 'html5-4.12.0' - The version of the 'html5' formatter used prior to + Beautiful Soup 4.13.0. + * 'minimal' - Only make the substitutions necessary to guarantee + valid HTML. + * None - Do not perform any substitution. 
This will be faster + but may result in invalid markup. + + For XML documents: + * 'html' - Entity substitution for XHTML documents. + * 'minimal' - Only make the substitutions necessary to guarantee + valid XML. (default) + * None - Do not perform any substitution. This will be faster + but may result in invalid markup. + + """ + + #: Constant name denoting HTML markup + HTML: str = "html" + + #: Constant name denoting XML markup + XML: str = "xml" + + #: Default values for the various constructor options when the + #: markup language is HTML. + HTML_DEFAULTS: Dict[str, Set[str]] = dict( + cdata_containing_tags=set(["script", "style"]), + ) + + language: Optional[str] #: :meta private: + entity_substitution: Optional[_EntitySubstitutionFunction] #: :meta private: + void_element_close_prefix: str #: :meta private: + cdata_containing_tags: Set[str] #: :meta private: + indent: str #: :meta private: + + #: If this is set to true by the constructor, then attributes whose + #: values are set to the empty string will be treated as HTML + #: boolean attributes. (Attributes whose value is None are always + #: rendered this way.) + empty_attributes_are_booleans: bool + + def _default( + self, language: str, value: Optional[Set[str]], kwarg: str + ) -> Set[str]: + if value is not None: + return value + if language == self.XML: + # When XML is the markup language in use, all of the + # defaults are the empty set. + return set() + + # Otherwise, it depends on what's in HTML_DEFAULTS. + return self.HTML_DEFAULTS[kwarg] + + def __init__( + self, + language: Optional[str] = None, + entity_substitution: Optional[_EntitySubstitutionFunction] = None, + void_element_close_prefix: str = "/", + cdata_containing_tags: Optional[Set[str]] = None, + empty_attributes_are_booleans: bool = False, + indent: Union[int,str] = 1, + ): + r"""Constructor. + + :param language: This should be `Formatter.XML` if you are formatting + XML markup and `Formatter.HTML` if you are formatting HTML markup. 
+ + :param entity_substitution: A function to call to replace special + characters with XML/HTML entities. For examples, see + bs4.dammit.EntitySubstitution.substitute_html and substitute_xml. + :param void_element_close_prefix: By default, void elements + are represented as <tag/> (XML rules) rather than <tag> + (HTML rules). To get <tag>, pass in the empty string. + :param cdata_containing_tags: The set of tags that are defined + as containing CDATA in this dialect. For example, in HTML, +