diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py | 139 |
1 files changed, 139 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py b/.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py new file mode 100644 index 00000000..ad4b5a9e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py @@ -0,0 +1,139 @@ +"""Tests of the builder registry.""" + +import pytest +import warnings +from typing import Type + +from bs4 import BeautifulSoup +from bs4.builder import ( + builder_registry as registry, + TreeBuilder, + TreeBuilderRegistry, +) +from bs4.builder._htmlparser import HTMLParserTreeBuilder + +from . import ( + HTML5LIB_PRESENT, + LXML_PRESENT, +) + +if HTML5LIB_PRESENT: + from bs4.builder._html5lib import HTML5TreeBuilder + +if LXML_PRESENT: + from bs4.builder._lxml import ( + LXMLTreeBuilderForXML, + LXMLTreeBuilder, + ) + + +# TODO: Split out the lxml and html5lib tests into their own classes +# and gate with pytest.mark.skipIf. +class TestBuiltInRegistry(object): + """Test the built-in registry with the default builders registered.""" + + def test_combination(self): + assert registry.lookup("strict", "html") == HTMLParserTreeBuilder + if LXML_PRESENT: + assert registry.lookup("fast", "html") == LXMLTreeBuilder + assert registry.lookup("permissive", "xml") == LXMLTreeBuilderForXML + if HTML5LIB_PRESENT: + assert registry.lookup("html5lib", "html") == HTML5TreeBuilder + + def test_lookup_by_markup_type(self): + if LXML_PRESENT: + assert registry.lookup("html") == LXMLTreeBuilder + assert registry.lookup("xml") == LXMLTreeBuilderForXML + else: + assert registry.lookup("xml") is None + if HTML5LIB_PRESENT: + assert registry.lookup("html") == HTML5TreeBuilder + else: + assert registry.lookup("html") == HTMLParserTreeBuilder + + def test_named_library(self): + if LXML_PRESENT: + assert registry.lookup("lxml", "xml") == LXMLTreeBuilderForXML + assert registry.lookup("lxml", "html") == LXMLTreeBuilder + if HTML5LIB_PRESENT: + assert registry.lookup("html5lib") == HTML5TreeBuilder + + assert registry.lookup("html.parser") == HTMLParserTreeBuilder + + def test_beautifulsoup_constructor_does_lookup(self): + with warnings.catch_warnings(record=True): + # This will create a warning about not explicitly + # specifying a parser, but we'll ignore it. + + # You can pass in a string. + BeautifulSoup("", features="html") + # Or a list of strings. + BeautifulSoup("", features=["html", "fast"]) + pass + + # You'll get an exception if BS can't find an appropriate + # builder. + with pytest.raises(ValueError): + BeautifulSoup("", features="no-such-feature") + + +class TestRegistry(object): + """Test the TreeBuilderRegistry class in general.""" + + def setup_method(self): + self.registry = TreeBuilderRegistry() + + def builder_for_features(self, *feature_list: str) -> Type[TreeBuilder]: + cls = type( + "Builder_" + "_".join(feature_list), (object,), {"features": feature_list} + ) + + self.registry.register(cls) + return cls + + def test_register_with_no_features(self): + builder = self.builder_for_features() + + # Since the builder advertises no features, you can't find it + # by looking up features. + assert self.registry.lookup("foo") is None + + # But you can find it by doing a lookup with no features, if + # this happens to be the only registered builder. + assert self.registry.lookup() == builder + + def test_register_with_features_makes_lookup_succeed(self): + builder = self.builder_for_features("foo", "bar") + assert self.registry.lookup("foo") is builder + assert self.registry.lookup("bar") is builder + + def test_lookup_fails_when_no_builder_implements_feature(self): + assert self.registry.lookup("baz") is None + + def test_lookup_gets_most_recent_registration_when_no_feature_specified(self): + self.builder_for_features("foo") + builder2 = self.builder_for_features("bar") + assert self.registry.lookup() == builder2 + + def test_lookup_fails_when_no_tree_builders_registered(self): + assert self.registry.lookup() is None + + def test_lookup_gets_most_recent_builder_supporting_all_features(self): + self.builder_for_features("foo") + self.builder_for_features("bar") + has_both_early = self.builder_for_features("foo", "bar", "baz") + has_both_late = self.builder_for_features("foo", "bar", "quux") + self.builder_for_features("bar") + self.builder_for_features("foo") + + # There are two builders featuring 'foo' and 'bar', but + # the one that also features 'quux' was registered later. + assert self.registry.lookup("foo", "bar") == has_both_late + + # There is only one builder featuring 'foo', 'bar', and 'baz'. + assert self.registry.lookup("foo", "bar", "baz") == has_both_early + + def test_lookup_fails_when_cannot_reconcile_requested_features(self): + self.builder_for_features("foo", "bar") + self.builder_for_features("foo", "baz") + assert self.registry.lookup("bar", "baz") is None |