aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py')
-rw-r--r--.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py139
1 files changed, 139 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py b/.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py
new file mode 100644
index 00000000..ad4b5a9e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py
@@ -0,0 +1,139 @@
+"""Tests of the builder registry."""
+
+import pytest
+import warnings
+from typing import Type
+
+from bs4 import BeautifulSoup
+from bs4.builder import (
+ builder_registry as registry,
+ TreeBuilder,
+ TreeBuilderRegistry,
+)
+from bs4.builder._htmlparser import HTMLParserTreeBuilder
+
+from . import (
+ HTML5LIB_PRESENT,
+ LXML_PRESENT,
+)
+
+if HTML5LIB_PRESENT:
+ from bs4.builder._html5lib import HTML5TreeBuilder
+
+if LXML_PRESENT:
+ from bs4.builder._lxml import (
+ LXMLTreeBuilderForXML,
+ LXMLTreeBuilder,
+ )
+
+
# TODO: Give the lxml and html5lib tests classes of their own, gated
# with pytest.mark.skipif.
class TestBuiltInRegistry:
    """Exercise the shared registry as populated with the stock builders."""

    def test_combination(self):
        """Several features passed together select a single builder."""
        found = registry.lookup("strict", "html")
        assert found == HTMLParserTreeBuilder

        # The remaining combinations need the optional parsers.
        if LXML_PRESENT:
            assert registry.lookup("fast", "html") == LXMLTreeBuilder
            assert registry.lookup("permissive", "xml") == LXMLTreeBuilderForXML
        if HTML5LIB_PRESENT:
            assert registry.lookup("html5lib", "html") == HTML5TreeBuilder

    def test_lookup_by_markup_type(self):
        """Looking up a bare markup type depends on what is installed."""
        if LXML_PRESENT:
            # With lxml available it is expected for both markup types.
            assert registry.lookup("html") == LXMLTreeBuilder
            assert registry.lookup("xml") == LXMLTreeBuilderForXML
            return

        # Without lxml no XML builder is expected, and HTML falls to
        # html5lib when present, otherwise to html.parser.
        assert registry.lookup("xml") is None
        expected_html = (
            HTML5TreeBuilder if HTML5LIB_PRESENT else HTMLParserTreeBuilder
        )
        assert registry.lookup("html") == expected_html

    def test_named_library(self):
        """A parser library's own name works as a feature."""
        if LXML_PRESENT:
            assert registry.lookup("lxml", "xml") == LXMLTreeBuilderForXML
            assert registry.lookup("lxml", "html") == LXMLTreeBuilder
        if HTML5LIB_PRESENT:
            assert registry.lookup("html5lib") == HTML5TreeBuilder

        html_parser_builder = registry.lookup("html.parser")
        assert html_parser_builder == HTMLParserTreeBuilder

    def test_beautifulsoup_constructor_does_lookup(self):
        """The BeautifulSoup constructor resolves `features` for itself."""
        # `features` may be a single string or a list of strings.
        accepted_forms = ("html", ["html", "fast"])

        # Constructing without naming a specific parser produces a
        # warning; it is captured here and deliberately not inspected.
        with warnings.catch_warnings(record=True):
            for requested in accepted_forms:
                BeautifulSoup("", features=requested)

        # A feature nothing can satisfy is reported as an exception.
        with pytest.raises(ValueError):
            BeautifulSoup("", features="no-such-feature")
+
+
class TestRegistry:
    """Exercise TreeBuilderRegistry on its own, apart from the built-ins."""

    def setup_method(self):
        """Give each test a fresh, empty registry."""
        self.registry = TreeBuilderRegistry()

    def builder_for_features(self, *feature_list: str) -> Type[TreeBuilder]:
        """Create a stand-in builder class and register it.

        The class is named after the given features, exposes them as
        its ``features`` attribute, and is registered with
        ``self.registry`` before being returned.
        """
        class_name = "Builder_" + "_".join(feature_list)
        attributes = {"features": feature_list}
        stub = type(class_name, (object,), attributes)

        self.registry.register(stub)
        return stub

    def test_register_with_no_features(self):
        """A builder with no features is reachable only by a bare lookup."""
        featureless = self.builder_for_features()

        # It advertises nothing, so a feature lookup cannot find it.
        assert self.registry.lookup("foo") is None

        # As the only registered builder, it is what a lookup with no
        # features returns.
        assert self.registry.lookup() == featureless

    def test_register_with_features_makes_lookup_succeed(self):
        """Each advertised feature leads back to the registered builder."""
        registered = self.builder_for_features("foo", "bar")
        assert self.registry.lookup("foo") is registered
        assert self.registry.lookup("bar") is registered

    def test_lookup_fails_when_no_builder_implements_feature(self):
        """Looking up a feature nobody registered yields None."""
        assert self.registry.lookup("baz") is None

    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
        """A bare lookup returns the builder registered last."""
        self.builder_for_features("foo")
        newest = self.builder_for_features("bar")
        assert self.registry.lookup() == newest

    def test_lookup_fails_when_no_tree_builders_registered(self):
        """A bare lookup on an empty registry yields None."""
        assert self.registry.lookup() is None

    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
        """Among builders with every requested feature, the latest wins."""
        # Registration order matters here, so it is kept exactly.
        self.builder_for_features("foo")
        self.builder_for_features("bar")
        earlier_match = self.builder_for_features("foo", "bar", "baz")
        later_match = self.builder_for_features("foo", "bar", "quux")
        self.builder_for_features("bar")
        self.builder_for_features("foo")

        # Two builders offer both 'foo' and 'bar'; the one that also
        # offers 'quux' was registered after the other.
        assert self.registry.lookup("foo", "bar") == later_match

        # Only one builder offers 'foo', 'bar' and 'baz' together.
        assert self.registry.lookup("foo", "bar", "baz") == earlier_match

    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
        """No result when no single builder has all requested features."""
        self.builder_for_features("foo", "bar")
        self.builder_for_features("foo", "baz")
        assert self.registry.lookup("bar", "baz") is None