about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py
diff options
context:
space:
mode:
author: S. Solomon Darnell, 2025-03-28 21:52:21 -0500
committer: S. Solomon Darnell, 2025-03-28 21:52:21 -0500
commit: 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree: ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py
parent: cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download: gn-ai-master.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py')
-rw-r--r--.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py139
1 files changed, 139 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py b/.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py
new file mode 100644
index 00000000..ad4b5a9e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/bs4/tests/test_builder_registry.py
@@ -0,0 +1,139 @@
+"""Tests of the builder registry."""
+
+import pytest
+import warnings
+from typing import Type
+
+from bs4 import BeautifulSoup
+from bs4.builder import (
+    builder_registry as registry,
+    TreeBuilder,
+    TreeBuilderRegistry,
+)
+from bs4.builder._htmlparser import HTMLParserTreeBuilder
+
+from . import (
+    HTML5LIB_PRESENT,
+    LXML_PRESENT,
+)
+
+if HTML5LIB_PRESENT:
+    from bs4.builder._html5lib import HTML5TreeBuilder
+
+if LXML_PRESENT:
+    from bs4.builder._lxml import (
+        LXMLTreeBuilderForXML,
+        LXMLTreeBuilder,
+    )
+
+
+# TODO: Split out the lxml and html5lib tests into their own classes
+# and gate with pytest.mark.skipif.
+class TestBuiltInRegistry(object):
+    """Test the built-in registry with the default builders registered."""
+
+    def test_combination(self):
+        assert registry.lookup("strict", "html") == HTMLParserTreeBuilder
+        if LXML_PRESENT:
+            assert registry.lookup("fast", "html") == LXMLTreeBuilder
+            assert registry.lookup("permissive", "xml") == LXMLTreeBuilderForXML
+        if HTML5LIB_PRESENT:
+            assert registry.lookup("html5lib", "html") == HTML5TreeBuilder
+
+    def test_lookup_by_markup_type(self):
+        if LXML_PRESENT:
+            assert registry.lookup("html") == LXMLTreeBuilder
+            assert registry.lookup("xml") == LXMLTreeBuilderForXML
+        else:
+            assert registry.lookup("xml") is None
+            if HTML5LIB_PRESENT:
+                assert registry.lookup("html") == HTML5TreeBuilder
+            else:
+                assert registry.lookup("html") == HTMLParserTreeBuilder
+
+    def test_named_library(self):
+        if LXML_PRESENT:
+            assert registry.lookup("lxml", "xml") == LXMLTreeBuilderForXML
+            assert registry.lookup("lxml", "html") == LXMLTreeBuilder
+        if HTML5LIB_PRESENT:
+            assert registry.lookup("html5lib") == HTML5TreeBuilder
+
+        assert registry.lookup("html.parser") == HTMLParserTreeBuilder
+
+    def test_beautifulsoup_constructor_does_lookup(self):
+        with warnings.catch_warnings(record=True):
+            # This will create a warning about not explicitly
+            # specifying a parser, but we'll ignore it.
+
+            # You can pass in a string.
+            BeautifulSoup("", features="html")
+            # Or a list of strings.
+            BeautifulSoup("", features=["html", "fast"])
+            pass
+
+        # You'll get an exception if BS can't find an appropriate
+        # builder.
+        with pytest.raises(ValueError):
+            BeautifulSoup("", features="no-such-feature")
+
+
+class TestRegistry(object):
+    """Test the TreeBuilderRegistry class in general."""
+
+    def setup_method(self):
+        self.registry = TreeBuilderRegistry()
+
+    def builder_for_features(self, *feature_list: str) -> Type[TreeBuilder]:
+        cls = type(
+            "Builder_" + "_".join(feature_list), (object,), {"features": feature_list}
+        )
+
+        self.registry.register(cls)
+        return cls
+
+    def test_register_with_no_features(self):
+        builder = self.builder_for_features()
+
+        # Since the builder advertises no features, you can't find it
+        # by looking up features.
+        assert self.registry.lookup("foo") is None
+
+        # But you can find it by doing a lookup with no features, if
+        # this happens to be the only registered builder.
+        assert self.registry.lookup() == builder
+
+    def test_register_with_features_makes_lookup_succeed(self):
+        builder = self.builder_for_features("foo", "bar")
+        assert self.registry.lookup("foo") is builder
+        assert self.registry.lookup("bar") is builder
+
+    def test_lookup_fails_when_no_builder_implements_feature(self):
+        assert self.registry.lookup("baz") is None
+
+    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
+        self.builder_for_features("foo")
+        builder2 = self.builder_for_features("bar")
+        assert self.registry.lookup() == builder2
+
+    def test_lookup_fails_when_no_tree_builders_registered(self):
+        assert self.registry.lookup() is None
+
+    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
+        self.builder_for_features("foo")
+        self.builder_for_features("bar")
+        has_both_early = self.builder_for_features("foo", "bar", "baz")
+        has_both_late = self.builder_for_features("foo", "bar", "quux")
+        self.builder_for_features("bar")
+        self.builder_for_features("foo")
+
+        # There are two builders featuring 'foo' and 'bar', but
+        # the one that also features 'quux' was registered later.
+        assert self.registry.lookup("foo", "bar") == has_both_late
+
+        # There is only one builder featuring 'foo', 'bar', and 'baz'.
+        assert self.registry.lookup("foo", "bar", "baz") == has_both_early
+
+    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
+        self.builder_for_features("foo", "bar")
+        self.builder_for_features("foo", "baz")
+        assert self.registry.lookup("bar", "baz") is None