aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/bs4/tests/test_formatter.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/bs4/tests/test_formatter.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are hereHEADmaster
Diffstat (limited to '.venv/lib/python3.12/site-packages/bs4/tests/test_formatter.py')
-rw-r--r--.venv/lib/python3.12/site-packages/bs4/tests/test_formatter.py170
1 files changed, 170 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/bs4/tests/test_formatter.py b/.venv/lib/python3.12/site-packages/bs4/tests/test_formatter.py
new file mode 100644
index 00000000..0b840c58
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/bs4/tests/test_formatter.py
@@ -0,0 +1,170 @@
+import pytest
+
+from bs4.element import Tag
+from bs4.formatter import (
+ Formatter,
+ HTMLFormatter,
+ XMLFormatter,
+)
+from . import SoupTest
+
+
+class TestFormatter(SoupTest):
+ def test_default_attributes(self):
+ # Test the default behavior of Formatter.attributes().
+ formatter = Formatter()
+ tag = Tag(name="tag")
+ tag["b"] = "1"
+ tag["a"] = "2"
+
+ # Attributes come out sorted by name. In Python 3, attributes
+ # normally come out of a dictionary in the order they were
+ # added.
+ assert [("a", "2"), ("b", "1")] == formatter.attributes(tag)
+
+ # This works even if Tag.attrs is None, though this shouldn't
+ # normally happen.
+ tag.attrs = None
+ assert [] == formatter.attributes(tag)
+
+ assert " " == formatter.indent
+
+ def test_sort_attributes(self):
+ # Test the ability to override Formatter.attributes() to,
+ # e.g., disable the normal sorting of attributes.
+ class UnsortedFormatter(Formatter):
+ def attributes(self, tag):
+ self.called_with = tag
+ for k, v in sorted(tag.attrs.items()):
+ if k == "ignore":
+ continue
+ yield k, v
+
+ soup = self.soup('<p cval="1" aval="2" ignore="ignored"></p>')
+ formatter = UnsortedFormatter()
+ decoded = soup.decode(formatter=formatter)
+
+ # attributes() was called on the <p> tag. It filtered out one
+ # attribute and sorted the other two.
+ assert formatter.called_with == soup.p
+ assert '<p aval="2" cval="1"></p>' == decoded
+
+ def test_empty_attributes_are_booleans(self):
+ # Test the behavior of empty_attributes_are_booleans as well
+ # as which Formatters have it enabled.
+
+ for name in ("html", "minimal", None):
+ formatter = HTMLFormatter.REGISTRY[name]
+ assert False is formatter.empty_attributes_are_booleans
+
+ formatter = XMLFormatter.REGISTRY[None]
+ assert False is formatter.empty_attributes_are_booleans
+
+ formatter = HTMLFormatter.REGISTRY["html5"]
+ assert True is formatter.empty_attributes_are_booleans
+
+ # Verify that the constructor sets the value.
+ formatter = Formatter(empty_attributes_are_booleans=True)
+ assert True is formatter.empty_attributes_are_booleans
+
+ # Now demonstrate what it does to markup.
+ for markup in ("<option selected></option>", '<option selected=""></option>'):
+ soup = self.soup(markup)
+ for formatter in ("html", "minimal", "xml", None):
+ assert b'<option selected=""></option>' == soup.option.encode(
+ formatter="html"
+ )
+ assert b"<option selected></option>" == soup.option.encode(
+ formatter="html5"
+ )
+
+ @pytest.mark.parametrize(
+ "indent,expect",
+ [
+ (None, "<a>\n<b>\ntext\n</b>\n</a>\n"),
+ (-1, "<a>\n<b>\ntext\n</b>\n</a>\n"),
+ (0, "<a>\n<b>\ntext\n</b>\n</a>\n"),
+ ("", "<a>\n<b>\ntext\n</b>\n</a>\n"),
+ (1, "<a>\n <b>\n text\n </b>\n</a>\n"),
+ (2, "<a>\n <b>\n text\n </b>\n</a>\n"),
+ ("\t", "<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>\n"),
+ ("abc", "<a>\nabc<b>\nabcabctext\nabc</b>\n</a>\n"),
+ # Some invalid inputs -- the default behavior is used.
+ (object(), "<a>\n <b>\n text\n </b>\n</a>\n"),
+ (b"bytes", "<a>\n <b>\n text\n </b>\n</a>\n"),
+ ],
+ )
+ def test_indent(self, indent, expect):
+ # Pretty-print a tree with a Formatter set to
+ # indent in a certain way and verify the results.
+ soup = self.soup("<a><b>text</b></a>")
+ formatter = Formatter(indent=indent)
+ assert soup.prettify(formatter=formatter) == expect
+
+ # Pretty-printing only happens with prettify(), not
+ # encode().
+ assert soup.encode(formatter=formatter) != expect
+
+ def test_default_indent_value(self):
+ formatter = Formatter()
+ assert formatter.indent == " "
+
+ @pytest.mark.parametrize("formatter,expect",
+ [
+ (HTMLFormatter(indent=1), "<p>\n a\n</p>\n"),
+ (HTMLFormatter(indent=2), "<p>\n a\n</p>\n"),
+ (XMLFormatter(indent=1), "<p>\n a\n</p>\n"),
+ (XMLFormatter(indent="\t"), "<p>\n\ta\n</p>\n"),
+ ] )
+ def test_indent_subclasses(self, formatter, expect):
+ soup = self.soup("<p>a</p>")
+ assert expect == soup.p.prettify(formatter=formatter)
+
+ @pytest.mark.parametrize(
+ "s,expect_html,expect_html5",
+ [
+ # The html5 formatter is much less aggressive about escaping ampersands
+ # than the html formatter.
+ ("foo & bar", "foo &amp; bar", "foo & bar"),
+ ("foo&", "foo&amp;", "foo&"),
+ ("foo&&& bar", "foo&amp;&amp;&amp; bar", "foo&&& bar"),
+ ("x=1&y=2", "x=1&amp;y=2", "x=1&y=2"),
+ ("&123", "&amp;123", "&123"),
+ ("&abc", "&amp;abc", "&abc"),
+ ("foo &0 bar", "foo &amp;0 bar", "foo &0 bar"),
+ ("foo &lolwat bar", "foo &amp;lolwat bar", "foo &lolwat bar"),
+ # But both formatters escape what the HTML5 spec considers ambiguous ampersands.
+ ("&nosuchentity;", "&amp;nosuchentity;", "&amp;nosuchentity;"),
+ ],
+ )
+ def test_entity_substitution(self, s, expect_html, expect_html5):
+ assert HTMLFormatter.REGISTRY["html"].substitute(s) == expect_html
+ assert HTMLFormatter.REGISTRY["html5"].substitute(s) == expect_html5
+ assert HTMLFormatter.REGISTRY["html5-4.12"].substitute(s) == expect_html
+
+ def test_entity_round_trip(self):
+ # This is more an explanatory test and a way to avoid regressions than a test of functionality.
+
+ markup = "<p>Some division signs: ÷ &divide; &#247; &#xf7;. These are made with: ÷ &amp;divide; &amp;#247;</p>"
+ soup = self.soup(markup)
+ assert (
+ "Some division signs: ÷ ÷ ÷ ÷. These are made with: ÷ &divide; &#247;"
+ == soup.p.string
+ )
+
+ # Oops, I forgot to mention the entity.
+ soup.p.string = soup.p.string + " &#xf7;"
+
+ assert (
+ "Some division signs: ÷ ÷ ÷ ÷. These are made with: ÷ &divide; &#247; &#xf7;"
+ == soup.p.string
+ )
+
+ expect = "<p>Some division signs: &divide; &divide; &divide; &divide;. These are made with: &divide; &amp;divide; &amp;#247; &amp;#xf7;</p>"
+ assert expect == soup.p.decode(formatter="html")
+ assert expect == soup.p.decode(formatter="html5")
+
+ markup = "<p>a & b</p>"
+ soup = self.soup(markup)
+ assert "<p>a &amp; b</p>" == soup.p.decode(formatter="html")
+ assert "<p>a & b</p>" == soup.p.decode(formatter="html5")