From 4a52a71956a8d46fcb7294ac71734504bb09bcc2 Mon Sep 17 00:00:00 2001 From: S. Solomon Darnell Date: Fri, 28 Mar 2025 21:52:21 -0500 Subject: two version of R2R are here --- .../bs4/tests/test_navigablestring.py | 144 +++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 .venv/lib/python3.12/site-packages/bs4/tests/test_navigablestring.py (limited to '.venv/lib/python3.12/site-packages/bs4/tests/test_navigablestring.py') diff --git a/.venv/lib/python3.12/site-packages/bs4/tests/test_navigablestring.py b/.venv/lib/python3.12/site-packages/bs4/tests/test_navigablestring.py new file mode 100644 index 00000000..3e33469f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/bs4/tests/test_navigablestring.py @@ -0,0 +1,144 @@ +import pytest + +from bs4.element import ( + CData, + Comment, + Declaration, + Doctype, + NavigableString, + RubyParenthesisString, + RubyTextString, + Script, + Stylesheet, + TemplateString, +) + +from . import SoupTest + + +class TestNavigableString(SoupTest): + def test_text_acquisition_methods(self): + # These methods are intended for use against Tag, but they + # work on NavigableString as well, + + s = NavigableString("fee ") + cdata = CData("fie ") + comment = Comment("foe ") + + assert "fee " == s.get_text() + assert "fee " == s.string + assert "fee" == s.get_text(strip=True) + assert ["fee "] == list(s.strings) + assert ["fee"] == list(s.stripped_strings) + assert ["fee "] == list(s._all_strings()) + + assert "fie " == cdata.get_text() + assert "fie " == cdata.string + assert "fie" == cdata.get_text(strip=True) + assert ["fie "] == list(cdata.strings) + assert ["fie"] == list(cdata.stripped_strings) + assert ["fie "] == list(cdata._all_strings()) + + # Since a Comment isn't normally considered 'text', + # these methods generally do nothing. + assert "" == comment.get_text() + assert [] == list(comment.strings) + assert [] == list(comment.stripped_strings) + assert [] == list(comment._all_strings()) + + # Unless you specifically say that comments are okay. + assert "foe" == comment.get_text(strip=True, types=Comment) + assert "foe " == comment.get_text(types=(Comment, NavigableString)) + + def test_string_has_immutable_name_property(self): + # string.name is defined as None and can't be modified + string = self.soup("s").string + assert None is string.name + with pytest.raises(AttributeError): + string.name = "foo" + + +class TestNavigableStringSubclasses(SoupTest): + def test_cdata(self): + # None of the current builders turn CDATA sections into CData + # objects, but you can create them manually. + soup = self.soup("") + cdata = CData("foo") + soup.insert(1, cdata) + assert str(soup) == "" + assert soup.find(string="foo") == "foo" + assert soup.contents[0] == "foo" + + def test_cdata_is_never_formatted(self): + """Text inside a CData object is passed into the formatter. + + But the return value is ignored. + """ + + self.count = 0 + + def increment(*args): + self.count += 1 + return "BITTER FAILURE" + + soup = self.soup("") + cdata = CData("<><><>") + soup.insert(1, cdata) + assert b"<><>]]>" == soup.encode(formatter=increment) + assert 1 == self.count + + def test_doctype_ends_in_newline(self): + # Unlike other NavigableString subclasses, a DOCTYPE always ends + # in a newline. + doctype = Doctype("foo") + soup = self.soup("") + soup.insert(1, doctype) + assert soup.encode() == b"\n" + + def test_declaration(self): + d = Declaration("foo") + assert "" == d.output_ready() + + def test_default_string_containers(self): + # In some cases, we use different NavigableString subclasses for + # the same text in different tags. + soup = self.soup("
In a tag
Some text outside" + ) + assert all( + isinstance(x, TemplateString) + for x in soup.template._all_strings(types=None) + ) + + # Once the tag closed, we went back to using + # NavigableString. + outside = soup.template.next_sibling + assert isinstance(outside, NavigableString) + assert not isinstance(outside, TemplateString) + + # The TemplateString is also unusual because it can contain + # NavigableString subclasses of _other_ types, such as + # Comment. + markup = b"Some textIn a tag
" + soup = self.soup(markup) + assert markup == soup.template.encode("utf8") + + def test_ruby_strings(self): + markup = "漢 字 " + soup = self.soup(markup) + assert isinstance(soup.rp.string, RubyParenthesisString) + assert isinstance(soup.rt.string, RubyTextString) + + # Just as a demo, here's what this means for get_text usage. + assert "漢字" == soup.get_text(strip=True) + assert "漢(kan)字(ji)" == soup.get_text( + strip=True, types=(NavigableString, RubyTextString, RubyParenthesisString) + ) -- cgit v1.2.3