aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/bs4/tests/test_navigablestring.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/bs4/tests/test_navigablestring.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two versions of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/bs4/tests/test_navigablestring.py')
-rw-r--r--.venv/lib/python3.12/site-packages/bs4/tests/test_navigablestring.py144
1 files changed, 144 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/bs4/tests/test_navigablestring.py b/.venv/lib/python3.12/site-packages/bs4/tests/test_navigablestring.py
new file mode 100644
index 00000000..3e33469f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/bs4/tests/test_navigablestring.py
@@ -0,0 +1,144 @@
+import pytest
+
+from bs4.element import (
+ CData,
+ Comment,
+ Declaration,
+ Doctype,
+ NavigableString,
+ RubyParenthesisString,
+ RubyTextString,
+ Script,
+ Stylesheet,
+ TemplateString,
+)
+
+from . import SoupTest
+
+
+class TestNavigableString(SoupTest):
+    """Checks NavigableString text accessors and its ``name`` attribute."""
+
+    def test_text_acquisition_methods(self):
+        """Text-gathering methods meant for Tag also work on string nodes."""
+        plain = NavigableString("fee ")
+        cdata_node = CData("fie ")
+        comment_node = Comment("foe ")
+
+        # A plain string and a CDATA section are both checked against the
+        # same set of accessors: each one yields the node's own content.
+        for node, raw, trimmed in (
+            (plain, "fee ", "fee"),
+            (cdata_node, "fie ", "fie"),
+        ):
+            assert raw == node.get_text()
+            assert raw == node.string
+            assert trimmed == node.get_text(strip=True)
+            assert [raw] == list(node.strings)
+            assert [trimmed] == list(node.stripped_strings)
+            assert [raw] == list(node._all_strings())
+
+        # A Comment isn't normally considered 'text', so by default the
+        # same accessors come back empty.
+        assert "" == comment_node.get_text()
+        assert [] == list(comment_node.strings)
+        assert [] == list(comment_node.stripped_strings)
+        assert [] == list(comment_node._all_strings())
+
+        # Explicitly allowing Comment through ``types`` exposes its content.
+        stripped_comment = comment_node.get_text(strip=True, types=Comment)
+        assert "foe" == stripped_comment
+        unstripped_comment = comment_node.get_text(
+            types=(Comment, NavigableString)
+        )
+        assert "foe " == unstripped_comment
+
+    def test_string_has_immutable_name_property(self):
+        """``name`` on a string node is None and rejects assignment."""
+        text_node = self.soup("s").string
+        assert text_node.name is None
+        with pytest.raises(AttributeError):
+            text_node.name = "foo"
+
+
+class TestNavigableStringSubclasses(SoupTest):
+    """Checks the specialised NavigableString subclasses and their output."""
+
+    def test_cdata(self):
+        """A hand-built CData node can be inserted, rendered and found."""
+        # None of the current builders turn CDATA sections into CData
+        # objects, so the node is created manually here.
+        doc = self.soup("")
+        doc.insert(1, CData("foo"))
+
+        rendered = str(doc)
+        assert rendered == "<![CDATA[foo]]>"
+        assert doc.find(string="foo") == "foo"
+        first_child = doc.contents[0]
+        assert first_child == "foo"
+
+    def test_cdata_is_never_formatted(self):
+        """The formatter sees CData text, but its result is discarded.
+
+        The formatter below is called exactly once and the encoded
+        output still holds the original, unmodified CDATA content.
+        """
+        self.count = 0
+
+        def counting_formatter(*args):
+            # Record the call, then return text that must never show up
+            # in the encoded output.
+            self.count += 1
+            return "BITTER FAILURE"
+
+        doc = self.soup("")
+        doc.insert(1, CData("<><><>"))
+
+        encoded = doc.encode(formatter=counting_formatter)
+        assert b"<![CDATA[<><><>]]>" == encoded
+        assert 1 == self.count
+
+    def test_doctype_ends_in_newline(self):
+        """Unlike other NavigableString subclasses, a DOCTYPE always ends
+        in a newline when encoded."""
+        declaration = Doctype("foo")
+        doc = self.soup("")
+        doc.insert(1, declaration)
+
+        encoded = doc.encode()
+        assert encoded == b"<!DOCTYPE foo>\n"
+
+    def test_declaration(self):
+        """A Declaration is rendered wrapped in ``<?`` and ``?>``."""
+        rendered = Declaration("foo").output_ready()
+        assert "<?foo?>" == rendered
+
+    def test_default_string_containers(self):
+        """The same text gets a different string class in different tags."""
+        doc = self.soup("<div>text</div><script>text</script><style>text</style>")
+        found_classes = [node.__class__ for node in doc.find_all(string=True)]
+        assert [NavigableString, Script, Stylesheet] == found_classes
+
+        # TemplateString is a little unusual: it is generally found
+        # _inside_ children of a <template> element, not only as a direct
+        # child of the <template> element.
+        template_markup = "<template>Some text<p>In a tag</p></template>Some text outside"
+        doc = self.soup(template_markup)
+        for text in doc.template._all_strings(types=None):
+            assert isinstance(text, TemplateString)
+
+        # Once the <template> tag has closed, strings go back to being
+        # plain NavigableString objects.
+        trailing = doc.template.next_sibling
+        assert isinstance(trailing, NavigableString)
+        assert not isinstance(trailing, TemplateString)
+
+        # A <template> can also contain NavigableString subclasses of
+        # _other_ types, such as Comment; the markup must round-trip.
+        markup = b"<template>Some text<p>In a tag</p><!--with a comment--></template>"
+        doc = self.soup(markup)
+        round_tripped = doc.template.encode("utf8")
+        assert markup == round_tripped
+
+    def test_ruby_strings(self):
+        """Text in <rp> and <rt> tags uses dedicated ruby string classes."""
+        markup = "<ruby>漢 <rp>(</rp><rt>kan</rt><rp>)</rp> 字 <rp>(</rp><rt>ji</rt><rp>)</rp></ruby>"
+        doc = self.soup(markup)
+
+        parenthesis = doc.rp.string
+        assert isinstance(parenthesis, RubyParenthesisString)
+        annotation = doc.rt.string
+        assert isinstance(annotation, RubyTextString)
+
+        # Just as a demo, here's what this means for get_text usage: the
+        # ruby annotations are left out unless their types are requested.
+        assert "漢字" == doc.get_text(strip=True)
+        with_ruby_types = (NavigableString, RubyTextString, RubyParenthesisString)
+        assert "漢(kan)字(ji)" == doc.get_text(strip=True, types=with_ruby_types)