aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/bs4/tests/test_css.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/bs4/tests/test_css.py')
-rw-r--r--.venv/lib/python3.12/site-packages/bs4/tests/test_css.py536
1 files changed, 536 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/bs4/tests/test_css.py b/.venv/lib/python3.12/site-packages/bs4/tests/test_css.py
new file mode 100644
index 00000000..b1c42379
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/bs4/tests/test_css.py
@@ -0,0 +1,536 @@
+import pytest
+import types
+
+from bs4 import (
+ BeautifulSoup,
+ ResultSet,
+)
+
+from typing import (
+ Any,
+ List,
+ Tuple,
+ Type,
+)
+
+from packaging.version import Version
+
+from . import (
+ SoupTest,
+ SOUP_SIEVE_PRESENT,
+)
+
+SOUPSIEVE_EXCEPTION_ON_UNSUPPORTED_PSEUDOCLASS: Type[Exception]
+if SOUP_SIEVE_PRESENT:
+ from soupsieve import __version__, SelectorSyntaxError
+
+ # Some behavior changes in soupsieve 2.6 that affects one of our
+ # tests. For the test to run under all versions of Python
+ # supported by Beautiful Soup (which includes versions of Python
+ # not supported by soupsieve 2.6) we need to check both behaviors.
+ SOUPSIEVE_EXCEPTION_ON_UNSUPPORTED_PSEUDOCLASS = SelectorSyntaxError
+ if Version(__version__) < Version("2.6"):
+ SOUPSIEVE_EXCEPTION_ON_UNSUPPORTED_PSEUDOCLASS = NotImplementedError
+
+
+@pytest.mark.skipif(not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed")
+class TestCSSSelectors(SoupTest):
+ """Test basic CSS selector functionality.
+
+ This functionality is implemented in soupsieve, which has a much
+ more comprehensive test suite, so this is basically an extra check
+ that soupsieve works as expected.
+ """
+
+ HTML = """
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+"http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>The title</title>
+<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
+</head>
+<body>
+<custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
+<div id="main" class="fancy">
+<div id="inner">
+<h1 id="header1">An H1</h1>
+<p>Some text</p>
+<p class="onep" id="p1">Some more text</p>
+<h2 id="header2">An H2</h2>
+<p class="class1 class2 class3" id="pmulti">Another</p>
+<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
+<h2 id="header3">Another H2</h2>
+<a id="me" href="http://simonwillison.net/" rel="me">me</a>
+<span class="s1">
+<a href="#" id="s1a1">span1a1</a>
+<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
+<span class="span2">
+<a href="#" id="s2a1">span2a1</a>
+</span>
+<span class="span3"></span>
+<custom-dashed-tag class="dashed" id="dash2"/>
+<div data-tag="dashedvalue" id="data1"/>
+</span>
+</div>
+<x id="xid">
+<z id="zida"/>
+<z id="zidab"/>
+<z id="zidac"/>
+</x>
+<y id="yid">
+<z id="zidb"/>
+</y>
+<p lang="en" id="lang-en">English</p>
+<p lang="en-gb" id="lang-en-gb">English UK</p>
+<p lang="en-us" id="lang-en-us">English US</p>
+<p lang="fr" id="lang-fr">French</p>
+</div>
+
+<div id="footer">
+</div>
+"""
+
+ def setup_method(self):
+ self._soup = BeautifulSoup(self.HTML, "html.parser")
+
+ def assert_css_selects(
+ self, selector: str, expected_ids: List[str], **kwargs: Any
+ ) -> None:
+ results = self._soup.select(selector, **kwargs)
+ assert isinstance(results, ResultSet)
+ el_ids = [el["id"] for el in results]
+ el_ids.sort()
+ expected_ids.sort()
+ assert expected_ids == el_ids, "Selector %s, expected [%s], got [%s]" % (
+ selector,
+ ", ".join(expected_ids),
+ ", ".join(el_ids),
+ )
+
+ assertSelect = assert_css_selects
+
+ def assert_css_select_multiple(self, *tests: Tuple[str, List[str]]):
+ for selector, expected_ids in tests:
+ self.assert_css_selects(selector, expected_ids)
+
+ def test_precompiled(self):
+ sel = self._soup.css.compile("div")
+
+ els = self._soup.select(sel)
+ assert len(els) == 4
+ for div in els:
+ assert div.name == "div"
+
+ el = self._soup.select_one(sel)
+ assert "main" == el["id"]
+
+ def test_one_tag_one(self):
+ els = self._soup.select("title")
+ assert len(els) == 1
+ assert els[0].name == "title"
+ assert els[0].contents == ["The title"]
+
+ def test_one_tag_many(self):
+ els = self._soup.select("div")
+ assert len(els) == 4
+ for div in els:
+ assert div.name == "div"
+
+ el = self._soup.select_one("div")
+ assert "main" == el["id"]
+
+ def test_select_one_returns_none_if_no_match(self):
+ match = self._soup.select_one("nonexistenttag")
+ assert None is match
+
+ def test_tag_in_tag_one(self):
+ self.assert_css_selects("div div", ["inner", "data1"])
+
+ def test_tag_in_tag_many(self):
+ for selector in ("html div", "html body div", "body div"):
+ self.assert_css_selects(selector, ["data1", "main", "inner", "footer"])
+
+ def test_limit(self):
+ self.assert_css_selects("html div", ["main"], limit=1)
+ self.assert_css_selects("html body div", ["inner", "main"], limit=2)
+ self.assert_css_selects(
+ "body div", ["data1", "main", "inner", "footer"], limit=10
+ )
+
+ def test_tag_no_match(self):
+ assert len(self._soup.select("del")) == 0
+
+ def test_invalid_tag(self):
+ with pytest.raises(SelectorSyntaxError):
+ self._soup.select("tag%t")
+
+ def test_select_dashed_tag_ids(self):
+ self.assert_css_selects("custom-dashed-tag", ["dash1", "dash2"])
+
+ def test_select_dashed_by_id(self):
+ dashed = self._soup.select('custom-dashed-tag[id="dash2"]')
+ assert dashed[0].name == "custom-dashed-tag"
+ assert dashed[0]["id"] == "dash2"
+
+ def test_dashed_tag_text(self):
+ assert self._soup.select("body > custom-dashed-tag")[0].text == "Hello there."
+
+ def test_select_dashed_matches_find_all(self):
+ assert self._soup.select("custom-dashed-tag") == self._soup.find_all(
+ "custom-dashed-tag"
+ )
+
+ def test_header_tags(self):
+ self.assert_css_select_multiple(
+ ("h1", ["header1"]),
+ ("h2", ["header2", "header3"]),
+ )
+
+ def test_class_one(self):
+ for selector in (".onep", "p.onep", "html p.onep"):
+ els = self._soup.select(selector)
+ assert len(els) == 1
+ assert els[0].name == "p"
+ assert els[0]["class"] == ["onep"]
+
+ def test_class_mismatched_tag(self):
+ els = self._soup.select("div.onep")
+ assert len(els) == 0
+
+ def test_one_id(self):
+ for selector in ("div#inner", "#inner", "div div#inner"):
+ self.assert_css_selects(selector, ["inner"])
+
+ def test_bad_id(self):
+ els = self._soup.select("#doesnotexist")
+ assert len(els) == 0
+
+ def test_items_in_id(self):
+ els = self._soup.select("div#inner p")
+ assert len(els) == 3
+ for el in els:
+ assert el.name == "p"
+ assert els[1]["class"] == ["onep"]
+ assert not els[0].has_attr("class")
+
+ def test_a_bunch_of_emptys(self):
+ for selector in ("div#main del", "div#main div.oops", "div div#main"):
+ assert len(self._soup.select(selector)) == 0
+
+ def test_multi_class_support(self):
+ for selector in (
+ ".class1",
+ "p.class1",
+ ".class2",
+ "p.class2",
+ ".class3",
+ "p.class3",
+ "html p.class2",
+ "div#inner .class2",
+ ):
+ self.assert_css_selects(selector, ["pmulti"])
+
+ def test_multi_class_selection(self):
+ for selector in (".class1.class3", ".class3.class2", ".class1.class2.class3"):
+ self.assert_css_selects(selector, ["pmulti"])
+
+ def test_child_selector(self):
+ self.assert_css_selects(".s1 > a", ["s1a1", "s1a2"])
+ self.assert_css_selects(".s1 > a span", ["s1a2s1"])
+
+ def test_child_selector_id(self):
+ self.assert_css_selects(".s1 > a#s1a2 span", ["s1a2s1"])
+
+ def test_attribute_equals(self):
+ self.assert_css_select_multiple(
+ ('p[class="onep"]', ["p1"]),
+ ('p[id="p1"]', ["p1"]),
+ ('[class="onep"]', ["p1"]),
+ ('[id="p1"]', ["p1"]),
+ ('link[rel="stylesheet"]', ["l1"]),
+ ('link[type="text/css"]', ["l1"]),
+ ('link[href="blah.css"]', ["l1"]),
+ ('link[href="no-blah.css"]', []),
+ ('[rel="stylesheet"]', ["l1"]),
+ ('[type="text/css"]', ["l1"]),
+ ('[href="blah.css"]', ["l1"]),
+ ('[href="no-blah.css"]', []),
+ ('p[href="no-blah.css"]', []),
+ ('[href="no-blah.css"]', []),
+ )
+
+ def test_attribute_tilde(self):
+ self.assert_css_select_multiple(
+ ('p[class~="class1"]', ["pmulti"]),
+ ('p[class~="class2"]', ["pmulti"]),
+ ('p[class~="class3"]', ["pmulti"]),
+ ('[class~="class1"]', ["pmulti"]),
+ ('[class~="class2"]', ["pmulti"]),
+ ('[class~="class3"]', ["pmulti"]),
+ ('a[rel~="friend"]', ["bob"]),
+ ('a[rel~="met"]', ["bob"]),
+ ('[rel~="friend"]', ["bob"]),
+ ('[rel~="met"]', ["bob"]),
+ )
+
+ def test_attribute_startswith(self):
+ self.assert_css_select_multiple(
+ ('[rel^="style"]', ["l1"]),
+ ('link[rel^="style"]', ["l1"]),
+ ('notlink[rel^="notstyle"]', []),
+ ('[rel^="notstyle"]', []),
+ ('link[rel^="notstyle"]', []),
+ ('link[href^="bla"]', ["l1"]),
+ ('a[href^="http://"]', ["bob", "me"]),
+ ('[href^="http://"]', ["bob", "me"]),
+ ('[id^="p"]', ["pmulti", "p1"]),
+ ('[id^="m"]', ["me", "main"]),
+ ('div[id^="m"]', ["main"]),
+ ('a[id^="m"]', ["me"]),
+ ('div[data-tag^="dashed"]', ["data1"]),
+ )
+
+ def test_attribute_endswith(self):
+ self.assert_css_select_multiple(
+ ('[href$=".css"]', ["l1"]),
+ ('link[href$=".css"]', ["l1"]),
+ ('link[id$="1"]', ["l1"]),
+ (
+ '[id$="1"]',
+ ["data1", "l1", "p1", "header1", "s1a1", "s2a1", "s1a2s1", "dash1"],
+ ),
+ ('div[id$="1"]', ["data1"]),
+ ('[id$="noending"]', []),
+ )
+
+ def test_attribute_contains(self):
+ self.assert_css_select_multiple(
+ # From test_attribute_startswith
+ ('[rel*="style"]', ["l1"]),
+ ('link[rel*="style"]', ["l1"]),
+ ('notlink[rel*="notstyle"]', []),
+ ('[rel*="notstyle"]', []),
+ ('link[rel*="notstyle"]', []),
+ ('link[href*="bla"]', ["l1"]),
+ ('[href*="http://"]', ["bob", "me"]),
+ ('[id*="p"]', ["pmulti", "p1"]),
+ ('div[id*="m"]', ["main"]),
+ ('a[id*="m"]', ["me"]),
+ # From test_attribute_endswith
+ ('[href*=".css"]', ["l1"]),
+ ('link[href*=".css"]', ["l1"]),
+ ('link[id*="1"]', ["l1"]),
+ (
+ '[id*="1"]',
+ [
+ "data1",
+ "l1",
+ "p1",
+ "header1",
+ "s1a1",
+ "s1a2",
+ "s2a1",
+ "s1a2s1",
+ "dash1",
+ ],
+ ),
+ ('div[id*="1"]', ["data1"]),
+ ('[id*="noending"]', []),
+ # New for this test
+ ('[href*="."]', ["bob", "me", "l1"]),
+ ('a[href*="."]', ["bob", "me"]),
+ ('link[href*="."]', ["l1"]),
+ ('div[id*="n"]', ["main", "inner"]),
+ ('div[id*="nn"]', ["inner"]),
+ ('div[data-tag*="edval"]', ["data1"]),
+ )
+
+ def test_attribute_exact_or_hypen(self):
+ self.assert_css_select_multiple(
+ ('p[lang|="en"]', ["lang-en", "lang-en-gb", "lang-en-us"]),
+ ('[lang|="en"]', ["lang-en", "lang-en-gb", "lang-en-us"]),
+ ('p[lang|="fr"]', ["lang-fr"]),
+ ('p[lang|="gb"]', []),
+ )
+
+ def test_attribute_exists(self):
+ self.assert_css_select_multiple(
+ ("[rel]", ["l1", "bob", "me"]),
+ ("link[rel]", ["l1"]),
+ ("a[rel]", ["bob", "me"]),
+ ("[lang]", ["lang-en", "lang-en-gb", "lang-en-us", "lang-fr"]),
+ ("p[class]", ["p1", "pmulti"]),
+ ("[blah]", []),
+ ("p[blah]", []),
+ ("div[data-tag]", ["data1"]),
+ )
+
+ def test_quoted_space_in_selector_name(self):
+ html = """<div style="display: wrong">nope</div>
+ <div style="display: right">yes</div>
+ """
+ soup = BeautifulSoup(html, "html.parser")
+ [chosen] = soup.select('div[style="display: right"]')
+ assert "yes" == chosen.string
+
+ def test_unsupported_pseudoclass(self):
+ with pytest.raises(SOUPSIEVE_EXCEPTION_ON_UNSUPPORTED_PSEUDOCLASS):
+ self._soup.select("a:no-such-pseudoclass")
+
+ with pytest.raises(SelectorSyntaxError):
+ self._soup.select("a:nth-of-type(a)")
+
+ def test_nth_of_type(self):
+ # Try to select first paragraph
+ els = self._soup.select("div#inner p:nth-of-type(1)")
+ assert len(els) == 1
+ assert els[0].string == "Some text"
+
+ # Try to select third paragraph
+ els = self._soup.select("div#inner p:nth-of-type(3)")
+ assert len(els) == 1
+ assert els[0].string == "Another"
+
+ # Try to select (non-existent!) fourth paragraph
+ els = self._soup.select("div#inner p:nth-of-type(4)")
+ assert len(els) == 0
+
+ # Zero will select no tags.
+ els = self._soup.select("div p:nth-of-type(0)")
+ assert len(els) == 0
+
+ def test_nth_of_type_direct_descendant(self):
+ els = self._soup.select("div#inner > p:nth-of-type(1)")
+ assert len(els) == 1
+ assert els[0].string == "Some text"
+
+ def test_id_child_selector_nth_of_type(self):
+ self.assert_css_selects("#inner > p:nth-of-type(2)", ["p1"])
+
+ def test_select_on_element(self):
+ # Other tests operate on the tree; this operates on an element
+ # within the tree.
+ inner = self._soup.find("div", id="main")
+ selected = inner.select("div")
+ # The <div id="inner"> tag was selected. The <div id="footer">
+ # tag was not.
+ self.assert_selects_ids(selected, ["inner", "data1"])
+
+ def test_overspecified_child_id(self):
+ self.assert_css_selects(".fancy #inner", ["inner"])
+ self.assert_css_selects(".normal #inner", [])
+
+ def test_adjacent_sibling_selector(self):
+ self.assert_css_selects("#p1 + h2", ["header2"])
+ self.assert_css_selects("#p1 + h2 + p", ["pmulti"])
+ self.assert_css_selects("#p1 + #header2 + .class1", ["pmulti"])
+ assert [] == self._soup.select("#p1 + p")
+
+ def test_general_sibling_selector(self):
+ self.assert_css_selects("#p1 ~ h2", ["header2", "header3"])
+ self.assert_css_selects("#p1 ~ #header2", ["header2"])
+ self.assert_css_selects("#p1 ~ h2 + a", ["me"])
+ self.assert_css_selects('#p1 ~ h2 + [rel="me"]', ["me"])
+ assert [] == self._soup.select("#inner ~ h2")
+
+ def test_dangling_combinator(self):
+ with pytest.raises(SelectorSyntaxError):
+ self._soup.select("h1 >")
+
+ def test_sibling_combinator_wont_select_same_tag_twice(self):
+ self.assert_css_selects("p[lang] ~ p", ["lang-en-gb", "lang-en-us", "lang-fr"])
+
+ # Test the selector grouping operator (the comma)
+ def test_multiple_select(self):
+ self.assert_css_selects("x, y", ["xid", "yid"])
+
+ def test_multiple_select_with_no_space(self):
+ self.assert_css_selects("x,y", ["xid", "yid"])
+
+ def test_multiple_select_with_more_space(self):
+ self.assert_css_selects("x, y", ["xid", "yid"])
+
+ def test_multiple_select_duplicated(self):
+ self.assert_css_selects("x, x", ["xid"])
+
+ def test_multiple_select_sibling(self):
+ self.assert_css_selects("x, y ~ p[lang=fr]", ["xid", "lang-fr"])
+
+ def test_multiple_select_tag_and_direct_descendant(self):
+ self.assert_css_selects("x, y > z", ["xid", "zidb"])
+
+ def test_multiple_select_direct_descendant_and_tags(self):
+ self.assert_css_selects(
+ "div > x, y, z", ["xid", "yid", "zida", "zidb", "zidab", "zidac"]
+ )
+
+ def test_multiple_select_indirect_descendant(self):
+ self.assert_css_selects(
+ "div x,y, z", ["xid", "yid", "zida", "zidb", "zidab", "zidac"]
+ )
+
+ def test_invalid_multiple_select(self):
+ with pytest.raises(SelectorSyntaxError):
+ self._soup.select(",x, y")
+ with pytest.raises(SelectorSyntaxError):
+ self._soup.select("x,,y")
+
+ def test_multiple_select_attrs(self):
+ self.assert_css_selects("p[lang=en], p[lang=en-gb]", ["lang-en", "lang-en-gb"])
+
+ def test_multiple_select_ids(self):
+ self.assert_css_selects(
+ "x, y > z[id=zida], z[id=zidab], z[id=zidb]", ["xid", "zidb", "zidab"]
+ )
+
+ def test_multiple_select_nested(self):
+ self.assert_css_selects("body > div > x, y > z", ["xid", "zidb"])
+
+ def test_select_duplicate_elements(self):
+ # When markup contains duplicate elements, a multiple select
+ # will find all of them.
+ markup = '<div class="c1"/><div class="c2"/><div class="c1"/>'
+ soup = BeautifulSoup(markup, "html.parser")
+ selected = soup.select(".c1, .c2")
+ assert 3 == len(selected)
+
+ # Verify that find_all finds the same elements, though because
+ # of an implementation detail it finds them in a different
+ # order.
+ for element in soup.find_all(class_=["c1", "c2"]):
+ assert element in selected
+
+ def test_closest(self):
+ inner = self._soup.find("div", id="inner")
+ closest = inner.css.closest("div[id=main]")
+ assert closest == self._soup.find("div", id="main")
+
+ def test_match(self):
+ inner = self._soup.find("div", id="inner")
+ main = self._soup.find("div", id="main")
+ assert inner.css.match("div[id=main]") is False
+ assert main.css.match("div[id=main]") is True
+
+ def test_iselect(self):
+ gen = self._soup.css.iselect("h2")
+ assert isinstance(gen, types.GeneratorType)
+ [header2, header3] = gen
+ assert header2["id"] == "header2"
+ assert header3["id"] == "header3"
+
+ def test_filter(self):
+ inner = self._soup.find("div", id="inner")
+ results = inner.css.filter("h2")
+ assert len(inner.css.filter("h2")) == 2
+
+ results = inner.css.filter("h2[id=header3]")
+ assert isinstance(results, ResultSet)
+ [result] = results
+ assert result["id"] == "header3"
+
+ def test_escape(self):
+ m = self._soup.css.escape
+ assert m(".foo#bar") == "\\.foo\\#bar"
+ assert m("()[]{}") == "\\(\\)\\[\\]\\{\\}"
+ assert m(".foo") == self._soup.css.escape(".foo")