From 4a52a71956a8d46fcb7294ac71734504bb09bcc2 Mon Sep 17 00:00:00 2001 From: S. Solomon Darnell Date: Fri, 28 Mar 2025 21:52:21 -0500 Subject: two version of R2R are here --- .../python3.12/site-packages/bs4/tests/test_css.py | 536 +++++++++++++++++++++ 1 file changed, 536 insertions(+) create mode 100644 .venv/lib/python3.12/site-packages/bs4/tests/test_css.py (limited to '.venv/lib/python3.12/site-packages/bs4/tests/test_css.py') diff --git a/.venv/lib/python3.12/site-packages/bs4/tests/test_css.py b/.venv/lib/python3.12/site-packages/bs4/tests/test_css.py new file mode 100644 index 00000000..b1c42379 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/bs4/tests/test_css.py @@ -0,0 +1,536 @@ +import pytest +import types + +from bs4 import ( + BeautifulSoup, + ResultSet, +) + +from typing import ( + Any, + List, + Tuple, + Type, +) + +from packaging.version import Version + +from . import ( + SoupTest, + SOUP_SIEVE_PRESENT, +) + +SOUPSIEVE_EXCEPTION_ON_UNSUPPORTED_PSEUDOCLASS: Type[Exception] +if SOUP_SIEVE_PRESENT: + from soupsieve import __version__, SelectorSyntaxError + + # Some behavior changes in soupsieve 2.6 that affects one of our + # tests. For the test to run under all versions of Python + # supported by Beautiful Soup (which includes versions of Python + # not supported by soupsieve 2.6) we need to check both behaviors. + SOUPSIEVE_EXCEPTION_ON_UNSUPPORTED_PSEUDOCLASS = SelectorSyntaxError + if Version(__version__) < Version("2.6"): + SOUPSIEVE_EXCEPTION_ON_UNSUPPORTED_PSEUDOCLASS = NotImplementedError + + +@pytest.mark.skipif(not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed") +class TestCSSSelectors(SoupTest): + """Test basic CSS selector functionality. + + This functionality is implemented in soupsieve, which has a much + more comprehensive test suite, so this is basically an extra check + that soupsieve works as expected. + """ + + HTML = """ + + + +The title + + + +Hello there. +
+
+

An H1

+

Some text

+

Some more text

+

An H2

+

Another

+Bob +

Another H2

+me + +span1a1 +span1a2 test + +span2a1 + + + +
+ +
+ + + + + + + + +

English

+

English UK

+

English US

+

French

+
+ + +""" + + def setup_method(self): + self._soup = BeautifulSoup(self.HTML, "html.parser") + + def assert_css_selects( + self, selector: str, expected_ids: List[str], **kwargs: Any + ) -> None: + results = self._soup.select(selector, **kwargs) + assert isinstance(results, ResultSet) + el_ids = [el["id"] for el in results] + el_ids.sort() + expected_ids.sort() + assert expected_ids == el_ids, "Selector %s, expected [%s], got [%s]" % ( + selector, + ", ".join(expected_ids), + ", ".join(el_ids), + ) + + assertSelect = assert_css_selects + + def assert_css_select_multiple(self, *tests: Tuple[str, List[str]]): + for selector, expected_ids in tests: + self.assert_css_selects(selector, expected_ids) + + def test_precompiled(self): + sel = self._soup.css.compile("div") + + els = self._soup.select(sel) + assert len(els) == 4 + for div in els: + assert div.name == "div" + + el = self._soup.select_one(sel) + assert "main" == el["id"] + + def test_one_tag_one(self): + els = self._soup.select("title") + assert len(els) == 1 + assert els[0].name == "title" + assert els[0].contents == ["The title"] + + def test_one_tag_many(self): + els = self._soup.select("div") + assert len(els) == 4 + for div in els: + assert div.name == "div" + + el = self._soup.select_one("div") + assert "main" == el["id"] + + def test_select_one_returns_none_if_no_match(self): + match = self._soup.select_one("nonexistenttag") + assert None is match + + def test_tag_in_tag_one(self): + self.assert_css_selects("div div", ["inner", "data1"]) + + def test_tag_in_tag_many(self): + for selector in ("html div", "html body div", "body div"): + self.assert_css_selects(selector, ["data1", "main", "inner", "footer"]) + + def test_limit(self): + self.assert_css_selects("html div", ["main"], limit=1) + self.assert_css_selects("html body div", ["inner", "main"], limit=2) + self.assert_css_selects( + "body div", ["data1", "main", "inner", "footer"], limit=10 + ) + + def test_tag_no_match(self): + assert len(self._soup.select("del")) == 0 + + def test_invalid_tag(self): + with pytest.raises(SelectorSyntaxError): + self._soup.select("tag%t") + + def test_select_dashed_tag_ids(self): + self.assert_css_selects("custom-dashed-tag", ["dash1", "dash2"]) + + def test_select_dashed_by_id(self): + dashed = self._soup.select('custom-dashed-tag[id="dash2"]') + assert dashed[0].name == "custom-dashed-tag" + assert dashed[0]["id"] == "dash2" + + def test_dashed_tag_text(self): + assert self._soup.select("body > custom-dashed-tag")[0].text == "Hello there." + + def test_select_dashed_matches_find_all(self): + assert self._soup.select("custom-dashed-tag") == self._soup.find_all( + "custom-dashed-tag" + ) + + def test_header_tags(self): + self.assert_css_select_multiple( + ("h1", ["header1"]), + ("h2", ["header2", "header3"]), + ) + + def test_class_one(self): + for selector in (".onep", "p.onep", "html p.onep"): + els = self._soup.select(selector) + assert len(els) == 1 + assert els[0].name == "p" + assert els[0]["class"] == ["onep"] + + def test_class_mismatched_tag(self): + els = self._soup.select("div.onep") + assert len(els) == 0 + + def test_one_id(self): + for selector in ("div#inner", "#inner", "div div#inner"): + self.assert_css_selects(selector, ["inner"]) + + def test_bad_id(self): + els = self._soup.select("#doesnotexist") + assert len(els) == 0 + + def test_items_in_id(self): + els = self._soup.select("div#inner p") + assert len(els) == 3 + for el in els: + assert el.name == "p" + assert els[1]["class"] == ["onep"] + assert not els[0].has_attr("class") + + def test_a_bunch_of_emptys(self): + for selector in ("div#main del", "div#main div.oops", "div div#main"): + assert len(self._soup.select(selector)) == 0 + + def test_multi_class_support(self): + for selector in ( + ".class1", + "p.class1", + ".class2", + "p.class2", + ".class3", + "p.class3", + "html p.class2", + "div#inner .class2", + ): + self.assert_css_selects(selector, ["pmulti"]) + + def test_multi_class_selection(self): + for selector in (".class1.class3", ".class3.class2", ".class1.class2.class3"): + self.assert_css_selects(selector, ["pmulti"]) + + def test_child_selector(self): + self.assert_css_selects(".s1 > a", ["s1a1", "s1a2"]) + self.assert_css_selects(".s1 > a span", ["s1a2s1"]) + + def test_child_selector_id(self): + self.assert_css_selects(".s1 > a#s1a2 span", ["s1a2s1"]) + + def test_attribute_equals(self): + self.assert_css_select_multiple( + ('p[class="onep"]', ["p1"]), + ('p[id="p1"]', ["p1"]), + ('[class="onep"]', ["p1"]), + ('[id="p1"]', ["p1"]), + ('link[rel="stylesheet"]', ["l1"]), + ('link[type="text/css"]', ["l1"]), + ('link[href="blah.css"]', ["l1"]), + ('link[href="no-blah.css"]', []), + ('[rel="stylesheet"]', ["l1"]), + ('[type="text/css"]', ["l1"]), + ('[href="blah.css"]', ["l1"]), + ('[href="no-blah.css"]', []), + ('p[href="no-blah.css"]', []), + ('[href="no-blah.css"]', []), + ) + + def test_attribute_tilde(self): + self.assert_css_select_multiple( + ('p[class~="class1"]', ["pmulti"]), + ('p[class~="class2"]', ["pmulti"]), + ('p[class~="class3"]', ["pmulti"]), + ('[class~="class1"]', ["pmulti"]), + ('[class~="class2"]', ["pmulti"]), + ('[class~="class3"]', ["pmulti"]), + ('a[rel~="friend"]', ["bob"]), + ('a[rel~="met"]', ["bob"]), + ('[rel~="friend"]', ["bob"]), + ('[rel~="met"]', ["bob"]), + ) + + def test_attribute_startswith(self): + self.assert_css_select_multiple( + ('[rel^="style"]', ["l1"]), + ('link[rel^="style"]', ["l1"]), + ('notlink[rel^="notstyle"]', []), + ('[rel^="notstyle"]', []), + ('link[rel^="notstyle"]', []), + ('link[href^="bla"]', ["l1"]), + ('a[href^="http://"]', ["bob", "me"]), + ('[href^="http://"]', ["bob", "me"]), + ('[id^="p"]', ["pmulti", "p1"]), + ('[id^="m"]', ["me", "main"]), + ('div[id^="m"]', ["main"]), + ('a[id^="m"]', ["me"]), + ('div[data-tag^="dashed"]', ["data1"]), + ) + + def test_attribute_endswith(self): + self.assert_css_select_multiple( + ('[href$=".css"]', ["l1"]), + ('link[href$=".css"]', ["l1"]), + ('link[id$="1"]', ["l1"]), + ( + '[id$="1"]', + ["data1", "l1", "p1", "header1", "s1a1", "s2a1", "s1a2s1", "dash1"], + ), + ('div[id$="1"]', ["data1"]), + ('[id$="noending"]', []), + ) + + def test_attribute_contains(self): + self.assert_css_select_multiple( + # From test_attribute_startswith + ('[rel*="style"]', ["l1"]), + ('link[rel*="style"]', ["l1"]), + ('notlink[rel*="notstyle"]', []), + ('[rel*="notstyle"]', []), + ('link[rel*="notstyle"]', []), + ('link[href*="bla"]', ["l1"]), + ('[href*="http://"]', ["bob", "me"]), + ('[id*="p"]', ["pmulti", "p1"]), + ('div[id*="m"]', ["main"]), + ('a[id*="m"]', ["me"]), + # From test_attribute_endswith + ('[href*=".css"]', ["l1"]), + ('link[href*=".css"]', ["l1"]), + ('link[id*="1"]', ["l1"]), + ( + '[id*="1"]', + [ + "data1", + "l1", + "p1", + "header1", + "s1a1", + "s1a2", + "s2a1", + "s1a2s1", + "dash1", + ], + ), + ('div[id*="1"]', ["data1"]), + ('[id*="noending"]', []), + # New for this test + ('[href*="."]', ["bob", "me", "l1"]), + ('a[href*="."]', ["bob", "me"]), + ('link[href*="."]', ["l1"]), + ('div[id*="n"]', ["main", "inner"]), + ('div[id*="nn"]', ["inner"]), + ('div[data-tag*="edval"]', ["data1"]), + ) + + def test_attribute_exact_or_hypen(self): + self.assert_css_select_multiple( + ('p[lang|="en"]', ["lang-en", "lang-en-gb", "lang-en-us"]), + ('[lang|="en"]', ["lang-en", "lang-en-gb", "lang-en-us"]), + ('p[lang|="fr"]', ["lang-fr"]), + ('p[lang|="gb"]', []), + ) + + def test_attribute_exists(self): + self.assert_css_select_multiple( + ("[rel]", ["l1", "bob", "me"]), + ("link[rel]", ["l1"]), + ("a[rel]", ["bob", "me"]), + ("[lang]", ["lang-en", "lang-en-gb", "lang-en-us", "lang-fr"]), + ("p[class]", ["p1", "pmulti"]), + ("[blah]", []), + ("p[blah]", []), + ("div[data-tag]", ["data1"]), + ) + + def test_quoted_space_in_selector_name(self): + html = """
nope
+
yes
+ """ + soup = BeautifulSoup(html, "html.parser") + [chosen] = soup.select('div[style="display: right"]') + assert "yes" == chosen.string + + def test_unsupported_pseudoclass(self): + with pytest.raises(SOUPSIEVE_EXCEPTION_ON_UNSUPPORTED_PSEUDOCLASS): + self._soup.select("a:no-such-pseudoclass") + + with pytest.raises(SelectorSyntaxError): + self._soup.select("a:nth-of-type(a)") + + def test_nth_of_type(self): + # Try to select first paragraph + els = self._soup.select("div#inner p:nth-of-type(1)") + assert len(els) == 1 + assert els[0].string == "Some text" + + # Try to select third paragraph + els = self._soup.select("div#inner p:nth-of-type(3)") + assert len(els) == 1 + assert els[0].string == "Another" + + # Try to select (non-existent!) fourth paragraph + els = self._soup.select("div#inner p:nth-of-type(4)") + assert len(els) == 0 + + # Zero will select no tags. + els = self._soup.select("div p:nth-of-type(0)") + assert len(els) == 0 + + def test_nth_of_type_direct_descendant(self): + els = self._soup.select("div#inner > p:nth-of-type(1)") + assert len(els) == 1 + assert els[0].string == "Some text" + + def test_id_child_selector_nth_of_type(self): + self.assert_css_selects("#inner > p:nth-of-type(2)", ["p1"]) + + def test_select_on_element(self): + # Other tests operate on the tree; this operates on an element + # within the tree. + inner = self._soup.find("div", id="main") + selected = inner.select("div") + # The
tag was selected. The