aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/soupsieve
diff options
context:
space:
mode:
author S. Solomon Darnell 2025-03-28 21:52:21 -0500
committer S. Solomon Darnell 2025-03-28 21:52:21 -0500
commit 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/soupsieve
parent cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download gn-ai-master.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/soupsieve')
-rw-r--r-- .venv/lib/python3.12/site-packages/soupsieve/__init__.py 168
-rw-r--r-- .venv/lib/python3.12/site-packages/soupsieve/__meta__.py 197
-rw-r--r-- .venv/lib/python3.12/site-packages/soupsieve/css_match.py 1582
-rw-r--r-- .venv/lib/python3.12/site-packages/soupsieve/css_parser.py 1289
-rw-r--r-- .venv/lib/python3.12/site-packages/soupsieve/css_types.py 407
-rw-r--r-- .venv/lib/python3.12/site-packages/soupsieve/pretty.py 139
-rw-r--r-- .venv/lib/python3.12/site-packages/soupsieve/py.typed 0
-rw-r--r-- .venv/lib/python3.12/site-packages/soupsieve/util.py 117
8 files changed, 3899 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/soupsieve/__init__.py b/.venv/lib/python3.12/site-packages/soupsieve/__init__.py
new file mode 100644
index 00000000..45730dfa
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/soupsieve/__init__.py
@@ -0,0 +1,168 @@
+"""
+Soup Sieve.
+
+A CSS selector filter for BeautifulSoup4.
+
+MIT License
+
+Copyright (c) 2018 Isaac Muse
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+from __future__ import annotations
+from .__meta__ import __version__, __version_info__ # noqa: F401
+from . import css_parser as cp
+from . import css_match as cm
+from . import css_types as ct
+from .util import DEBUG, SelectorSyntaxError # noqa: F401
+import bs4 # type: ignore[import-untyped]
+from typing import Any, Iterator, Iterable
+
# Names exported by `from soupsieve import *`.
__all__ = (
    'DEBUG',
    'SelectorSyntaxError',
    'SoupSieve',
    'closest',
    'compile',
    'filter',
    'iselect',
    'match',
    'select',
    'select_one',
)

# Expose the compiled selector class from `css_match` at package level.
SoupSieve = cm.SoupSieve
+
+
def compile(  # noqa: A001
    pattern: str,
    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
    custom: dict[str, str] | None = None,
    **kwargs: Any
) -> cm.SoupSieve:
    """
    Compile a CSS selector pattern.

    A string pattern is handed to `cp._cached_css_compile`, with `namespaces` and
    `custom` wrapped in `ct.Namespaces` / `ct.CustomSelectors` when they are provided.

    A pattern that is already a `SoupSieve` object is returned as is. In that case
    `flags`, `namespaces`, and `custom` cannot be applied, so supplying any of them
    raises `ValueError`.
    """

    if not isinstance(pattern, SoupSieve):
        wrapped_namespaces = None if namespaces is None else ct.Namespaces(namespaces)
        wrapped_custom = None if custom is None else ct.CustomSelectors(custom)
        return cp._cached_css_compile(pattern, wrapped_namespaces, wrapped_custom, flags)

    # Already compiled: reject options that can no longer take effect.
    if flags:
        raise ValueError("Cannot process 'flags' argument on a compiled selector list")
    if namespaces is not None:
        raise ValueError("Cannot process 'namespaces' argument on a compiled selector list")
    if custom is not None:
        raise ValueError("Cannot process 'custom' argument on a compiled selector list")
    return pattern
+
+
def purge() -> None:
    """Clear the compiled pattern cache by delegating to `cp._purge_cache`."""

    cp._purge_cache()
+
+
def closest(
    select: str,
    tag: bs4.Tag,
    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
    custom: dict[str, str] | None = None,
    **kwargs: Any
) -> bs4.Tag:
    """
    Match closest ancestor.

    Compiles `select` (see `compile`) and returns the result of calling `closest`
    on the compiled selector with `tag`.

    `custom` is forwarded to `compile` so custom selectors are honored; previously
    it was accepted here but silently dropped.
    """

    return compile(select, namespaces, flags, custom=custom, **kwargs).closest(tag)
+
+
def match(
    select: str,
    tag: bs4.Tag,
    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
    custom: dict[str, str] | None = None,
    **kwargs: Any
) -> bool:
    """
    Match node.

    Compiles `select` (see `compile`) and returns the result of calling `match`
    on the compiled selector with `tag`.

    `custom` is forwarded to `compile` so custom selectors are honored; previously
    it was accepted here but silently dropped.
    """

    return compile(select, namespaces, flags, custom=custom, **kwargs).match(tag)
+
+
def filter(  # noqa: A001
    select: str,
    iterable: Iterable[bs4.Tag],
    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
    custom: dict[str, str] | None = None,
    **kwargs: Any
) -> list[bs4.Tag]:
    """
    Filter list of nodes.

    Compiles `select` (see `compile`) and returns the result of calling `filter`
    on the compiled selector with `iterable`.

    `custom` is forwarded to `compile` so custom selectors are honored; previously
    it was accepted here but silently dropped.
    """

    return compile(select, namespaces, flags, custom=custom, **kwargs).filter(iterable)
+
+
def select_one(
    select: str,
    tag: bs4.Tag,
    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
    custom: dict[str, str] | None = None,
    **kwargs: Any
) -> bs4.Tag:
    """
    Select a single tag.

    Compiles `select` (see `compile`) and returns the result of calling `select_one`
    on the compiled selector with `tag`.

    `custom` is forwarded to `compile` so custom selectors are honored; previously
    it was accepted here but silently dropped.
    """

    return compile(select, namespaces, flags, custom=custom, **kwargs).select_one(tag)
+
+
def select(
    select: str,
    tag: bs4.Tag,
    namespaces: dict[str, str] | None = None,
    limit: int = 0,
    flags: int = 0,
    *,
    custom: dict[str, str] | None = None,
    **kwargs: Any
) -> list[bs4.Tag]:
    """
    Select the specified tags.

    Compiles `select` (see `compile`) and returns the result of calling `select`
    on the compiled selector with `tag` and `limit`.

    `custom` is forwarded to `compile` so custom selectors are honored; previously
    it was accepted here but silently dropped.
    """

    return compile(select, namespaces, flags, custom=custom, **kwargs).select(tag, limit)
+
+
def iselect(
    select: str,
    tag: bs4.Tag,
    namespaces: dict[str, str] | None = None,
    limit: int = 0,
    flags: int = 0,
    *,
    custom: dict[str, str] | None = None,
    **kwargs: Any
) -> Iterator[bs4.Tag]:
    """
    Iterate the specified tags.

    Compiles `select` (see `compile`) and yields everything produced by calling
    `iselect` on the compiled selector with `tag` and `limit`. Because this is a
    generator, compilation does not happen until iteration starts.

    `custom` is forwarded to `compile` so custom selectors are honored; previously
    it was accepted here but silently dropped.
    """

    yield from compile(select, namespaces, flags, custom=custom, **kwargs).iselect(tag, limit)
+
+
def escape(ident: str) -> str:
    """Return `ident` escaped by the CSS parser's `escape` helper."""

    escaped = cp.escape(ident)
    return escaped
diff --git a/.venv/lib/python3.12/site-packages/soupsieve/__meta__.py b/.venv/lib/python3.12/site-packages/soupsieve/__meta__.py
new file mode 100644
index 00000000..0fbf71b0
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/soupsieve/__meta__.py
@@ -0,0 +1,197 @@
+"""Meta related things."""
+from __future__ import annotations
+from collections import namedtuple
+import re
+
# Verbose pattern for the supported version strings:
# `major[.minor[.micro]]`, an optional pre-release tag (`a`, `b`, `rc` plus a number),
# an optional `.postN`, and an optional `.devN`.
RE_VER = re.compile(
    r'''(?x)
    (?P<major>\d+)(?:\.(?P<minor>\d+))?(?:\.(?P<micro>\d+))?
    (?:(?P<type>a|b|rc)(?P<pre>\d+))?
    (?:\.post(?P<post>\d+))?
    (?:\.dev(?P<dev>\d+))?
    '''
)

# Release type -> pre-release tag used when rendering the canonical version string.
REL_MAP = {
    ".dev": "",
    ".dev-alpha": "a",
    ".dev-beta": "b",
    ".dev-candidate": "rc",
    "alpha": "a",
    "beta": "b",
    "candidate": "rc",
    "final": "",
}

# Release type -> development status string (see `Version._get_dev_status`).
DEV_STATUS = {
    ".dev": "2 - Pre-Alpha",
    ".dev-alpha": "2 - Pre-Alpha",
    ".dev-beta": "2 - Pre-Alpha",
    ".dev-candidate": "2 - Pre-Alpha",
    "alpha": "3 - Alpha",
    "beta": "4 - Beta",
    "candidate": "4 - Beta",
    "final": "5 - Production/Stable",
}

# Pre-release tag parsed from a version string -> release type name.
PRE_REL_MAP = {
    "a": 'alpha',
    "b": 'beta',
    "rc": 'candidate',
}
+
+
class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre", "post", "dev"])):
    """
    Get the version (PEP 440).

    A biased approach to the PEP 440 semantic version.

    Provides a tuple structure which is sorted for comparisons `v1 > v2` etc.
    (major, minor, micro, release type, pre-release build, post-release build, development release build)
    Release types are named in such a way that they are comparable with ease.
    Accessors to check if a development, pre-release, or post-release build. Also provides accessor to get
    development status for setup files.

    How it works (currently):

    - You must specify a release type as either `final`, `alpha`, `beta`, or `candidate`.
    - To define a development release, you can use either `.dev`, `.dev-alpha`, `.dev-beta`, or `.dev-candidate`.
      The dot is used to ensure all development specifiers are sorted before `alpha`.
      You can specify a `dev` number for development builds, but do not have to as implicit development releases
      are allowed.
    - You must specify a `pre` value greater than zero if using a prerelease as this project (not PEP 440) does not
      allow implicit prereleases.
    - You can optionally set `post` to a value greater than zero to make the build a post release. While post releases
      are technically allowed in prereleases, it is strongly discouraged, so we are rejecting them. It should be
      noted that we do not allow `post0` even though PEP 440 does not restrict this. This project specifically
      does not allow implicit post releases.
    - It should be noted that we do not support epochs `1!` or local versions `+some-custom.version-1`.

    Acceptable version releases:

    ```
    Version(1, 0, 0, "final")                    1.0
    Version(1, 2, 0, "final")                    1.2
    Version(1, 2, 3, "final")                    1.2.3
    Version(1, 2, 0, "alpha", pre=4)             1.2a4
    Version(1, 2, 0, "beta", pre=4)              1.2b4
    Version(1, 2, 0, "candidate", pre=4)         1.2rc4
    Version(1, 2, 0, ".dev-alpha", pre=4)        1.2a4.dev0
    Version(1, 2, 0, ".dev-beta", pre=4)         1.2b4.dev0
    Version(1, 2, 0, ".dev-candidate", pre=4)    1.2rc4.dev0
    Version(1, 2, 0, "final", post=1)            1.2.post1
    Version(1, 2, 3, ".dev")                     1.2.3.dev0
    Version(1, 2, 3, ".dev", dev=1)              1.2.3.dev1
    ```

    """

    def __new__(
        cls,
        major: int, minor: int, micro: int, release: str = "final",
        pre: int = 0, post: int = 0, dev: int = 0
    ) -> Version:
        """
        Validate version info and build the tuple.

        Raises `ValueError` when a numeric part is not a non-negative integer, when
        `release` is not a key of `REL_MAP`, or when `pre`, `post`, and `dev` are
        inconsistent with the release type.
        """

        # Ensure all numeric parts (including `dev`) are non-negative integers.
        for value in (major, minor, micro, pre, post, dev):
            if not (isinstance(value, int) and value >= 0):
                raise ValueError("All version parts except 'release' should be integers.")

        if release not in REL_MAP:
            raise ValueError(f"'{release}' is not a valid release type.")

        # Ensure valid pre-release (we do not allow implicit pre-releases).
        # The string comparison selects `alpha`, `beta`, and `candidate`.
        if ".dev-candidate" < release < "final":
            if pre == 0:
                raise ValueError("Implicit pre-releases not allowed.")
            elif dev:
                raise ValueError("Version is not a development release.")
            elif post:
                raise ValueError("Post-releases are not allowed with pre-releases.")

        # Ensure valid development or development/pre release.
        # All `.dev*` names sort before `alpha` because of the leading dot.
        elif release < "alpha":
            if release > ".dev" and pre == 0:
                raise ValueError("Implicit pre-release not allowed.")
            elif post:
                raise ValueError("Post-releases are not allowed with pre-releases.")

        # Ensure a valid normal release
        else:
            if pre:
                raise ValueError("Version is not a pre-release.")
            elif dev:
                raise ValueError("Version is not a development release.")

        return super().__new__(cls, major, minor, micro, release, pre, post, dev)

    def _is_pre(self) -> bool:
        """Is prerelease."""

        return bool(self.pre > 0)

    def _is_dev(self) -> bool:
        """Is development."""

        return bool(self.release < "alpha")

    def _is_post(self) -> bool:
        """Is post."""

        return bool(self.post > 0)

    def _get_dev_status(self) -> str:  # pragma: no cover
        """Get development status string."""

        return DEV_STATUS[self.release]

    def _get_canonical(self) -> str:
        """Get the canonical output string."""

        # Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed.
        # A zero `micro` is omitted from the output.
        if self.micro == 0:
            ver = f"{self.major}.{self.minor}"
        else:
            ver = f"{self.major}.{self.minor}.{self.micro}"
        if self._is_pre():
            ver += f'{REL_MAP[self.release]}{self.pre}'
        if self._is_post():
            ver += f".post{self.post}"
        if self._is_dev():
            ver += f".dev{self.dev}"

        return ver
+
+
def parse_version(ver: str) -> Version:
    """
    Parse version into a comparable Version tuple.

    Raises `ValueError` if `ver` does not match `RE_VER`, or if `Version` rejects
    the parsed parts.
    """

    # NOTE: `match` only anchors at the start, so text after the recognized
    # version prefix is ignored.
    m = RE_VER.match(ver)

    if m is None:
        raise ValueError(f"'{ver}' is not a valid version")

    # Handle major, minor, micro
    major = int(m.group('major'))
    minor = int(m.group('minor')) if m.group('minor') else 0
    micro = int(m.group('micro')) if m.group('micro') else 0

    # Handle pre releases
    if m.group('type'):
        release = PRE_REL_MAP[m.group('type')]
        pre = int(m.group('pre'))
    else:
        release = "final"
        pre = 0

    # Handle development releases: a `.devN` suffix turns the release type into
    # `.dev` or, when a pre-release tag is present, `.dev-<type>`.
    if m.group('dev'):
        dev = int(m.group('dev'))
        release = '.dev-' + release if pre else '.dev'
    else:
        dev = 0

    # Handle post
    post = int(m.group('post')) if m.group('post') else 0

    return Version(major, minor, micro, release, pre, post, dev)
+
+
# Package version as a structured tuple and as its canonical string.
__version_info__ = Version(major=2, minor=6, micro=0, release="final")
__version__ = __version_info__._get_canonical()
diff --git a/.venv/lib/python3.12/site-packages/soupsieve/css_match.py b/.venv/lib/python3.12/site-packages/soupsieve/css_match.py
new file mode 100644
index 00000000..e52e42d5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/soupsieve/css_match.py
@@ -0,0 +1,1582 @@
+"""CSS matcher."""
+from __future__ import annotations
+from datetime import datetime
+from . import util
+import re
+from . import css_types as ct
+import unicodedata
+import bs4 # type: ignore[import-untyped]
+from typing import Iterator, Iterable, Any, Callable, Sequence, cast # noqa: F401
+
# Matches a single character that is not one of: space, tab, CR, LF, form feed.
# Used to decide whether text is non-empty (whitespace is okay).
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')

# Matches a run of characters that are not one of the whitespace characters above.
RE_NOT_WS = re.compile('[^ \t\r\n\f]+')

# Relationships (combinators)
REL_PARENT = ' '
REL_CLOSE_PARENT = '>'
REL_SIBLING = '~'
REL_CLOSE_SIBLING = '+'

# Relationships for `:has()` (forward looking): the same symbols prefixed with `:`
REL_HAS_PARENT = ': '
REL_HAS_CLOSE_PARENT = ':>'
REL_HAS_SIBLING = ':~'
REL_HAS_CLOSE_SIBLING = ':+'

# Namespace URIs
NS_XHTML = 'http://www.w3.org/1999/xhtml'
NS_XML = 'http://www.w3.org/XML/1998/namespace'
+
# Combined flag masks built from the selector flags in `css_types`.
DIR_FLAGS = ct.SEL_DIR_LTR | ct.SEL_DIR_RTL
RANGES = ct.SEL_IN_RANGE | ct.SEL_OUT_OF_RANGE

# `dir` attribute value -> direction flag; `auto` maps to no flag (0).
DIR_MAP = {
    'ltr': ct.SEL_DIR_LTR,
    'rtl': ct.SEL_DIR_RTL,
    'auto': 0,
}
+
# Patterns for the input value formats handled by `Inputs.parse_value`.
RE_NUM = re.compile(r"^(?P<value>-?(?:[0-9]{1,}(\.[0-9]+)?|\.[0-9]+))$")
RE_TIME = re.compile(r'^(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$')
RE_MONTH = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})$')
RE_WEEK = re.compile(r'^(?P<year>[0-9]{4,})-W(?P<week>[0-9]{2})$')
RE_DATE = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})$')
RE_DATETIME = re.compile(
    r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})T(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$'
)

# Used by `extended_language_filter` to collapse wildcard subtags in a language range.
RE_WILD_STRIP = re.compile(r'(?:(?:-\*-)(?:\*(?:-|$))*|-\*$)')

# Calendar constants
MONTHS_30 = (4, 6, 9, 11)  # April, June, September, and November
FEB = 2
SHORT_MONTH = 30
LONG_MONTH = 31
FEB_MONTH = 28
FEB_LEAP_MONTH = 29
DAYS_IN_WEEK = 7
+
+
+class _FakeParent:
+ """
+ Fake parent class.
+
+ When we have a fragment with no `BeautifulSoup` document object,
+ we can't evaluate `nth` selectors properly. Create a temporary
+ fake parent so we can traverse the root element as a child.
+ """
+
+ def __init__(self, element: bs4.Tag) -> None:
+ """Initialize."""
+
+ self.contents = [element]
+
+ def __len__(self) -> bs4.PageElement:
+ """Length."""
+
+ return len(self.contents)
+
+
+class _DocumentNav:
+ """Navigate a Beautiful Soup document."""
+
+ @classmethod
+ def assert_valid_input(cls, tag: Any) -> None:
+ """Check if valid input tag or document."""
+
+ # Fail on unexpected types.
+ if not cls.is_tag(tag):
+ raise TypeError(f"Expected a BeautifulSoup 'Tag', but instead received type {type(tag)}")
+
+ @staticmethod
+ def is_doc(obj: bs4.Tag) -> bool:
+ """Is `BeautifulSoup` object."""
+ return isinstance(obj, bs4.BeautifulSoup)
+
+ @staticmethod
+ def is_tag(obj: bs4.PageElement) -> bool:
+ """Is tag."""
+ return isinstance(obj, bs4.Tag)
+
+ @staticmethod
+ def is_declaration(obj: bs4.PageElement) -> bool: # pragma: no cover
+ """Is declaration."""
+ return isinstance(obj, bs4.Declaration)
+
+ @staticmethod
+ def is_cdata(obj: bs4.PageElement) -> bool:
+ """Is CDATA."""
+ return isinstance(obj, bs4.CData)
+
+ @staticmethod
+ def is_processing_instruction(obj: bs4.PageElement) -> bool: # pragma: no cover
+ """Is processing instruction."""
+ return isinstance(obj, bs4.ProcessingInstruction)
+
+ @staticmethod
+ def is_navigable_string(obj: bs4.PageElement) -> bool:
+ """Is navigable string."""
+ return isinstance(obj, bs4.NavigableString)
+
+ @staticmethod
+ def is_special_string(obj: bs4.PageElement) -> bool:
+ """Is special string."""
+ return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
+
+ @classmethod
+ def is_content_string(cls, obj: bs4.PageElement) -> bool:
+ """Check if node is content string."""
+
+ return cls.is_navigable_string(obj) and not cls.is_special_string(obj)
+
+ @staticmethod
+ def create_fake_parent(el: bs4.Tag) -> _FakeParent:
+ """Create fake parent for a given element."""
+
+ return _FakeParent(el)
+
+ @staticmethod
+ def is_xml_tree(el: bs4.Tag) -> bool:
+ """Check if element (or document) is from a XML tree."""
+
+ return bool(el._is_xml)
+
+ def is_iframe(self, el: bs4.Tag) -> bool:
+ """Check if element is an `iframe`."""
+
+ return bool(
+ ((el.name if self.is_xml_tree(el) else util.lower(el.name)) == 'iframe') and
+ self.is_html_tag(el) # type: ignore[attr-defined]
+ )
+
+ def is_root(self, el: bs4.Tag) -> bool:
+ """
+ Return whether element is a root element.
+
+ We check that the element is the root of the tree (which we have already pre-calculated),
+ and we check if it is the root element under an `iframe`.
+ """
+
+ root = self.root and self.root is el # type: ignore[attr-defined]
+ if not root:
+ parent = self.get_parent(el)
+ root = parent is not None and self.is_html and self.is_iframe(parent) # type: ignore[attr-defined]
+ return root
+
+ def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]:
+ """Get contents or contents in reverse."""
+ if not no_iframe or not self.is_iframe(el):
+ yield from el.contents
+
+ def get_children(
+ self,
+ el: bs4.Tag,
+ start: int | None = None,
+ reverse: bool = False,
+ tags: bool = True,
+ no_iframe: bool = False
+ ) -> Iterator[bs4.PageElement]:
+ """Get children."""
+
+ if not no_iframe or not self.is_iframe(el):
+ last = len(el.contents) - 1
+ if start is None:
+ index = last if reverse else 0
+ else:
+ index = start
+ end = -1 if reverse else last + 1
+ incr = -1 if reverse else 1
+
+ if 0 <= index <= last:
+ while index != end:
+ node = el.contents[index]
+ index += incr
+ if not tags or self.is_tag(node):
+ yield node
+
+ def get_descendants(
+ self,
+ el: bs4.Tag,
+ tags: bool = True,
+ no_iframe: bool = False
+ ) -> Iterator[bs4.PageElement]:
+ """Get descendants."""
+
+ if not no_iframe or not self.is_iframe(el):
+ next_good = None
+ for child in el.descendants:
+
+ if next_good is not None:
+ if child is not next_good:
+ continue
+ next_good = None
+
+ is_tag = self.is_tag(child)
+
+ if no_iframe and is_tag and self.is_iframe(child):
+ if child.next_sibling is not None:
+ next_good = child.next_sibling
+ else:
+ last_child = child
+ while self.is_tag(last_child) and last_child.contents:
+ last_child = last_child.contents[-1]
+ next_good = last_child.next_element
+ yield child
+ if next_good is None:
+ break
+ # Coverage isn't seeing this even though it's executed
+ continue # pragma: no cover
+
+ if not tags or is_tag:
+ yield child
+
+ def get_parent(self, el: bs4.Tag, no_iframe: bool = False) -> bs4.Tag:
+ """Get parent."""
+
+ parent = el.parent
+ if no_iframe and parent is not None and self.is_iframe(parent):
+ parent = None
+ return parent
+
+ @staticmethod
+ def get_tag_name(el: bs4.Tag) -> str | None:
+ """Get tag."""
+
+ return cast('str | None', el.name)
+
+ @staticmethod
+ def get_prefix_name(el: bs4.Tag) -> str | None:
+ """Get prefix."""
+
+ return cast('str | None', el.prefix)
+
+ @staticmethod
+ def get_uri(el: bs4.Tag) -> str | None:
+ """Get namespace `URI`."""
+
+ return cast('str | None', el.namespace)
+
+ @classmethod
+ def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
+ """Get next sibling tag."""
+
+ sibling = el.next_sibling
+ while tags and not cls.is_tag(sibling) and sibling is not None:
+ sibling = sibling.next_sibling
+ return sibling
+
+ @classmethod
+ def get_previous(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
+ """Get previous sibling tag."""
+
+ sibling = el.previous_sibling
+ while tags and not cls.is_tag(sibling) and sibling is not None:
+ sibling = sibling.previous_sibling
+ return sibling
+
+ @staticmethod
+ def has_html_ns(el: bs4.Tag) -> bool:
+ """
+ Check if element has an HTML namespace.
+
+ This is a bit different than whether a element is treated as having an HTML namespace,
+ like we do in the case of `is_html_tag`.
+ """
+
+ ns = getattr(el, 'namespace') if el else None # noqa: B009
+ return bool(ns and ns == NS_XHTML)
+
+ @staticmethod
+ def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[str | None, str | None]:
+ """Return namespace and attribute name without the prefix."""
+
+ return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
+
+ @classmethod
+ def normalize_value(cls, value: Any) -> str | Sequence[str]:
+ """Normalize the value to be a string or list of strings."""
+
+ # Treat `None` as empty string.
+ if value is None:
+ return ''
+
+ # Pass through strings
+ if (isinstance(value, str)):
+ return value
+
+ # If it's a byte string, convert it to Unicode, treating it as UTF-8.
+ if isinstance(value, bytes):
+ return value.decode("utf8")
+
+ # BeautifulSoup supports sequences of attribute values, so make sure the children are strings.
+ if isinstance(value, Sequence):
+ new_value = []
+ for v in value:
+ if not isinstance(v, (str, bytes)) and isinstance(v, Sequence):
+ # This is most certainly a user error and will crash and burn later.
+ # To keep things working, we'll do what we do with all objects,
+ # And convert them to strings.
+ new_value.append(str(v))
+ else:
+ # Convert the child to a string
+ new_value.append(cast(str, cls.normalize_value(v)))
+ return new_value
+
+ # Try and make anything else a string
+ return str(value)
+
+ @classmethod
+ def get_attribute_by_name(
+ cls,
+ el: bs4.Tag,
+ name: str,
+ default: str | Sequence[str] | None = None
+ ) -> str | Sequence[str] | None:
+ """Get attribute by name."""
+
+ value = default
+ if el._is_xml:
+ try:
+ value = cls.normalize_value(el.attrs[name])
+ except KeyError:
+ pass
+ else:
+ for k, v in el.attrs.items():
+ if util.lower(k) == name:
+ value = cls.normalize_value(v)
+ break
+ return value
+
+ @classmethod
+ def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, str | Sequence[str] | None]]:
+ """Iterate attributes."""
+
+ for k, v in el.attrs.items():
+ yield k, cls.normalize_value(v)
+
+ @classmethod
+ def get_classes(cls, el: bs4.Tag) -> Sequence[str]:
+ """Get classes."""
+
+ classes = cls.get_attribute_by_name(el, 'class', [])
+ if isinstance(classes, str):
+ classes = RE_NOT_WS.findall(classes)
+ return cast(Sequence[str], classes)
+
+ def get_text(self, el: bs4.Tag, no_iframe: bool = False) -> str:
+ """Get text."""
+
+ return ''.join(
+ [node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
+ )
+
+ def get_own_text(self, el: bs4.Tag, no_iframe: bool = False) -> list[str]:
+ """Get Own Text."""
+
+ return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
+
+
class Inputs:
    """Class for parsing and validating input items."""

    @staticmethod
    def validate_day(year: int, month: int, day: int) -> bool:
        """Return whether `day` exists in the given `month` of `year` (leap years included)."""

        max_days = LONG_MONTH
        if month == FEB:
            max_days = FEB_LEAP_MONTH if ((year % 4 == 0) and (year % 100 != 0)) or (year % 400 == 0) else FEB_MONTH
        elif month in MONTHS_30:
            max_days = SHORT_MONTH
        return 1 <= day <= max_days

    @staticmethod
    def validate_week(year: int, week: int) -> bool:
        """
        Return whether `week` is a valid ISO week number for `year`.

        Works for any year, including years outside the range `datetime` supports.
        """

        # December 28th always falls in the last ISO week of its year, so its week
        # number is the number of ISO weeks (52 or 53) in that year.
        # The Gregorian calendar repeats exactly every 400 years (146097 days, a whole
        # number of weeks), so an equivalent year in 2000-2399 gives the same answer
        # while staying inside the range `datetime` can represent.
        equivalent_year = 2000 + year % 400
        max_week = datetime(equivalent_year, 12, 28).isocalendar()[1]
        return 1 <= week <= max_week

    @staticmethod
    def validate_month(month: int) -> bool:
        """Return whether `month` is in the range 1-12."""

        return 1 <= month <= 12

    @staticmethod
    def validate_year(year: int) -> bool:
        """Return whether `year` is at least 1."""

        return 1 <= year

    @staticmethod
    def validate_hour(hour: int) -> bool:
        """Return whether `hour` is in the range 0-23."""

        return 0 <= hour <= 23

    @staticmethod
    def validate_minutes(minutes: int) -> bool:
        """Return whether `minutes` is in the range 0-59."""

        return 0 <= minutes <= 59

    @classmethod
    def parse_value(cls, itype: str, value: str | None) -> tuple[float, ...] | None:
        """
        Parse the input value.

        `itype` selects the format: `date`, `month`, `week`, `time`, `datetime-local`,
        `number`, or `range`. Returns a tuple of the parsed numeric parts, ordered from
        most to least significant so that tuples of the same type compare naturally.
        Returns `None` when `value` is `None`, the type is not recognized, the text does
        not match the format, or a part fails validation.
        """

        parsed = None  # type: tuple[float, ...] | None
        if value is None:
            return value
        if itype == "date":
            m = RE_DATE.match(value)
            if m:
                year = int(m.group('year'), 10)
                month = int(m.group('month'), 10)
                day = int(m.group('day'), 10)
                if cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day):
                    parsed = (year, month, day)
        elif itype == "month":
            m = RE_MONTH.match(value)
            if m:
                year = int(m.group('year'), 10)
                month = int(m.group('month'), 10)
                if cls.validate_year(year) and cls.validate_month(month):
                    parsed = (year, month)
        elif itype == "week":
            m = RE_WEEK.match(value)
            if m:
                year = int(m.group('year'), 10)
                week = int(m.group('week'), 10)
                if cls.validate_year(year) and cls.validate_week(year, week):
                    parsed = (year, week)
        elif itype == "time":
            m = RE_TIME.match(value)
            if m:
                hour = int(m.group('hour'), 10)
                minutes = int(m.group('minutes'), 10)
                if cls.validate_hour(hour) and cls.validate_minutes(minutes):
                    parsed = (hour, minutes)
        elif itype == "datetime-local":
            m = RE_DATETIME.match(value)
            if m:
                year = int(m.group('year'), 10)
                month = int(m.group('month'), 10)
                day = int(m.group('day'), 10)
                hour = int(m.group('hour'), 10)
                minutes = int(m.group('minutes'), 10)
                if (
                    cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day) and
                    cls.validate_hour(hour) and cls.validate_minutes(minutes)
                ):
                    parsed = (year, month, day, hour, minutes)
        elif itype in ("number", "range"):
            m = RE_NUM.match(value)
            if m:
                parsed = (float(m.group('value')),)
        return parsed
+
+
+class CSSMatch(_DocumentNav):
+ """Perform CSS matching."""
+
+ def __init__(
+ self,
+ selectors: ct.SelectorList,
+ scope: bs4.Tag,
+ namespaces: ct.Namespaces | None,
+ flags: int
+ ) -> None:
+ """Initialize."""
+
+ self.assert_valid_input(scope)
+ self.tag = scope
+ self.cached_meta_lang = [] # type: list[tuple[str, str]]
+ self.cached_default_forms = [] # type: list[tuple[bs4.Tag, bs4.Tag]]
+ self.cached_indeterminate_forms = [] # type: list[tuple[bs4.Tag, str, bool]]
+ self.selectors = selectors
+ self.namespaces = {} if namespaces is None else namespaces # type: ct.Namespaces | dict[str, str]
+ self.flags = flags
+ self.iframe_restrict = False
+
+ # Find the root element for the whole tree
+ doc = scope
+ parent = self.get_parent(doc)
+ while parent:
+ doc = parent
+ parent = self.get_parent(doc)
+ root = None
+ if not self.is_doc(doc):
+ root = doc
+ else:
+ for child in self.get_children(doc):
+ root = child
+ break
+
+ self.root = root
+ self.scope = scope if scope is not doc else root
+ self.has_html_namespace = self.has_html_ns(root)
+
+ # A document can be both XML and HTML (XHTML)
+ self.is_xml = self.is_xml_tree(doc)
+ self.is_html = not self.is_xml or self.has_html_namespace
+
+ def supports_namespaces(self) -> bool:
+ """Check if namespaces are supported in the HTML type."""
+
+ return self.is_xml or self.has_html_namespace
+
+ def get_tag_ns(self, el: bs4.Tag) -> str:
+ """Get tag namespace."""
+
+ if self.supports_namespaces():
+ namespace = ''
+ ns = self.get_uri(el)
+ if ns:
+ namespace = ns
+ else:
+ namespace = NS_XHTML
+ return namespace
+
+ def is_html_tag(self, el: bs4.Tag) -> bool:
+ """Check if tag is in HTML namespace."""
+
+ return self.get_tag_ns(el) == NS_XHTML
+
+ def get_tag(self, el: bs4.Tag) -> str | None:
+ """Get tag."""
+
+ name = self.get_tag_name(el)
+ return util.lower(name) if name is not None and not self.is_xml else name
+
+ def get_prefix(self, el: bs4.Tag) -> str | None:
+ """Get prefix."""
+
+ prefix = self.get_prefix_name(el)
+ return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
+
+ def find_bidi(self, el: bs4.Tag) -> int | None:
+ """Get directionality from element text."""
+
+ for node in self.get_children(el, tags=False):
+
+ # Analyze child text nodes
+ if self.is_tag(node):
+
+ # Avoid analyzing certain elements specified in the specification.
+ direction = DIR_MAP.get(util.lower(self.get_attribute_by_name(node, 'dir', '')), None)
+ if (
+ self.get_tag(node) in ('bdi', 'script', 'style', 'textarea', 'iframe') or
+ not self.is_html_tag(node) or
+ direction is not None
+ ):
+ continue # pragma: no cover
+
+ # Check directionality of this node's text
+ value = self.find_bidi(node)
+ if value is not None:
+ return value
+
+ # Direction could not be determined
+ continue # pragma: no cover
+
+ # Skip `doctype` comments, etc.
+ if self.is_special_string(node):
+ continue
+
+ # Analyze text nodes for directionality.
+ for c in node:
+ bidi = unicodedata.bidirectional(c)
+ if bidi in ('AL', 'R', 'L'):
+ return ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
+ return None
+
+ def extended_language_filter(self, lang_range: str, lang_tag: str) -> bool:
+ """Filter the language tags."""
+
+ match = True
+ lang_range = RE_WILD_STRIP.sub('-', lang_range).lower()
+ ranges = lang_range.split('-')
+ subtags = lang_tag.lower().split('-')
+ length = len(ranges)
+ slength = len(subtags)
+ rindex = 0
+ sindex = 0
+ r = ranges[rindex]
+ s = subtags[sindex]
+
+ # Empty specified language should match unspecified language attributes
+ if length == 1 and slength == 1 and not r and r == s:
+ return True
+
+ # Primary tag needs to match
+ if (r != '*' and r != s) or (r == '*' and slength == 1 and not s):
+ match = False
+
+ rindex += 1
+ sindex += 1
+
+ # Match until we run out of ranges
+ while match and rindex < length:
+ r = ranges[rindex]
+ try:
+ s = subtags[sindex]
+ except IndexError:
+ # Ran out of subtags,
+ # but we still have ranges
+ match = False
+ continue
+
+ # Empty range
+ if not r:
+ match = False
+ continue
+
+ # Matched range
+ elif s == r:
+ rindex += 1
+
+ # Implicit wildcard cannot match
+ # singletons
+ elif len(s) == 1:
+ match = False
+ continue
+
+ # Implicitly matched, so grab next subtag
+ sindex += 1
+
+ return match
+
+ def match_attribute_name(
+ self,
+ el: bs4.Tag,
+ attr: str,
+ prefix: str | None
+ ) -> str | Sequence[str] | None:
+ """Match attribute name and return value if it exists."""
+
+ value = None
+ if self.supports_namespaces():
+ value = None
+ # If we have not defined namespaces, we can't very well find them, so don't bother trying.
+ if prefix:
+ ns = self.namespaces.get(prefix)
+ if ns is None and prefix != '*':
+ return None
+ else:
+ ns = None
+
+ for k, v in self.iter_attributes(el):
+
+ # Get attribute parts
+ namespace, name = self.split_namespace(el, k)
+
+ # Can't match a prefix attribute as we haven't specified one to match
+ # Try to match it normally as a whole `p:a` as selector may be trying `p\:a`.
+ if ns is None:
+ if (self.is_xml and attr == k) or (not self.is_xml and util.lower(attr) == util.lower(k)):
+ value = v
+ break
+ # Coverage is not finding this even though it is executed.
+ # Adding a print statement before this (and erasing coverage) causes coverage to find the line.
+ # Ignore the false positive message.
+ continue # pragma: no cover
+
+ # We can't match our desired prefix attribute as the attribute doesn't have a prefix
+ if namespace is None or ns != namespace and prefix != '*':
+ continue
+
+ # The attribute doesn't match.
+ if (util.lower(attr) != util.lower(name)) if not self.is_xml else (attr != name):
+ continue
+
+ value = v
+ break
+ else:
+ for k, v in self.iter_attributes(el):
+ if util.lower(attr) != util.lower(k):
+ continue
+ value = v
+ break
+ return value
+
+ def match_namespace(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
+ """Match the namespace of the element."""
+
+ match = True
+ namespace = self.get_tag_ns(el)
+ default_namespace = self.namespaces.get('')
+ tag_ns = '' if tag.prefix is None else self.namespaces.get(tag.prefix)
+ # We must match the default namespace if one is not provided
+ if tag.prefix is None and (default_namespace is not None and namespace != default_namespace):
+ match = False
+ # If we specified `|tag`, we must not have a namespace.
+ elif (tag.prefix is not None and tag.prefix == '' and namespace):
+ match = False
+ # Verify prefix matches
+ elif (
+ tag.prefix and
+ tag.prefix != '*' and (tag_ns is None or namespace != tag_ns)
+ ):
+ match = False
+ return match
+
+ def match_attributes(self, el: bs4.Tag, attributes: tuple[ct.SelectorAttribute, ...]) -> bool:
+ """Match attributes."""
+
+ match = True
+ if attributes:
+ for a in attributes:
+ temp = self.match_attribute_name(el, a.attribute, a.prefix)
+ pattern = a.xml_type_pattern if self.is_xml and a.xml_type_pattern else a.pattern
+ if temp is None:
+ match = False
+ break
+ value = temp if isinstance(temp, str) else ' '.join(temp)
+ if pattern is None:
+ continue
+ elif pattern.match(value) is None:
+ match = False
+ break
+ return match
+
+ def match_tagname(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
+ """Match tag name."""
+
+ name = (util.lower(tag.name) if not self.is_xml and tag.name is not None else tag.name)
+ return not (
+ name is not None and
+ name not in (self.get_tag(el), '*')
+ )
+
+ def match_tag(self, el: bs4.Tag, tag: ct.SelectorTag | None) -> bool:
+ """Match the tag."""
+
+ match = True
+ if tag is not None:
+ # Verify namespace
+ if not self.match_namespace(el, tag):
+ match = False
+ if not self.match_tagname(el, tag):
+ match = False
+ return match
+
+ def match_past_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
+ """Match past relationship."""
+
+ found = False
+ # I don't think this can ever happen, but it makes `mypy` happy
+ if isinstance(relation[0], ct.SelectorNull): # pragma: no cover
+ return found
+
+ if relation[0].rel_type == REL_PARENT:
+ parent = self.get_parent(el, no_iframe=self.iframe_restrict)
+ while not found and parent:
+ found = self.match_selectors(parent, relation)
+ parent = self.get_parent(parent, no_iframe=self.iframe_restrict)
+ elif relation[0].rel_type == REL_CLOSE_PARENT:
+ parent = self.get_parent(el, no_iframe=self.iframe_restrict)
+ if parent:
+ found = self.match_selectors(parent, relation)
+ elif relation[0].rel_type == REL_SIBLING:
+ sibling = self.get_previous(el)
+ while not found and sibling:
+ found = self.match_selectors(sibling, relation)
+ sibling = self.get_previous(sibling)
+ elif relation[0].rel_type == REL_CLOSE_SIBLING:
+ sibling = self.get_previous(el)
+ if sibling and self.is_tag(sibling):
+ found = self.match_selectors(sibling, relation)
+ return found
+
+ def match_future_child(self, parent: bs4.Tag, relation: ct.SelectorList, recursive: bool = False) -> bool:
+ """Match future child."""
+
+ match = False
+ if recursive:
+ children = self.get_descendants # type: Callable[..., Iterator[bs4.Tag]]
+ else:
+ children = self.get_children
+ for child in children(parent, no_iframe=self.iframe_restrict):
+ match = self.match_selectors(child, relation)
+ if match:
+ break
+ return match
+
+ def match_future_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
+ """Match future relationship."""
+
+ found = False
+ # I don't think this can ever happen, but it makes `mypy` happy
+ if isinstance(relation[0], ct.SelectorNull): # pragma: no cover
+ return found
+
+ if relation[0].rel_type == REL_HAS_PARENT:
+ found = self.match_future_child(el, relation, True)
+ elif relation[0].rel_type == REL_HAS_CLOSE_PARENT:
+ found = self.match_future_child(el, relation)
+ elif relation[0].rel_type == REL_HAS_SIBLING:
+ sibling = self.get_next(el)
+ while not found and sibling:
+ found = self.match_selectors(sibling, relation)
+ sibling = self.get_next(sibling)
+ elif relation[0].rel_type == REL_HAS_CLOSE_SIBLING:
+ sibling = self.get_next(el)
+ if sibling and self.is_tag(sibling):
+ found = self.match_selectors(sibling, relation)
+ return found
+
+ def match_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
+ """Match relationship to other elements."""
+
+ found = False
+
+ if isinstance(relation[0], ct.SelectorNull) or relation[0].rel_type is None:
+ return found
+
+ if relation[0].rel_type.startswith(':'):
+ found = self.match_future_relations(el, relation)
+ else:
+ found = self.match_past_relations(el, relation)
+
+ return found
+
+ def match_id(self, el: bs4.Tag, ids: tuple[str, ...]) -> bool:
+ """Match element's ID."""
+
+ found = True
+ for i in ids:
+ if i != self.get_attribute_by_name(el, 'id', ''):
+ found = False
+ break
+ return found
+
+ def match_classes(self, el: bs4.Tag, classes: tuple[str, ...]) -> bool:
+ """Match element's classes."""
+
+ current_classes = self.get_classes(el)
+ found = True
+ for c in classes:
+ if c not in current_classes:
+ found = False
+ break
+ return found
+
+ def match_root(self, el: bs4.Tag) -> bool:
+ """Match element as root."""
+
+ is_root = self.is_root(el)
+ if is_root:
+ sibling = self.get_previous(el, tags=False)
+ while is_root and sibling is not None:
+ if (
+ self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
+ self.is_cdata(sibling)
+ ):
+ is_root = False
+ else:
+ sibling = self.get_previous(sibling, tags=False)
+ if is_root:
+ sibling = self.get_next(el, tags=False)
+ while is_root and sibling is not None:
+ if (
+ self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
+ self.is_cdata(sibling)
+ ):
+ is_root = False
+ else:
+ sibling = self.get_next(sibling, tags=False)
+ return is_root
+
+ def match_scope(self, el: bs4.Tag) -> bool:
+ """Match element as scope."""
+
+ return self.scope is el
+
+ def match_nth_tag_type(self, el: bs4.Tag, child: bs4.Tag) -> bool:
+ """Match tag type for `nth` matches."""
+
+ return (
+ (self.get_tag(child) == self.get_tag(el)) and
+ (self.get_tag_ns(child) == self.get_tag_ns(el))
+ )
+
+    def match_nth(self, el: bs4.Tag, nth: bs4.Tag) -> bool:
+        """
+        Match `nth` elements.
+
+        Every entry of `nth` must match for the element to match. For each entry the `a * count + b`
+        index (or the plain index `a` when the entry is not variable) is evaluated and siblings are
+        counted, from the front or from the back, until the element is reached or passed.
+
+        NOTE(review): `nth` is annotated as `bs4.Tag`, but it is iterated and each item is read for
+        `selectors`, `last`, `a`, `b`, `n` and `of_type` - the annotation looks inaccurate; confirm.
+        """
+
+        matched = True
+
+        for n in nth:
+            matched = False
+            # The element itself must satisfy the `of S` selector list, if there is one.
+            if n.selectors and not self.match_selectors(el, n.selectors):
+                break
+            parent = self.get_parent(el)
+            if parent is None:
+                parent = self.create_fake_parent(el)
+            last = n.last
+            last_index = len(parent) - 1
+            # Start scanning at the end when counting from the last child, otherwise at the start.
+            index = last_index if last else 0
+            relative_index = 0
+            a = n.a
+            b = n.b
+            var = n.n
+            count = 0
+            count_incr = 1
+            # Direction in which the child index moves while scanning.
+            factor = -1 if last else 1
+            idx = last_idx = a * count + b if var else a
+
+            # We can only adjust bounds within a variable index
+            if var:
+                # Abort if our nth index is out of bounds and only getting further out of bounds as we increment.
+                # Otherwise, increment to try to get in bounds.
+                adjust = None
+                while idx < 1 or idx > last_index:
+                    # NOTE(review): `idx == 0` enters the loop but is handled by the `else` branch below.
+                    if idx < 0:
+                        diff_low = 0 - idx
+                        if adjust is not None and adjust == 1:
+                            break
+                        adjust = -1
+                        count += count_incr
+                        idx = last_idx = a * count + b if var else a
+                        diff = 0 - idx
+                        # Stop when the distance below the range is not shrinking.
+                        if diff >= diff_low:
+                            break
+                    else:
+                        diff_high = idx - last_index
+                        if adjust is not None and adjust == -1:
+                            break
+                        adjust = 1
+                        count += count_incr
+                        idx = last_idx = a * count + b if var else a
+                        diff = idx - last_index
+                        # Stop when the distance above the range is not shrinking.
+                        if diff >= diff_high:
+                            break
+                        diff_high = diff
+
+                # If a < 0, our count is working backwards, so floor the index by increasing the count.
+                # Find the count that yields the lowest, in bound value and use that.
+                # Lastly reverse count increment so that we'll increase our index.
+                lowest = count
+                if a < 0:
+                    while idx >= 1:
+                        lowest = count
+                        count += count_incr
+                        idx = last_idx = a * count + b if var else a
+                    count_incr = -1
+                count = lowest
+                idx = last_idx = a * count + b if var else a
+
+            # Evaluate elements while our calculated nth index is still in range
+            while 1 <= idx <= last_index + 1:
+                child = None
+                # Evaluate while our child index is still in range.
+                for child in self.get_children(parent, start=index, reverse=factor < 0, tags=False):
+                    index += factor
+                    if not self.is_tag(child):
+                        continue
+                    # Handle `of S` in `nth-child`
+                    if n.selectors and not self.match_selectors(child, n.selectors):
+                        continue
+                    # Handle `of-type`
+                    if n.of_type and not self.match_nth_tag_type(el, child):
+                        continue
+                    relative_index += 1
+                    if relative_index == idx:
+                        # The counted position is reached: it is a match only if it is our element.
+                        if child is el:
+                            matched = True
+                        else:
+                            break
+                    # Once the element itself has been seen there is nothing more to scan.
+                    if child is el:
+                        break
+                if child is el:
+                    break
+                last_idx = idx
+                count += count_incr
+                if count < 0:
+                    # Count is counting down and has now ventured into invalid territory.
+                    break
+                idx = a * count + b if var else a
+                # A non-variable index never changes, so stop instead of looping forever.
+                if last_idx == idx:
+                    break
+            if not matched:
+                break
+        return matched
+
+ def match_empty(self, el: bs4.Tag) -> bool:
+ """Check if element is empty (if requested)."""
+
+ is_empty = True
+ for child in self.get_children(el, tags=False):
+ if self.is_tag(child):
+ is_empty = False
+ break
+ elif self.is_content_string(child) and RE_NOT_EMPTY.search(child):
+ is_empty = False
+ break
+ return is_empty
+
+ def match_subselectors(self, el: bs4.Tag, selectors: tuple[ct.SelectorList, ...]) -> bool:
+ """Match selectors."""
+
+ match = True
+ for sel in selectors:
+ if not self.match_selectors(el, sel):
+ match = False
+ return match
+
+ def match_contains(self, el: bs4.Tag, contains: tuple[ct.SelectorContains, ...]) -> bool:
+ """Match element if it contains text."""
+
+ match = True
+ content = None # type: str | Sequence[str] | None
+ for contain_list in contains:
+ if content is None:
+ if contain_list.own:
+ content = self.get_own_text(el, no_iframe=self.is_html)
+ else:
+ content = self.get_text(el, no_iframe=self.is_html)
+ found = False
+ for text in contain_list.text:
+ if contain_list.own:
+ for c in content:
+ if text in c:
+ found = True
+ break
+ if found:
+ break
+ else:
+ if text in content:
+ found = True
+ break
+ if not found:
+ match = False
+ return match
+
+    def match_default(self, el: bs4.Tag) -> bool:
+        """
+        Match default.
+
+        The element matches when it is the first `input` or `button` descendant with
+        `type="submit"` of its nearest HTML `form` ancestor. The form/button pair is remembered in
+        `self.cached_default_forms` so a form is only scanned once.
+        """
+
+        match = False
+
+        # Find this input's form
+        form = None
+        parent = self.get_parent(el, no_iframe=True)
+        while parent and form is None:
+            if self.get_tag(parent) == 'form' and self.is_html_tag(parent):
+                form = parent
+            else:
+                parent = self.get_parent(parent, no_iframe=True)
+
+        # Look in form cache to see if we've already located its default button
+        found_form = False
+        for f, t in self.cached_default_forms:
+            if f is form:
+                found_form = True
+                if t is el:
+                    match = True
+                break
+
+        # We didn't have the form cached, so look for its default button
+        # NOTE(review): `form` is still `None` here when no form ancestor was found - presumably
+        # callers only reach this for elements inside a form; confirm.
+        if not found_form:
+            for child in self.get_descendants(form, no_iframe=True):
+                name = self.get_tag(child)
+                # Can't do nested forms (haven't figured out why we never hit this)
+                if name == 'form':  # pragma: no cover
+                    break
+                if name in ('input', 'button'):
+                    v = self.get_attribute_by_name(child, 'type', '')
+                    if v and util.lower(v) == 'submit':
+                        # The first submit control found is the form's default; cache it and stop.
+                        self.cached_default_forms.append((form, child))
+                        if el is child:
+                            match = True
+                        break
+        return match
+
+    def match_indeterminate(self, el: bs4.Tag) -> bool:
+        """
+        Match indeterminate.
+
+        The element matches when no other `input` with `type="radio"`, the same `name`, a `checked`
+        attribute and the same parent form exists. Results are cached per form and name in
+        `self.cached_indeterminate_forms`.
+        """
+
+        match = False
+        name = cast(str, self.get_attribute_by_name(el, 'name'))
+
+        def get_parent_form(el: bs4.Tag) -> bs4.Tag | None:
+            """Find this input's form."""
+            form = None
+            parent = self.get_parent(el, no_iframe=True)
+            while form is None:
+                if self.get_tag(parent) == 'form' and self.is_html_tag(parent):
+                    form = parent
+                    break
+                last_parent = parent
+                parent = self.get_parent(parent, no_iframe=True)
+                # No enclosing form: fall back to the top-most ancestor that was reached.
+                if parent is None:
+                    form = last_parent
+                    break
+            return form
+
+        form = get_parent_form(el)
+
+        # Look in form cache to see if we've already evaluated that its fellow radio buttons are indeterminate
+        found_form = False
+        for f, n, i in self.cached_indeterminate_forms:
+            if f is form and n == name:
+                found_form = True
+                if i is True:
+                    match = True
+                break
+
+        # We didn't have the form cached, so validate that the radio button is indeterminate
+        if not found_form:
+            checked = False
+            for child in self.get_descendants(form, no_iframe=True):
+                if child is el:
+                    continue
+                tag_name = self.get_tag(child)
+                if tag_name == 'input':
+                    is_radio = False
+                    check = False
+                    has_name = False
+                    for k, v in self.iter_attributes(child):
+                        if util.lower(k) == 'type' and util.lower(v) == 'radio':
+                            is_radio = True
+                        elif util.lower(k) == 'name' and v == name:
+                            has_name = True
+                        elif util.lower(k) == 'checked':
+                            check = True
+                        # Re-evaluated after every attribute; stops as soon as all three flags are set
+                        # and the radio button belongs to the same form.
+                        if is_radio and check and has_name and get_parent_form(child) is form:
+                            checked = True
+                            break
+                if checked:
+                    break
+            if not checked:
+                match = True
+            self.cached_indeterminate_forms.append((form, name, match))
+
+        return match
+
+    def match_lang(self, el: bs4.Tag, langs: tuple[ct.SelectorLang, ...]) -> bool:
+        """
+        Match languages.
+
+        The language is taken from the nearest `lang` / `xml:lang` attribute on the element or its
+        ancestors. When none is found, a cached or freshly scanned `meta` `content-language` value
+        is used for HTML documents. The language must then satisfy every entry of `langs`.
+        """
+
+        match = False
+        has_ns = self.supports_namespaces()
+        root = self.root
+        has_html_namespace = self.has_html_namespace
+
+        # Walk parents looking for `lang` (HTML) or `xml:lang` XML property.
+        parent = el
+        found_lang = None
+        last = None
+        while not found_lang:
+            has_html_ns = self.has_html_ns(parent)
+            for k, v in self.iter_attributes(parent):
+                attr_ns, attr = self.split_namespace(parent, k)
+                if (
+                    ((not has_ns or has_html_ns) and (util.lower(k) if not self.is_xml else k) == 'lang') or
+                    (
+                        has_ns and not has_html_ns and attr_ns == NS_XML and
+                        (util.lower(attr) if not self.is_xml and attr is not None else attr) == 'lang'
+                    )
+                ):
+                    found_lang = v
+                    break
+            last = parent
+            parent = self.get_parent(parent, no_iframe=self.is_html)
+
+            # Ran out of ancestors: treat the last element visited as the root for the meta lookup.
+            if parent is None:
+                root = last
+                has_html_namespace = self.has_html_ns(root)
+                parent = last
+                break
+
+        # Use cached meta language.
+        if found_lang is None and self.cached_meta_lang:
+            for cache in self.cached_meta_lang:
+                if root is cache[0]:
+                    found_lang = cache[1]
+
+        # If we couldn't find a language, and the document is HTML, look to meta to determine language.
+        if found_lang is None and (not self.is_xml or (has_html_namespace and root.name == 'html')):
+            # Find head
+            found = False
+            for tag in ('html', 'head'):
+                found = False
+                for child in self.get_children(parent, no_iframe=self.is_html):
+                    if self.get_tag(child) == tag and self.is_html_tag(child):
+                        found = True
+                        parent = child
+                        break
+                if not found:  # pragma: no cover
+                    break
+
+            # Search meta tags
+            if found:
+                for child in parent:
+                    # NOTE(review): the HTML check below is applied to `parent` (the `head` found
+                    # above), not to `child` - confirm that this is intended.
+                    if self.is_tag(child) and self.get_tag(child) == 'meta' and self.is_html_tag(parent):
+                        c_lang = False
+                        content = None
+                        for k, v in self.iter_attributes(child):
+                            if util.lower(k) == 'http-equiv' and util.lower(v) == 'content-language':
+                                c_lang = True
+                            if util.lower(k) == 'content':
+                                content = v
+                            if c_lang and content:
+                                found_lang = content
+                                self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
+                                break
+                    if found_lang is not None:
+                        break
+                # Remember that this root has no meta language so it is not scanned again.
+                if found_lang is None:
+                    self.cached_meta_lang.append((cast(str, root), ''))
+
+        # If we determined a language, compare.
+        if found_lang is not None:
+            for patterns in langs:
+                match = False
+                # One matching pattern per entry is enough, but every entry must match.
+                for pattern in patterns:
+                    if self.extended_language_filter(pattern, cast(str, found_lang)):
+                        match = True
+                if not match:
+                    break
+
+        return match
+
+    def match_dir(self, el: bs4.Tag, directionality: int) -> bool:
+        """
+        Check directionality.
+
+        `directionality` is a combination of the `ct.SEL_DIR_LTR` / `ct.SEL_DIR_RTL` flags. The
+        element's direction comes from its `dir` attribute, from its text when the direction maps to
+        `0` in `DIR_MAP` (or for `bdi`), or from its ancestors via recursion; the root defaults to
+        left to right.
+        """
+
+        # If we have to match both left and right, we can't match either.
+        if directionality & ct.SEL_DIR_LTR and directionality & ct.SEL_DIR_RTL:
+            return False
+
+        # `el` is `None` when the recursion below runs past the top-most parent.
+        if el is None or not self.is_html_tag(el):
+            return False
+
+        # Element has defined direction of left to right or right to left
+        direction = DIR_MAP.get(util.lower(self.get_attribute_by_name(el, 'dir', '')), None)
+        if direction not in (None, 0):
+            return direction == directionality
+
+        # Element is the document element (the root) and no direction assigned, assume left to right.
+        is_root = self.is_root(el)
+        if is_root and direction is None:
+            return ct.SEL_DIR_LTR == directionality
+
+        # If `input[type=telephone]` and no direction is assigned, assume left to right.
+        name = self.get_tag(el)
+        is_input = name == 'input'
+        is_textarea = name == 'textarea'
+        is_bdi = name == 'bdi'
+        itype = util.lower(self.get_attribute_by_name(el, 'type', '')) if is_input else ''
+        if is_input and itype == 'tel' and direction is None:
+            return ct.SEL_DIR_LTR == directionality
+
+        # Auto handling for text inputs
+        if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0:
+            if is_textarea:
+                value = ''.join(node for node in self.get_contents(el, no_iframe=True) if self.is_content_string(node))
+            else:
+                value = cast(str, self.get_attribute_by_name(el, 'value', ''))
+            if value:
+                # The first character with a strong bidirectional class decides the direction.
+                for c in value:
+                    bidi = unicodedata.bidirectional(c)
+                    if bidi in ('AL', 'R', 'L'):
+                        direction = ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
+                        return direction == directionality
+                # Assume left to right
+                return ct.SEL_DIR_LTR == directionality
+            elif is_root:
+                return ct.SEL_DIR_LTR == directionality
+            return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
+
+        # Auto handling for `bdi` and other non text inputs.
+        if (is_bdi and direction is None) or direction == 0:
+            direction = self.find_bidi(el)
+            if direction is not None:
+                return direction == directionality
+            elif is_root:
+                return ct.SEL_DIR_LTR == directionality
+            return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
+
+        # Match parents direction
+        return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
+
+    def match_range(self, el: bs4.Tag, condition: int) -> bool:
+        """
+        Match range.
+
+        Behavior is modeled after what we see in browsers. Browsers seem to evaluate
+        if the value is out of range, and if not, it is in range. So a missing value
+        will not evaluate out of range; therefore, value is in range. Personally, I
+        feel like this should evaluate as neither in or out of range.
+
+        `condition` selects the question being asked: with `ct.SEL_IN_RANGE` set the result is
+        "not out of range", otherwise it is "out of range". When neither `min` nor `max` parses to
+        a value, the result is always `False`.
+        """
+
+        out_of_range = False
+
+        itype = util.lower(self.get_attribute_by_name(el, 'type'))
+        mn = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'min', None)))
+        mx = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'max', None)))
+
+        # There is no valid min or max, so we cannot evaluate a range
+        if mn is None and mx is None:
+            return False
+
+        value = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'value', None)))
+        if value is not None:
+            if itype in ("date", "datetime-local", "month", "week", "number", "range"):
+                if mn is not None and value < mn:
+                    out_of_range = True
+                if not out_of_range and mx is not None and value > mx:
+                    out_of_range = True
+            elif itype == "time":
+                if mn is not None and mx is not None and mn > mx:
+                    # Time is periodic, so this is a reversed/discontinuous range
+                    # (only values strictly between `max` and `min` are out of range).
+                    if value < mn and value > mx:
+                        out_of_range = True
+                else:
+                    if mn is not None and value < mn:
+                        out_of_range = True
+                    if not out_of_range and mx is not None and value > mx:
+                        out_of_range = True
+
+        return not out_of_range if condition & ct.SEL_IN_RANGE else out_of_range
+
+ def match_defined(self, el: bs4.Tag) -> bool:
+ """
+ Match defined.
+
+ `:defined` is related to custom elements in a browser.
+
+ - If the document is XML (not XHTML), all tags will match.
+ - Tags that are not custom (don't have a hyphen) are marked defined.
+ - If the tag has a prefix (without or without a namespace), it will not match.
+
+ This is of course requires the parser to provide us with the proper prefix and namespace info,
+ if it doesn't, there is nothing we can do.
+ """
+
+ name = self.get_tag(el)
+ return (
+ name is not None and (
+ name.find('-') == -1 or
+ name.find(':') != -1 or
+ self.get_prefix(el) is not None
+ )
+ )
+
+ def match_placeholder_shown(self, el: bs4.Tag) -> bool:
+ """
+ Match placeholder shown according to HTML spec.
+
+ - text area should be checked if they have content. A single newline does not count as content.
+
+ """
+
+ match = False
+ content = self.get_text(el)
+ if content in ('', '\n'):
+ match = True
+
+ return match
+
+    def match_selectors(self, el: bs4.Tag, selectors: ct.SelectorList) -> bool:
+        """
+        Check if element matches one of the selectors.
+
+        Each selector in the list is tried in turn; the first one whose checks all pass decides the
+        result. For a negated list (`selectors.is_not`) the result is inverted: the element matches
+        only when none of the selectors passes.
+        """
+
+        match = False
+        is_not = selectors.is_not
+        is_html = selectors.is_html
+
+        # Internal selector lists that use the HTML flag, will automatically get the `html` namespace.
+        # NOTE(review): the saved values are restored at the end without `try/finally`, so an
+        # exception raised while matching would leave the temporary values in place.
+        if is_html:
+            namespaces = self.namespaces
+            iframe_restrict = self.iframe_restrict
+            self.namespaces = {'html': NS_XHTML}
+            self.iframe_restrict = True
+
+        # HTML-only selector lists are skipped entirely (no match) when the document is not HTML.
+        if not is_html or self.is_html:
+            for selector in selectors:
+                match = is_not
+                # We have an un-matchable situation (like `:focus`, as you cannot focus an element in this environment)
+                if isinstance(selector, ct.SelectorNull):
+                    continue
+                # Verify tag matches
+                if not self.match_tag(el, selector.tag):
+                    continue
+                # Verify tag is defined
+                if selector.flags & ct.SEL_DEFINED and not self.match_defined(el):
+                    continue
+                # Verify element is root
+                if selector.flags & ct.SEL_ROOT and not self.match_root(el):
+                    continue
+                # Verify element is scope
+                if selector.flags & ct.SEL_SCOPE and not self.match_scope(el):
+                    continue
+                # Verify element has placeholder shown
+                if selector.flags & ct.SEL_PLACEHOLDER_SHOWN and not self.match_placeholder_shown(el):
+                    continue
+                # Verify `nth` matches
+                if not self.match_nth(el, selector.nth):
+                    continue
+                # Verify element is empty
+                if selector.flags & ct.SEL_EMPTY and not self.match_empty(el):
+                    continue
+                # Verify id matches
+                if selector.ids and not self.match_id(el, selector.ids):
+                    continue
+                # Verify classes match
+                if selector.classes and not self.match_classes(el, selector.classes):
+                    continue
+                # Verify attribute(s) match
+                if not self.match_attributes(el, selector.attributes):
+                    continue
+                # Verify ranges
+                if selector.flags & RANGES and not self.match_range(el, selector.flags & RANGES):
+                    continue
+                # Verify language patterns
+                if selector.lang and not self.match_lang(el, selector.lang):
+                    continue
+                # Verify pseudo selector patterns
+                if selector.selectors and not self.match_subselectors(el, selector.selectors):
+                    continue
+                # Verify relationship selectors
+                if selector.relation and not self.match_relations(el, selector.relation):
+                    continue
+                # Validate that the current default selector match corresponds to the first submit button in the form
+                if selector.flags & ct.SEL_DEFAULT and not self.match_default(el):
+                    continue
+                # Validate that the unset radio button is among radio buttons with the same name in a form that are
+                # also not set.
+                if selector.flags & ct.SEL_INDETERMINATE and not self.match_indeterminate(el):
+                    continue
+                # Validate element directionality
+                if selector.flags & DIR_FLAGS and not self.match_dir(el, selector.flags & DIR_FLAGS):
+                    continue
+                # Validate that the tag contains the specified text.
+                if selector.contains and not self.match_contains(el, selector.contains):
+                    continue
+                # Every check passed for this selector, so the search is over.
+                match = not is_not
+                break
+
+        # Restore actual namespaces being used for external selector lists
+        if is_html:
+            self.namespaces = namespaces
+            self.iframe_restrict = iframe_restrict
+
+        return match
+
+ def select(self, limit: int = 0) -> Iterator[bs4.Tag]:
+ """Match all tags under the targeted tag."""
+
+ lim = None if limit < 1 else limit
+
+ for child in self.get_descendants(self.tag):
+ if self.match(child):
+ yield child
+ if lim is not None:
+ lim -= 1
+ if lim < 1:
+ break
+
+ def closest(self) -> bs4.Tag | None:
+ """Match closest ancestor."""
+
+ current = self.tag
+ closest = None
+ while closest is None and current is not None:
+ if self.match(current):
+ closest = current
+ else:
+ current = self.get_parent(current)
+ return closest
+
+ def filter(self) -> list[bs4.Tag]: # noqa A001
+ """Filter tag's children."""
+
+ return [tag for tag in self.get_contents(self.tag) if not self.is_navigable_string(tag) and self.match(tag)]
+
+ def match(self, el: bs4.Tag) -> bool:
+ """Match."""
+
+ return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)
+
+
+class SoupSieve(ct.Immutable):
+    """
+    Compiled Soup Sieve selector matching object.
+
+    Holds the compiled selector list together with the namespaces, custom selectors and flags it
+    was built with. Every operation creates a `CSSMatch` for the given tag and delegates to it.
+    """
+
+    pattern: str
+    selectors: ct.SelectorList
+    namespaces: ct.Namespaces | None
+    # NOTE(review): `__init__` accepts `ct.CustomSelectors | None` for this attribute - confirm
+    # that `dict[str, str]` is the intended annotation.
+    custom: dict[str, str]
+    flags: int
+
+    __slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")
+
+    def __init__(
+        self,
+        pattern: str,
+        selectors: ct.SelectorList,
+        namespaces: ct.Namespaces | None,
+        custom: ct.CustomSelectors | None,
+        flags: int
+    ):
+        """Initialize."""
+
+        super().__init__(
+            pattern=pattern,
+            selectors=selectors,
+            namespaces=namespaces,
+            custom=custom,
+            flags=flags
+        )
+
+    def match(self, tag: bs4.Tag) -> bool:
+        """Match."""
+
+        return CSSMatch(self.selectors, tag, self.namespaces, self.flags).match(tag)
+
+    def closest(self, tag: bs4.Tag) -> bs4.Tag | None:
+        """Match closest ancestor."""
+
+        return CSSMatch(self.selectors, tag, self.namespaces, self.flags).closest()
+
+    def filter(self, iterable: Iterable[bs4.Tag]) -> list[bs4.Tag]:  # noqa A001
+        """
+        Filter.
+
+        `CSSMatch` can cache certain searches for tags of the same document,
+        so if we are given a tag, all tags are from the same document,
+        and we can take advantage of the optimization.
+
+        Any other kind of iterable could have tags from different documents or detached tags,
+        so for those, we use a new `CSSMatch` for each item in the iterable.
+        """
+
+        if CSSMatch.is_tag(iterable):
+            return CSSMatch(self.selectors, iterable, self.namespaces, self.flags).filter()
+        else:
+            return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
+
+    def select_one(self, tag: bs4.Tag) -> bs4.Tag | None:
+        """Select a single tag, or `None` when nothing matches."""
+
+        tags = self.select(tag, limit=1)
+        return tags[0] if tags else None
+
+    def select(self, tag: bs4.Tag, limit: int = 0) -> list[bs4.Tag]:
+        """Select the specified tags."""
+
+        return list(self.iselect(tag, limit))
+
+    def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]:
+        """Iterate the specified tags."""
+
+        yield from CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit)
+
+    def __repr__(self) -> str:  # pragma: no cover
+        """Representation."""
+
+        return (
+            f"SoupSieve(pattern={self.pattern!r}, namespaces={self.namespaces!r}, "
+            f"custom={self.custom!r}, flags={self.flags!r})"
+        )
+
+    __str__ = __repr__
+
+
+# Register the class with the pickle support provided by `css_types`.
+ct.pickle_register(SoupSieve)
diff --git a/.venv/lib/python3.12/site-packages/soupsieve/css_parser.py b/.venv/lib/python3.12/site-packages/soupsieve/css_parser.py
new file mode 100644
index 00000000..bedae694
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/soupsieve/css_parser.py
@@ -0,0 +1,1289 @@
+"""CSS selector parser."""
+from __future__ import annotations
+import re
+from functools import lru_cache
+from . import util
+from . import css_match as cm
+from . import css_types as ct
+from .util import SelectorSyntaxError
+import warnings
+from typing import Match, Any, Iterator, cast
+
# Code point substituted by `css_unescape` when a hex escape resolves to NUL (U+0000).
UNICODE_REPLACEMENT_CHAR = 0xFFFD

# Simple pseudo classes that take no parameters
PSEUDO_SIMPLE = {
    ":any-link",
    ":empty",
    ":first-child",
    ":first-of-type",
    ":in-range",
    ":out-of-range",
    ":last-child",
    ":last-of-type",
    ":link",
    ":only-child",
    ":only-of-type",
    ":root",
    ':checked',
    ':default',
    ':disabled',
    ':enabled',
    ':indeterminate',
    ':optional',
    ':placeholder-shown',
    ':read-only',
    ':read-write',
    ':required',
    ':scope',
    ':defined'
}

# Supported, simple pseudo classes that match nothing in the Soup Sieve environment
PSEUDO_SIMPLE_NO_MATCH = {
    ':active',
    ':current',
    ':focus',
    ':focus-visible',
    ':focus-within',
    ':future',
    ':host',
    ':hover',
    ':local-link',
    ':past',
    ':paused',
    ':playing',
    ':target',
    ':target-within',
    ':user-invalid',
    ':visited'
}

# Complex pseudo classes that take selector lists
PSEUDO_COMPLEX = {
    ':contains',
    ':-soup-contains',
    ':-soup-contains-own',
    ':has',
    ':is',
    ':matches',
    ':not',
    ':where'
}

# Complex (parenthesized) pseudo classes that are parsed but flagged as "no match";
# `:current` and `:host` also appear in the simple no-match set for their bare form.
PSEUDO_COMPLEX_NO_MATCH = {
    ':current',
    ':host',
    ':host-context'
}

# Complex pseudo classes that take very specific parameters and are handled special
PSEUDO_SPECIAL = {
    ':dir',
    ':lang',
    ':nth-child',
    ':nth-last-child',
    ':nth-last-of-type',
    ':nth-of-type'
}

# Every recognized pseudo-class name. `CSSParser.parse_pseudo_class` uses this to tell
# "known name, wrong syntax" apart from "unknown name" when raising errors.
PSEUDO_SUPPORTED = PSEUDO_SIMPLE | PSEUDO_SIMPLE_NO_MATCH | PSEUDO_COMPLEX | PSEUDO_COMPLEX_NO_MATCH | PSEUDO_SPECIAL
+
# Sub-patterns parts
# These fragments are spliced into one another with f-strings. Several span multiple
# lines, so anything embedding them must be compiled with `re.X` (as `SelectorPattern` does).
# Whitespace
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
WS = fr'(?:[ \t]|{NEWLINE})'
# Comments
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
# Whitespace with comments included
WSC = fr'(?:{WS}|{COMMENTS})'
# CSS escapes
CSS_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$))'
# Same as `CSS_ESCAPES`, plus an escaped newline (allowed inside quoted strings)
CSS_STRING_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$|{NEWLINE}))'
# CSS Identifier
IDENTIFIER = fr'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})*)
'''
# `nth` content
NTH = fr'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){WSC}*(?:[-+]){WSC}*(?:[0-9]+))?'
# Value: quoted string or identifier
VALUE = fr'''(?:"(?:\\(?:.|{NEWLINE})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{NEWLINE})|[^\\'\r\n\f]+)*?'|{IDENTIFIER}+)'''
# Attribute value comparison. `!=` is handled special as it is non-standard.
ATTR = fr'(?:{WSC}*(?P<cmp>[!~^|*$]?=){WSC}*(?P<value>{VALUE})(?:{WSC}*(?P<case>[is]))?)?{WSC}*\]'

# Selector patterns
# IDs (`#id`)
PAT_ID = fr'\#{IDENTIFIER}'
# Classes (`.class`)
PAT_CLASS = fr'\.{IDENTIFIER}'
# Prefix:Tag (`prefix|tag`)
PAT_TAG = fr'(?P<tag_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<tag_name>{IDENTIFIER}|\*)'
# Attributes (`[attr]`, `[attr=value]`, etc.)
PAT_ATTR = fr'\[{WSC}*(?P<attr_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<attr_name>{IDENTIFIER}){ATTR}'
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
PAT_PSEUDO_CLASS = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)?'
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
PAT_PSEUDO_CLASS_SPECIAL = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)'
# Custom pseudo class (`:--custom-pseudo`)
PAT_PSEUDO_CLASS_CUSTOM = fr'(?P<name>:(?=--){IDENTIFIER})'
# Nesting ampersand selector. Matches `&`
PAT_AMP = r'&'
# Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = fr'{WSC}*\)'
# Pseudo element (`::pseudo-element`)
PAT_PSEUDO_ELEMENT = fr':{PAT_PSEUDO_CLASS}'
# At rule (`@page`, etc.) (not supported)
# NOTE(review): the literal `P` after `@` means only at-rules whose name begins with
# `p`/`P` (token patterns are compiled case-insensitively) tokenize as `at_rule`, e.g.
# `@page`; something like `@media` falls through to the tokenizer's invalid-character
# error instead. This looks like a leftover from a `(?P<...>` group — confirm the
# intent before changing it, since doing so changes which exception callers see.
PAT_AT_RULE = fr'@P{IDENTIFIER}'
# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
PAT_PSEUDO_NTH_CHILD = fr'''
(?P<pseudo_nth_child>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_child>{NTH}|even|odd))(?:{WSC}*\)|(?P<of>{COMMENTS}*{WS}{WSC}*of{COMMENTS}*{WS}{WSC}*))
'''
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
PAT_PSEUDO_NTH_TYPE = fr'''
(?P<pseudo_nth_type>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_type>{NTH}|even|odd)){WSC}*\)
'''
# Pseudo class language (`:lang("*-de", en)`)
PAT_PSEUDO_LANG = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
# Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<dir>ltr|rtl){WSC}*\)'
# Combining characters (`>`, `~`, ` `, `+`, `,`)
PAT_COMBINE = fr'{WSC}*?(?P<relation>[,+>~]|{WS}(?![,+>~])){WSC}*'
# Extra: Contains (`:contains(text)`)
PAT_PSEUDO_CONTAINS = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
+
# Regular expressions
# CSS escape pattern
# Group 1: hex escape (with optional trailing separator), group 2: escaped character,
# group 3: backslash at end of input. `RE_CSS_STR_ESC` adds group 4: escaped newline.
# NOTE(review): `RE_CSS_ESC` lets a *comment* follow a hex escape (`WSC`), whereas
# `CSS_ESCAPES` and `RE_CSS_STR_ESC` only allow whitespace (`WS`). If a comment is ever
# captured in group 1, `css_unescape` would pass it to `int(..., 16)` — confirm whether
# `WS` was intended here.
RE_CSS_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WSC}?)|(\\[^\r\n\f])|(\\$))', re.I)
RE_CSS_STR_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$)|(\\{NEWLINE}))', re.I)
# Pattern to break up `nth` specifiers
# Groups: `s1` sign of `a`, `a` coefficient (may end in `n`), `s2` sign of `b`, `b` offset.
RE_NTH = re.compile(fr'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){WSC}*(?P<s2>[-+]){WSC}*(?P<b>[0-9]+))?', re.I)
# Pattern to iterate multiple values.
# Each match is either a `value` or a `split` (the comma between two values).
RE_VALUES = re.compile(fr'(?:(?P<value>{VALUE})|(?P<split>{WSC}*,{WSC}*))', re.X)
# Whitespace checks
RE_WS = re.compile(WS)
# Leading / trailing runs of whitespace and comments (may match the empty string)
RE_WS_BEGIN = re.compile(fr'^{WSC}*')
RE_WS_END = re.compile(fr'{WSC}*$')
# Validates a complete custom pseudo-class name (`:--name`) in `process_custom`
RE_CUSTOM = re.compile(fr'^{PAT_PSEUDO_CLASS_CUSTOM}$', re.X)
+
# Constants
# List split token
COMMA_COMBINATOR = ','
# Relation token for descendant
WS_COMBINATOR = " "

# Parse flags
# Bit flags passed to `CSSParser.parse_selectors`.
# Parsing inside a pseudo-class: no implied `*` tag is added to compound selectors
FLG_PSEUDO = 0x01
# Resulting selector list is marked as negated (`:not()`)
FLG_NOT = 0x02
# Relative selectors that may lead with a combinator (`:has()`)
FLG_RELATIVE = 0x04
# Stamp the last selector with `ct.SEL_DEFAULT`
FLG_DEFAULT = 0x08
# Resulting selector list is marked as HTML only
FLG_HTML = 0x10
# Stamp the last selector with `ct.SEL_INDETERMINATE`
FLG_INDETERMINATE = 0x20
# Parsing started after an opening parenthesis that must be closed
FLG_OPEN = 0x40
# Stamp the last selector with `ct.SEL_IN_RANGE`
FLG_IN_RANGE = 0x80
# Stamp the last selector with `ct.SEL_OUT_OF_RANGE`
FLG_OUT_OF_RANGE = 0x100
# Stamp the last selector with `ct.SEL_PLACEHOLDER_SHOWN`
FLG_PLACEHOLDER_SHOWN = 0x200
# Forgiving selector list (`:is()`, `:where()`): empty slots become "no match"
FLG_FORGIVE = 0x400

# Maximum cached patterns to store
_MAXCACHE = 500
+
+
@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(
    pattern: str,
    namespaces: ct.Namespaces | None,
    custom: ct.CustomSelectors | None,
    flags: int
) -> cm.SoupSieve:
    """
    Compile `pattern` into a `SoupSieve` object, memoizing the result.

    The LRU cache keeps up to `_MAXCACHE` results keyed on all four arguments, so each
    argument must be hashable. The custom selectors are normalized with
    `process_custom` for the parser, while the original `custom` object is what gets
    stored on the returned `SoupSieve`.
    """

    parser = CSSParser(pattern, custom=process_custom(custom), flags=flags)
    selector_list = parser.process_selectors()
    return cm.SoupSieve(pattern, selector_list, namespaces, custom, flags)
+
+
def _purge_cache() -> None:
    """Discard every compiled selector held in the `_cached_css_compile` LRU cache."""

    _cached_css_compile.cache_clear()
+
+
def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]:
    """
    Validate and normalize user supplied custom selectors.

    Each key is lowercased and must be a complete custom pseudo-class name
    (`:--name`). The stored key is the unescaped, lowercased name, which is the same
    normalization `CSSParser.parse_pseudo_class_custom` applies when it looks a name up.

    Returns a new dictionary mapping normalized names to their values; an empty
    dictionary when `custom` is `None`.

    Raises `SelectorSyntaxError` for an invalid name and `KeyError` when two keys
    normalize to the same name.
    """

    custom_selectors: dict[str, str | ct.SelectorList] = {}
    if custom is None:
        return custom_selectors

    for key, value in custom.items():
        name = util.lower(key)
        # Validate the escaped form: once unescaped, the name may contain characters
        # that are not valid in a bare identifier.
        if RE_CUSTOM.match(name) is None:
            raise SelectorSyntaxError(f"The name '{name}' is not a valid custom pseudo-class name")

        # Compare and store by the normalized key. Checking the escaped `name` against
        # unescaped keys let escape-equivalent names (`:--a` and `:--\61`) silently
        # overwrite each other.
        normalized = util.lower(css_unescape(name))
        if normalized in custom_selectors:
            raise KeyError(f"The custom selector '{name}' has already been registered")
        custom_selectors[normalized] = value
    return custom_selectors
+
+
def css_unescape(content: str, string: bool = False) -> str:
    """
    Unescape CSS value.

    `content` is scanned with `RE_CSS_ESC`, or with `RE_CSS_STR_ESC` when `string` is
    true. Strings allow for spanning the value on multiple lines by escaping a new
    line; such an escaped new line is removed.

    Hex escapes that resolve to NUL, to a surrogate, or to a value above U+10FFFF
    become U+FFFD. Values above U+10FFFF previously made `chr()` raise `ValueError`.
    A lone backslash at the end of the input also becomes U+FFFD.
    """

    def replace(m: Match[str]) -> str:
        """Replace with the appropriate substitute."""

        hex_escape, char_escape, eof_escape = m.group(1), m.group(2), m.group(3)
        if hex_escape:
            # `int()` tolerates the optional whitespace captured after the digits.
            codepoint = int(hex_escape[1:], 16)
            if codepoint == 0 or codepoint > 0x10FFFF or 0xD800 <= codepoint <= 0xDFFF:
                codepoint = UNICODE_REPLACEMENT_CHAR
            return chr(codepoint)
        if char_escape:
            # Escaped literal character: drop the backslash.
            return char_escape[1:]
        if eof_escape:
            return chr(UNICODE_REPLACEMENT_CHAR)
        # Only reachable for the escaped-newline group of `RE_CSS_STR_ESC`.
        return ''

    return (RE_CSS_ESC if not string else RE_CSS_STR_ESC).sub(replace, content)
+
+
def escape(ident: str) -> str:
    """
    Escape identifier.

    Returns `ident` with every character that cannot appear verbatim escaped:

    - a lone `-` becomes `\\-`;
    - NUL becomes U+FFFD;
    - control characters (U+0001-U+001F, U+007F) and a digit in the first position
      (or second position after a leading `-`) become a hex escape followed by a space;
    - `-`, `_`, ASCII letters and digits, and anything at or above U+0080 are kept;
    - every other character is prefixed with a backslash.
    """

    if ident == '-':
        # Need to escape identifier that is a single `-` with no other characters
        return '\\-'

    starts_with_dash = ident.startswith('-')
    parts = []
    for index, char in enumerate(ident):
        codepoint = ord(char)
        is_digit = 0x30 <= codepoint <= 0x39
        leading = index == 0 or (starts_with_dash and index == 1)
        if codepoint == 0x00:
            parts.append('\ufffd')
        elif codepoint <= 0x1F or codepoint == 0x7F or (leading and is_digit):
            # Control characters and leading digits share the hex-escape form.
            parts.append(f'\\{codepoint:x} ')
        elif (
            codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or is_digit or
            (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A)
        ):
            parts.append(char)
        else:
            parts.append(f'\\{char}')
    return ''.join(parts)
+
+
class SelectorPattern:
    """A named regular expression used as one token type by the selector tokenizer."""

    def __init__(self, name: str, pattern: str) -> None:
        """Remember `name` and compile `pattern` case-insensitively in verbose mode."""

        compile_flags = re.I | re.X | re.U
        self.re_pattern = re.compile(pattern, compile_flags)
        self.name = name

    def get_name(self) -> str:
        """Return the token name given at construction."""

        return self.name

    def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
        """Try to match the pattern in `selector` starting at `index`; `flags` is unused."""

        return self.re_pattern.match(selector, index)
+
+
class SpecialPseudoPattern(SelectorPattern):
    """
    Dispatching token pattern for pseudo-classes that need their own grammar.

    A generic `:name(` match identifies the pseudo-class; the pattern registered for
    that name is then applied at the same position.
    """

    def __init__(self, patterns: tuple[tuple[str, tuple[str, ...], str, type[SelectorPattern]], ...]) -> None:
        """
        Build the lookup table.

        Each entry is `(token name, pseudo-class names, regex, pattern class)`; one
        pattern object is created per entry and shared by all of its pseudo-class names.
        """

        self.patterns = {}
        for token_name, pseudo_names, regex, pattern_cls in patterns:
            compiled = pattern_cls(token_name, regex)
            for pseudo in pseudo_names:
                self.patterns[pseudo] = compiled

        # Pattern behind the most recent successful `match`; read by `get_name`.
        self.matched_name = None  # type: SelectorPattern | None
        self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)

    def get_name(self) -> str:
        """Return the token name of the last matched pattern, or `''` if none matched yet."""

        if self.matched_name is None:
            return ''
        return self.matched_name.get_name()

    def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
        """Match a registered special pseudo-class at `index`, remembering which pattern hit."""

        opener = self.re_pseudo_name.match(selector, index)
        if not opener:
            return None

        # Names are compared unescaped and lowercased.
        pattern = self.patterns.get(util.lower(css_unescape(opener.group('name'))))
        if not pattern:
            return None

        pseudo = pattern.match(selector, index, flags)
        if pseudo:
            self.matched_name = pattern
        return pseudo
+
+
+class _Selector:
+ """
+ Intermediate selector class.
+
+ This stores selector data for a compound selector as we are acquiring them.
+ Once we are done collecting the data for a compound selector, we freeze
+ the data in an object that can be pickled and hashed.
+ """
+
+ def __init__(self, **kwargs: Any) -> None:
+ """Initialize."""
+
+ self.tag = kwargs.get('tag', None) # type: ct.SelectorTag | None
+ self.ids = kwargs.get('ids', []) # type: list[str]
+ self.classes = kwargs.get('classes', []) # type: list[str]
+ self.attributes = kwargs.get('attributes', []) # type: list[ct.SelectorAttribute]
+ self.nth = kwargs.get('nth', []) # type: list[ct.SelectorNth]
+ self.selectors = kwargs.get('selectors', []) # type: list[ct.SelectorList]
+ self.relations = kwargs.get('relations', []) # type: list[_Selector]
+ self.rel_type = kwargs.get('rel_type', None) # type: str | None
+ self.contains = kwargs.get('contains', []) # type: list[ct.SelectorContains]
+ self.lang = kwargs.get('lang', []) # type: list[ct.SelectorLang]
+ self.flags = kwargs.get('flags', 0) # type: int
+ self.no_match = kwargs.get('no_match', False) # type: bool
+
+ def _freeze_relations(self, relations: list[_Selector]) -> ct.SelectorList:
+ """Freeze relation."""
+
+ if relations:
+ sel = relations[0]
+ sel.relations.extend(relations[1:])
+ return ct.SelectorList([sel.freeze()])
+ else:
+ return ct.SelectorList()
+
+ def freeze(self) -> ct.Selector | ct.SelectorNull:
+ """Freeze self."""
+
+ if self.no_match:
+ return ct.SelectorNull()
+ else:
+ return ct.Selector(
+ self.tag,
+ tuple(self.ids),
+ tuple(self.classes),
+ tuple(self.attributes),
+ tuple(self.nth),
+ tuple(self.selectors),
+ self._freeze_relations(self.relations),
+ self.rel_type,
+ tuple(self.contains),
+ tuple(self.lang),
+ self.flags
+ )
+
+ def __str__(self) -> str: # pragma: no cover
+ """String representation."""
+
+ return (
+ f'_Selector(tag={self.tag!r}, ids={self.ids!r}, classes={self.classes!r}, attributes={self.attributes!r}, '
+ f'nth={self.nth!r}, selectors={self.selectors!r}, relations={self.relations!r}, '
+ f'rel_type={self.rel_type!r}, contains={self.contains!r}, lang={self.lang!r}, flags={self.flags!r}, '
+ f'no_match={self.no_match!r})'
+ )
+
+ __repr__ = __str__
+
+
+class CSSParser:
+ """Parse CSS selectors."""
+
# Token patterns, in priority order: `selector_iter` tries them top to bottom at the
# current position and yields the first one that matches. The special pseudo-classes
# are therefore tried before the generic `pseudo_class` pattern.
css_tokens = (
    SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
    SpecialPseudoPattern(
        (
            (
                "pseudo_contains",
                (':contains', ':-soup-contains', ':-soup-contains-own'),
                PAT_PSEUDO_CONTAINS,
                SelectorPattern
            ),
            ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
            ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
            ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
            ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
        )
    ),
    SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
    SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS),
    SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT),
    SelectorPattern("amp", PAT_AMP),
    SelectorPattern("at_rule", PAT_AT_RULE),
    SelectorPattern("id", PAT_ID),
    SelectorPattern("class", PAT_CLASS),
    SelectorPattern("tag", PAT_TAG),
    SelectorPattern("attribute", PAT_ATTR),
    SelectorPattern("combine", PAT_COMBINE)
)
+
def __init__(
    self,
    selector: str,
    custom: dict[str, str | ct.SelectorList] | None = None,
    flags: int = 0
) -> None:
    """
    Prepare a parser for `selector`.

    `custom` maps custom pseudo-class names to selector strings or already parsed
    selector lists (an empty mapping is used when omitted). `flags` is kept as is and
    its `util.DEBUG` bit enables debug output.
    """

    self.flags = flags
    self.debug = flags & util.DEBUG
    self.custom = custom if custom is not None else {}
    # NUL characters in the selector are swapped for U+FFFD before parsing.
    self.pattern = selector.replace('\x00', '\ufffd')
+
def parse_attribute_selector(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Create attribute selector from the returned regex match.

    Builds a `ct.SelectorAttribute` from the `attr_ns`, `attr_name`, `cmp`, `value`
    and `case` groups of `m` and stores it on `sel`. The non-standard `!=` operator
    is stored as a negated nested selector list instead. Always returns `True`, as
    a selector has now been seen; the incoming `has_selector` is not consulted.

    Matching is case-sensitive unless the `i` flag is given. The `type` attribute
    defaults to case-insensitive and additionally gets a case-sensitive pattern
    (`pattern2`) handed to `ct.SelectorAttribute`.
    """

    op = m.group('cmp')
    case = util.lower(m.group('case')) if m.group('case') else None
    ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else ''
    attr = css_unescape(m.group('attr_name'))
    inverse = False
    is_type = False
    pattern2 = None
    value = ''

    if case:
        flags = (re.I if case == 'i' else 0) | re.DOTALL
    elif util.lower(attr) == 'type':
        flags = re.I | re.DOTALL
        is_type = True
    else:
        flags = re.DOTALL

    if op:
        raw_value = m.group('value')
        if raw_value.startswith(('"', "'")):
            # Quoted: drop the quotes and unescape with string rules.
            value = css_unescape(raw_value[1:-1], True)
        else:
            value = css_unescape(raw_value)

    if not op:
        # Attribute name
        pattern = None
    elif op.startswith('^'):
        # Value start with
        pattern = re.compile(r'^%s.*' % re.escape(value), flags)
    elif op.startswith('$'):
        # Value ends with
        pattern = re.compile(r'.*?%s$' % re.escape(value), flags)
    elif op.startswith('*'):
        # Value contains
        pattern = re.compile(r'.*?%s.*' % re.escape(value), flags)
    elif op.startswith('~'):
        # Value contains word within space separated list
        # `~=` should match nothing if it is empty or contains whitespace,
        # so if either of these cases is present, use `[^\s\S]` which cannot be matched.
        value = r'[^\s\S]' if not value or RE_WS.search(value) else re.escape(value)
        pattern = re.compile(r'.*?(?:(?<=^)|(?<=[ \t\r\n\f]))%s(?=(?:[ \t\r\n\f]|$)).*' % value, flags)
    elif op.startswith('|'):
        # Value starts with word in dash separated list
        pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
    else:
        # Value matches
        pattern = re.compile(r'^%s$' % re.escape(value), flags)
        if op.startswith('!'):
            # Equivalent to `:not([attr=value])`
            inverse = True
    if is_type and pattern:
        # Case-sensitive twin of `pattern`. Keep `re.DOTALL` so the two differ only in
        # case handling; without it `.` would stop at new lines in this variant.
        pattern2 = re.compile(pattern.pattern, re.DOTALL)

    # Append the attribute selector
    sel_attr = ct.SelectorAttribute(attr, ns, pattern, pattern2)
    if inverse:
        # If we are using `!=`, we need to nest the pattern under a `:not()`.
        sub_sel = _Selector()
        sub_sel.attributes.append(sel_attr)
        not_list = ct.SelectorList([sub_sel.freeze()], True, False)
        sel.selectors.append(not_list)
    else:
        sel.attributes.append(sel_attr)

    return True
+
def parse_tag_pattern(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """Store the (optionally namespaced) tag name from `m` on `sel`; always returns `True`."""

    raw_prefix = m.group('tag_ns')
    # The captured namespace ends with the `|` separator, which is stripped.
    prefix = css_unescape(raw_prefix[:-1]) if raw_prefix else None
    sel.tag = ct.SelectorTag(css_unescape(m.group('tag_name')), prefix)
    return True
+
def parse_pseudo_class_custom(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Resolve a custom pseudo-class alias and attach its selector list to `sel`.

    Custom selectors are compiled on first use. While one is being compiled its entry
    is removed from the dictionary, so a definition that refers to itself is reported
    as undefined instead of recursing forever; the compiled list then replaces the
    original string.

    Raises `SelectorSyntaxError` when the name is not (or not currently) registered.
    Always returns `True` otherwise.
    """

    pseudo = util.lower(css_unescape(m.group('name')))
    selector = self.custom.get(pseudo)
    if selector is None:
        position = m.end(0)
        raise SelectorSyntaxError(
            f"Undefined custom selector '{pseudo}' found at position {position}",
            self.pattern,
            position
        )

    if not isinstance(selector, ct.SelectorList):
        del self.custom[pseudo]
        nested = CSSParser(selector, custom=self.custom, flags=self.flags)
        selector = nested.process_selectors(flags=FLG_PSEUDO)
        self.custom[pseudo] = selector

    sel.selectors.append(selector)
    return True
+
def parse_pseudo_class(
    self,
    sel: _Selector,
    m: Match[str],
    has_selector: bool,
    iselector: Iterator[tuple[str, Match[str]]],
    is_html: bool
) -> tuple[bool, bool]:
    """
    Handle a generic pseudo-class token.

    Parenthesized pseudo-classes that take selector lists are forwarded to
    `parse_pseudo_open`; simple ones update `sel` directly; the "no match" families
    flag `sel` as never matching. Returns the updated `(has_selector, is_html)` pair.

    Raises `SelectorSyntaxError` when a known pseudo-class is used with the wrong
    form (with or without parentheses) or the name is not supported.
    """

    pseudo = util.lower(css_unescape(m.group('name')))
    complex_pseudo = bool(m.group('open'))

    if complex_pseudo and pseudo in PSEUDO_COMPLEX:
        has_selector = self.parse_pseudo_open(sel, pseudo, has_selector, iselector, m.end(0))
    elif not complex_pseudo and pseudo in PSEUDO_SIMPLE:
        # Pseudo-classes that only set a flag on the selector.
        flag_for = {
            ':root': ct.SEL_ROOT,
            ':defined': ct.SEL_DEFINED,
            ':scope': ct.SEL_SCOPE,
            ':empty': ct.SEL_EMPTY
        }
        # Pseudo-classes expressed as `nth` entries: (of_type, last) per entry.
        nth_shapes = {
            ':first-child': ((False, False),),
            ':last-child': ((False, True),),
            ':first-of-type': ((True, False),),
            ':last-of-type': ((True, True),),
            ':only-child': ((False, False), (False, True)),
            ':only-of-type': ((True, False), (True, True))
        }

        if pseudo in flag_for:
            sel.flags |= flag_for[pseudo]
            if pseudo == ':defined':
                is_html = True
        elif pseudo in nth_shapes:
            sel.nth.extend(
                ct.SelectorNth(1, False, 0, of_type, last, ct.SelectorList())
                for of_type, last in nth_shapes[pseudo]
            )
        # NOTE(review): the `CSS_*` selector lists are defined outside this chunk.
        # They are referenced one at a time, not gathered in a table, so only the one
        # actually needed has to exist when this runs — confirm before refactoring.
        elif pseudo in (':link', ':any-link'):
            sel.selectors.append(CSS_LINK)
        elif pseudo == ':checked':
            sel.selectors.append(CSS_CHECKED)
        elif pseudo == ':default':
            sel.selectors.append(CSS_DEFAULT)
        elif pseudo == ':indeterminate':
            sel.selectors.append(CSS_INDETERMINATE)
        elif pseudo == ":disabled":
            sel.selectors.append(CSS_DISABLED)
        elif pseudo == ":enabled":
            sel.selectors.append(CSS_ENABLED)
        elif pseudo == ":required":
            sel.selectors.append(CSS_REQUIRED)
        elif pseudo == ":optional":
            sel.selectors.append(CSS_OPTIONAL)
        elif pseudo == ":read-only":
            sel.selectors.append(CSS_READ_ONLY)
        elif pseudo == ":read-write":
            sel.selectors.append(CSS_READ_WRITE)
        elif pseudo == ":in-range":
            sel.selectors.append(CSS_IN_RANGE)
        elif pseudo == ":out-of-range":
            sel.selectors.append(CSS_OUT_OF_RANGE)
        elif pseudo == ":placeholder-shown":
            sel.selectors.append(CSS_PLACEHOLDER_SHOWN)
        has_selector = True
    elif complex_pseudo and pseudo in PSEUDO_COMPLEX_NO_MATCH:
        # Consume the arguments up to the closing parenthesis; the result is discarded.
        self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
        sel.no_match = True
        has_selector = True
    elif not complex_pseudo and pseudo in PSEUDO_SIMPLE_NO_MATCH:
        sel.no_match = True
        has_selector = True
    else:
        if pseudo in PSEUDO_SUPPORTED:
            message = f"Invalid syntax for pseudo class '{pseudo}'"
        else:
            message = (
                f"'{pseudo}' was detected as a pseudo-class and is either unsupported or invalid. "
                "If the syntax was not intended to be recognized as a pseudo-class, please escape the colon."
            )
        raise SelectorSyntaxError(message, self.pattern, m.start(0))

    return has_selector, is_html
+
def parse_pseudo_nth(
    self,
    sel: _Selector,
    m: Match[str],
    has_selector: bool,
    iselector: Iterator[tuple[str, Match[str]]]
) -> bool:
    """
    Parse an `:nth-*()` pseudo-class and append the matching `ct.SelectorNth` to `sel`.

    The `an+b` expression (or `even` / `odd`) is reduced to a coefficient, an offset
    and a flag telling whether `n` was present. For the child variants an optional
    `of S` selector list is parsed from `iselector`. Always returns `True`.
    """

    mdict = m.groupdict()
    postfix = '_child' if mdict.get('pseudo_nth_child') else '_type'
    mdict['name'] = util.lower(css_unescape(mdict['name']))
    content = util.lower(mdict.get('nth' + postfix))

    if content == 'even':
        # 2n
        s1, s2, var = 2, 0, True
    elif content == 'odd':
        # 2n+1
        s1, s2, var = 2, 1, True
    else:
        nth_parts = cast(Match[str], RE_NTH.match(content))
        a = nth_parts.group('a')
        var = a.endswith('n')

        coefficient = '-' if nth_parts.group('s1') == '-' else ''
        if a.startswith('n'):
            # A bare `n` means a coefficient of one.
            coefficient += '1'
        elif var:
            coefficient += a[:-1]
        else:
            coefficient += a

        b = nth_parts.group('b')
        if b:
            offset = ('-' if nth_parts.group('s2') == '-' else '') + b
        else:
            offset = '0'

        s1 = int(coefficient, 10)
        s2 = int(offset, 10)

    pseudo_sel = mdict['name']
    if postfix == '_child':
        if m.group('of'):
            # Parse the rest of `of S`.
            nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
        else:
            # Use default `*|*` for `of S`.
            nth_sel = CSS_NTH_OF_S_DEFAULT
        from_end = {':nth-child': False, ':nth-last-child': True}
        if pseudo_sel in from_end:
            sel.nth.append(ct.SelectorNth(s1, var, s2, False, from_end[pseudo_sel], nth_sel))
    else:
        from_end = {':nth-of-type': False, ':nth-last-of-type': True}
        if pseudo_sel in from_end:
            sel.nth.append(ct.SelectorNth(s1, var, s2, True, from_end[pseudo_sel], ct.SelectorList()))
    return True
+
def parse_pseudo_open(
    self,
    sel: _Selector,
    name: str,
    has_selector: bool,
    iselector: Iterator[tuple[str, Match[str]]],
    index: int
) -> bool:
    """
    Parse the selector list inside a parenthesized pseudo-class and attach it to `sel`.

    `:not` marks the nested list as negated, `:has` as relative, and `:where` / `:is`
    as forgiving. Always returns `True`.
    """

    extra_flag = {
        ':not': FLG_NOT,
        ':has': FLG_RELATIVE,
        ':where': FLG_FORGIVE,
        ':is': FLG_FORGIVE
    }
    flags = FLG_PSEUDO | FLG_OPEN | extra_flag.get(name, 0)

    sel.selectors.append(self.parse_selectors(iselector, index, flags))
    return True
+
def parse_has_combinator(
    self,
    sel: _Selector,
    m: Match[str],
    has_selector: bool,
    selectors: list[_Selector],
    rel_type: str,
    index: int
) -> tuple[bool, _Selector, str]:
    """
    Handle a combinator token inside a relative (`:has()`) selector list.

    The finished `sel` is attached to the last entry of `selectors` together with its
    leading combinator (`rel_type`). Returns `(False, fresh _Selector, rel_type)` where
    `rel_type` is the leading combinator for the selector that follows.

    Raises `SelectorSyntaxError` when two non-whitespace combinators follow each other.
    """

    combinator = m.group('relation').strip() or WS_COMBINATOR

    if combinator == COMMA_COMBINATOR:
        # A comma closes the current relative selector and opens a new list entry.
        sel.rel_type = rel_type
        selectors[-1].relations.append(sel)
        rel_type = ":" + WS_COMBINATOR
        selectors.append(_Selector())
    else:
        if has_selector:
            # End the current selector and associate the leading combinator with this selector.
            sel.rel_type = rel_type
            selectors[-1].relations.append(sel)
        elif rel_type[1:] != WS_COMBINATOR:
            # The token patterns swallow trailing whitespace, so neither two whitespace
            # combinators nor a whitespace combinator after another kind can reach this
            # point. Multiple non-whitespace combinators can: if the pending combinator
            # is not whitespace, that is the case we hit, and it is an error.
            raise SelectorSyntaxError(
                f'The multiple combinators at position {index}',
                self.pattern,
                index
            )

        # Set the leading combinator for the next selector.
        rel_type = ':' + combinator

    return False, _Selector(), rel_type
+
def parse_combinator(
    self,
    sel: _Selector,
    m: Match[str],
    has_selector: bool,
    selectors: list[_Selector],
    relations: list[_Selector],
    is_pseudo: bool,
    is_forgive: bool,
    index: int
) -> tuple[bool, _Selector]:
    """
    Handle a combinator token in a regular (non-relative) selector list.

    A comma completes the current complex selector and appends it to `selectors`; any
    other combinator makes `sel` the pending relation of the selector that follows.
    Returns `(False, fresh _Selector)`.

    Raises `SelectorSyntaxError` when no selector precedes the combinator, except for
    a comma inside a forgiving pseudo-class.
    """

    combinator = m.group('relation').strip() or WS_COMBINATOR
    is_comma = combinator == COMMA_COMBINATOR

    if not has_selector:
        if not (is_forgive and is_comma):
            raise SelectorSyntaxError(
                f"The combinator '{combinator}' at position {index}, must have a selector before it",
                self.pattern,
                index
            )

        # If we are in a forgiving pseudo class, just make the selector a "no match"
        sel.no_match = True
        del relations[:]
        selectors.append(sel)
    elif is_comma:
        if not sel.tag and not is_pseudo:
            # Implied `*`
            sel.tag = ct.SelectorTag('*', None)
        sel.relations.extend(relations)
        selectors.append(sel)
        del relations[:]
    else:
        # `sel` absorbs what came before and becomes the only pending relation.
        sel.relations.extend(relations)
        sel.rel_type = combinator
        del relations[:]
        relations.append(sel)

    return False, _Selector()
+
def parse_class_id(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """Record a `.class` or `#id` token on `sel` (unescaped, prefix removed); always `True`."""

    token = m.group(0)
    target = sel.classes if token.startswith('.') else sel.ids
    target.append(css_unescape(token[1:]))
    return True
+
def parse_pseudo_contains(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Parse contains.

    Handles `:-soup-contains()`, `:-soup-contains-own()` and the deprecated
    `:contains()` (which emits a `FutureWarning`). Each comma separated value is
    unescaped, using string rules when quoted, and the collected values are appended
    to `sel.contains`. Always returns `True`.

    The raw value list is tokenized first and each value is unescaped exactly once.
    Unescaping the whole list beforehand, as was done previously, unescaped values
    twice and could change where the values split (e.g. an escaped comma or quote).
    """

    pseudo = util.lower(css_unescape(m.group('name')))
    if pseudo == ":contains":
        warnings.warn(  # noqa: B028
            "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
            FutureWarning
        )
    contains_own = pseudo == ":-soup-contains-own"

    patterns = []
    for token in RE_VALUES.finditer(m.group('values')):
        if token.group('split'):
            # Separator between two values.
            continue
        value = token.group('value')
        if value.startswith(("'", '"')):
            value = css_unescape(value[1:-1], True)
        else:
            value = css_unescape(value)
        patterns.append(value)

    sel.contains.append(ct.SelectorContains(patterns, contains_own))
    return True
+
def parse_pseudo_lang(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Parse the language ranges of `:lang()` and append them to `sel.lang`.

    Each comma separated value is unescaped, using string rules when it is quoted.
    Always returns `True`.
    """

    raw_values = (
        token.group('value')
        for token in RE_VALUES.finditer(m.group('values'))
        if not token.group('split')
    )
    patterns = [
        css_unescape(raw[1:-1], True) if raw.startswith(('"', "'")) else css_unescape(raw)
        for raw in raw_values
    ]

    sel.lang.append(ct.SelectorLang(patterns))
    return True
+
def parse_pseudo_dir(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """Set the LTR or RTL direction flag on `sel` for `:dir()`; always returns `True`."""

    if util.lower(m.group('dir')) == 'ltr':
        sel.flags |= ct.SEL_DIR_LTR
    else:
        sel.flags |= ct.SEL_DIR_RTL
    return True
+
def parse_selectors(
    self,
    iselector: Iterator[tuple[str, Match[str]]],
    index: int = 0,
    flags: int = 0
) -> ct.SelectorList:
    """
    Parse selectors.

    Consumes `(token name, match)` pairs from `iselector` and assembles a selector
    list. When `FLG_OPEN` is set, parsing stops at the matching `pseudo_close` token;
    otherwise it runs until the iterator is exhausted. Nested pseudo-classes recurse
    into this method with the same iterator.

    `index` is the position reported in errors before any token has been consumed.
    `flags` is a combination of the `FLG_*` parse flags.

    Returns a `ct.SelectorList` of the frozen selectors, carrying the `is_not` and
    `is_html` markers.

    Raises `SelectorSyntaxError` for malformed input and `NotImplementedError` for
    at-rules and pseudo-elements.
    """

    # Initialize important variables
    sel = _Selector()
    selectors = []
    has_selector = False
    closed = False
    relations = []  # type: list[_Selector]
    rel_type = ":" + WS_COMBINATOR

    # Setup various flags
    is_open = bool(flags & FLG_OPEN)
    is_pseudo = bool(flags & FLG_PSEUDO)
    is_relative = bool(flags & FLG_RELATIVE)
    is_not = bool(flags & FLG_NOT)
    is_html = bool(flags & FLG_HTML)
    is_default = bool(flags & FLG_DEFAULT)
    is_indeterminate = bool(flags & FLG_INDETERMINATE)
    is_in_range = bool(flags & FLG_IN_RANGE)
    is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
    is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)
    is_forgive = bool(flags & FLG_FORGIVE)

    # Print out useful debug stuff
    if self.debug:  # pragma: no cover
        if is_pseudo:
            print(' is_pseudo: True')
        if is_open:
            print(' is_open: True')
        if is_relative:
            print(' is_relative: True')
        if is_not:
            print(' is_not: True')
        if is_html:
            print(' is_html: True')
        if is_default:
            print(' is_default: True')
        if is_indeterminate:
            print(' is_indeterminate: True')
        if is_in_range:
            print(' is_in_range: True')
        if is_out_of_range:
            print(' is_out_of_range: True')
        if is_placeholder_shown:
            print(' is_placeholder_shown: True')
        if is_forgive:
            print(' is_forgive: True')

    # The algorithm for relative selectors require an initial selector in the selector list
    if is_relative:
        selectors.append(_Selector())

    try:
        while True:
            # Raises `StopIteration` once the tokenizer is exhausted (handled below).
            key, m = next(iselector)

            # Handle parts
            if key == "at_rule":
                raise NotImplementedError(f"At-rules found at position {m.start(0)}")
            elif key == "amp":
                # The nesting selector `&` sets the same flag as `:scope`.
                sel.flags |= ct.SEL_SCOPE
                has_selector = True
            elif key == 'pseudo_class_custom':
                has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
            elif key == 'pseudo_class':
                has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
            elif key == 'pseudo_element':
                raise NotImplementedError(f"Pseudo-element found at position {m.start(0)}")
            elif key == 'pseudo_contains':
                has_selector = self.parse_pseudo_contains(sel, m, has_selector)
            elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
                has_selector = self.parse_pseudo_nth(sel, m, has_selector, iselector)
            elif key == 'pseudo_lang':
                has_selector = self.parse_pseudo_lang(sel, m, has_selector)
            elif key == 'pseudo_dir':
                has_selector = self.parse_pseudo_dir(sel, m, has_selector)
                # Currently only supports HTML
                is_html = True
            elif key == 'pseudo_close':
                if not has_selector:
                    if not is_forgive:
                        raise SelectorSyntaxError(
                            f"Expected a selector at position {m.start(0)}",
                            self.pattern,
                            m.start(0)
                        )
                    # Forgiving list: an empty slot before `)` simply never matches.
                    sel.no_match = True
                if is_open:
                    closed = True
                    break
                else:
                    raise SelectorSyntaxError(
                        f"Unmatched pseudo-class close at position {m.start(0)}",
                        self.pattern,
                        m.start(0)
                    )
            elif key == 'combine':
                if is_relative:
                    has_selector, sel, rel_type = self.parse_has_combinator(
                        sel, m, has_selector, selectors, rel_type, index
                    )
                else:
                    has_selector, sel = self.parse_combinator(
                        sel, m, has_selector, selectors, relations, is_pseudo, is_forgive, index
                    )
            elif key == 'attribute':
                has_selector = self.parse_attribute_selector(sel, m, has_selector)
            elif key == 'tag':
                # A type selector is only allowed as the first part of a compound selector.
                if has_selector:
                    raise SelectorSyntaxError(
                        f"Tag name found at position {m.start(0)} instead of at the start",
                        self.pattern,
                        m.start(0)
                    )
                has_selector = self.parse_tag_pattern(sel, m, has_selector)
            elif key in ('class', 'id'):
                has_selector = self.parse_class_id(sel, m, has_selector)

            # Remember where the last handled token ended, for error reporting.
            index = m.end(0)
    except StopIteration:
        pass

    # Handle selectors that are not closed
    if is_open and not closed:
        raise SelectorSyntaxError(
            f"Unclosed pseudo-class at position {index}",
            self.pattern,
            index
        )

    # Cleanup completed selector piece
    if has_selector:
        if not sel.tag and not is_pseudo:
            # Implied `*`
            sel.tag = ct.SelectorTag('*', None)
        if is_relative:
            sel.rel_type = rel_type
            selectors[-1].relations.append(sel)
        else:
            sel.relations.extend(relations)
            del relations[:]
            selectors.append(sel)

    # Forgive empty slots in pseudo-classes that have lists (and are forgiving)
    elif is_forgive and (not selectors or not relations):
        # Handle normal pseudo-classes with empty slots like `:is()` etc.
        sel.no_match = True
        del relations[:]
        selectors.append(sel)
        has_selector = True

    if not has_selector:
        # We will always need to finish a selector when `:has()` is used as it leads with combining.
        # May apply to others as well.
        raise SelectorSyntaxError(
            f'Expected a selector at position {index}',
            self.pattern,
            index
        )

    # Some patterns require additional logic, such as default. We try to make these the
    # last pattern, and append the appropriate flag to that selector which communicates
    # to the matcher what additional logic is required.
    # NOTE(review): these assignments replace the last selector's flags, they do not OR
    # into them — presumably fine for the selectors that use these parse flags; confirm.
    if is_default:
        selectors[-1].flags = ct.SEL_DEFAULT
    if is_indeterminate:
        selectors[-1].flags = ct.SEL_INDETERMINATE
    if is_in_range:
        selectors[-1].flags = ct.SEL_IN_RANGE
    if is_out_of_range:
        selectors[-1].flags = ct.SEL_OUT_OF_RANGE
    if is_placeholder_shown:
        selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN

    # Return selector list
    return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
+
+    def selector_iter(self, pattern: str) -> Iterator[tuple[str, Match[str]]]:
+        """
+        Tokenize `pattern` and yield `(token name, match)` pairs in order.
+
+        Whitespace and comments at the very start and end of the pattern are skipped.
+        A `SelectorSyntaxError` is raised at the first position where none of
+        `self.css_tokens` matches.
+        """
+
+        # Compute the inclusive window that holds actual tokens
+        leading = RE_WS_BEGIN.search(pattern)
+        trailing = RE_WS_END.search(pattern)
+        position = leading.end(0) if leading else 0
+        last = (trailing.start(0) - 1) if trailing else (len(pattern) - 1)
+
+        if self.debug:  # pragma: no cover
+            print(f'## PARSING: {pattern!r}')
+
+        while position <= last:
+            for token in self.css_tokens:
+                found = token.match(pattern, position, self.flags)
+                if not found:
+                    continue
+                token_name = token.get_name()
+                if self.debug:  # pragma: no cover
+                    print(f"TOKEN: '{token_name}' --> {found.group(0)!r} at position {found.start(0)}")
+                position = found.end(0)
+                yield token_name, found
+                break
+            else:
+                # Nothing matched. If the character opens a known selector type, report that
+                # selector type as malformed; otherwise report the character itself.
+                bad_char = pattern[position]
+                known_starts = {'[': 'attribute', '.': 'class', '#': 'id', ':': 'pseudo-class'}
+                if bad_char in known_starts:
+                    msg = f"Malformed {known_starts[bad_char]} selector at position {position}"
+                else:
+                    msg = f"Invalid character {bad_char!r} position {position}"
+                raise SelectorSyntaxError(msg, self.pattern, position)
+
+        if self.debug:  # pragma: no cover
+            print('## END PARSING')
+
+    def process_selectors(self, index: int = 0, flags: int = 0) -> ct.SelectorList:
+        """Tokenize `self.pattern` and parse the token stream into a selector list."""
+
+        token_stream = self.selector_iter(self.pattern)
+        return self.parse_selectors(token_stream, index, flags)
+
+
+# Precompile CSS selector lists for pseudo-classes (additional logic may be required beyond the pattern)
+# A few patterns are order dependent as they use patterns previously compiled.
+# Each constant below is built at import time by parsing the pattern with `CSSParser` and
+# `process_selectors`, passing `FLG_PSEUDO` plus any pattern-specific flags.
+
+# CSS pattern for `:link` and `:any-link`
+CSS_LINK = CSSParser(
+ 'html|*:is(a, area)[href]'
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+# CSS pattern for `:checked`
+CSS_CHECKED = CSSParser(
+ '''
+ html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
+ '''
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+# CSS pattern for `:default` (must compile CSS_CHECKED first)
+CSS_DEFAULT = CSSParser(
+ '''
+ :checked,
+
+ /*
+ This pattern must be at the end.
+ Special logic is applied to the last selector.
+ */
+ html|form html|*:is(button, input)[type="submit"]
+ '''
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_DEFAULT)
+# CSS pattern for `:indeterminate`
+CSS_INDETERMINATE = CSSParser(
+ '''
+ html|input[type="checkbox"][indeterminate],
+ html|input[type="radio"]:is(:not([name]), [name=""]):not([checked]),
+ html|progress:not([value]),
+
+ /*
+ This pattern must be at the end.
+ Special logic is applied to the last selector.
+ */
+ html|input[type="radio"][name]:not([name='']):not([checked])
+ '''
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
+# CSS pattern for `:disabled`
+# NOTE(review): `fieldset` appears twice in the first `:is()` list; the duplicate looks redundant.
+CSS_DISABLED = CSSParser(
+ '''
+ html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
+ html|optgroup[disabled] > html|option,
+ html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
+ html|fieldset[disabled] >
+ html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
+ '''
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+# CSS pattern for `:enabled` (uses `:disabled`, so CSS_DISABLED is presumably required first -- confirm)
+# NOTE(review): `fieldset` appears twice in the `:is()` list; the duplicate looks redundant.
+CSS_ENABLED = CSSParser(
+ '''
+ html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
+ '''
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+# CSS pattern for `:required`
+CSS_REQUIRED = CSSParser(
+ 'html|*:is(input, textarea, select)[required]'
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+# CSS pattern for `:optional`
+CSS_OPTIONAL = CSSParser(
+ 'html|*:is(input, textarea, select):not([required])'
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+# CSS pattern for `:placeholder-shown`
+CSS_PLACEHOLDER_SHOWN = CSSParser(
+ '''
+ html|input:is(
+ :not([type]),
+ [type=""],
+ [type=text],
+ [type=search],
+ [type=url],
+ [type=tel],
+ [type=email],
+ [type=password],
+ [type=number]
+ )[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
+ html|textarea[placeholder]:not([placeholder=''])
+ '''
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
+# CSS pattern default for `:nth-child` "of S" feature
+CSS_NTH_OF_S_DEFAULT = CSSParser(
+ '*|*'
+).process_selectors(flags=FLG_PSEUDO)
+# CSS pattern for `:read-write` (CSS_DISABLED must be compiled first)
+CSS_READ_WRITE = CSSParser(
+ '''
+ html|*:is(
+ textarea,
+ input:is(
+ :not([type]),
+ [type=""],
+ [type=text],
+ [type=search],
+ [type=url],
+ [type=tel],
+ [type=email],
+ [type=number],
+ [type=password],
+ [type=date],
+ [type=datetime-local],
+ [type=month],
+ [type=time],
+ [type=week]
+ )
+ ):not([readonly], :disabled),
+ html|*:is([contenteditable=""], [contenteditable="true" i])
+ '''
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+# CSS pattern for `:read-only` (uses `:read-write`, so CSS_READ_WRITE is presumably required first -- confirm)
+CSS_READ_ONLY = CSSParser(
+ '''
+ html|*:not(:read-write)
+ '''
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+# CSS pattern for `:in-range`
+# The pattern text is identical to the `:out-of-range` one below; only the flag passed differs.
+CSS_IN_RANGE = CSSParser(
+ '''
+ html|input:is(
+ [type="date"],
+ [type="month"],
+ [type="week"],
+ [type="time"],
+ [type="datetime-local"],
+ [type="number"],
+ [type="range"]
+ ):is(
+ [min],
+ [max]
+ )
+ '''
+).process_selectors(flags=FLG_PSEUDO | FLG_IN_RANGE | FLG_HTML)
+# CSS pattern for `:out-of-range`
+# The pattern text is identical to the `:in-range` one above; only the flag passed differs.
+CSS_OUT_OF_RANGE = CSSParser(
+ '''
+ html|input:is(
+ [type="date"],
+ [type="month"],
+ [type="week"],
+ [type="time"],
+ [type="datetime-local"],
+ [type="number"],
+ [type="range"]
+ ):is(
+ [min],
+ [max]
+ )
+ '''
+).process_selectors(flags=FLG_PSEUDO | FLG_OUT_OF_RANGE | FLG_HTML)
diff --git a/.venv/lib/python3.12/site-packages/soupsieve/css_types.py b/.venv/lib/python3.12/site-packages/soupsieve/css_types.py
new file mode 100644
index 00000000..71a6519b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/soupsieve/css_types.py
@@ -0,0 +1,407 @@
+"""CSS selector structure items."""
+from __future__ import annotations
+import copyreg
+from .pretty import pretty
+from typing import Any, Iterator, Hashable, Pattern, Iterable, Mapping
+
+# Names exported by a star-import of this module.
+__all__ = (
+ 'Selector',
+ 'SelectorNull',
+ 'SelectorTag',
+ 'SelectorAttribute',
+ 'SelectorContains',
+ 'SelectorNth',
+ 'SelectorLang',
+ 'SelectorList',
+ 'Namespaces',
+ 'CustomSelectors'
+)
+
+
+# Flag values for the `flags` field of a selector. Each constant occupies its own bit.
+# The parser assigns some of these (e.g. `SEL_DEFAULT`, `SEL_IN_RANGE`) to the last selector
+# of a pattern to tell the matcher that additional logic is required.
+SEL_EMPTY = 0x1
+SEL_ROOT = 0x2
+SEL_DEFAULT = 0x4
+SEL_INDETERMINATE = 0x8
+SEL_SCOPE = 0x10
+SEL_DIR_LTR = 0x20
+SEL_DIR_RTL = 0x40
+SEL_IN_RANGE = 0x80
+SEL_OUT_OF_RANGE = 0x100
+SEL_DEFINED = 0x200
+SEL_PLACEHOLDER_SHOWN = 0x400
+
+
+class Immutable:
+    """
+    Base class for hashable value objects that reject attribute assignment.
+
+    Subclasses declare their fields in `__slots__` (together with `'_hash'`) and hand the
+    field values to `__init__` as keyword arguments. The hash is computed once at
+    construction from the types and values of those arguments, in the order given.
+    """
+
+    __slots__: tuple[str, ...] = ('_hash',)
+
+    _hash: int
+
+    def __init__(self, **kwargs: Any) -> None:
+        """Store every keyword argument as an attribute and precompute the hash."""
+
+        temp = []
+        for k, v in kwargs.items():
+            # The value's type is hashed along with the value so that equal-comparing
+            # values of different types (e.g. `1` and `True`) yield different hashes.
+            temp.append(type(v))
+            temp.append(v)
+            # Bypass our own `__setattr__`, which always raises.
+            super().__setattr__(k, v)
+        super().__setattr__('_hash', hash(tuple(temp)))
+
+    @classmethod
+    def __base__(cls) -> type[Immutable]:
+        """Get base class."""
+
+        return cls
+
+    def __eq__(self, other: Any) -> bool:
+        """Equal: same base class and every slot (except `_hash`) compares equal."""
+
+        return (
+            isinstance(other, self.__base__()) and
+            all(getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash')
+        )
+
+    def __ne__(self, other: Any) -> bool:
+        """Not equal: different base class or at least one slot (except `_hash`) differs."""
+
+        return (
+            not isinstance(other, self.__base__()) or
+            any(getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash')
+        )
+
+    def __hash__(self) -> int:
+        """Hash."""
+
+        return self._hash
+
+    def __setattr__(self, name: str, value: Any) -> None:
+        """Prevent mutability."""
+
+        raise AttributeError(f"'{self.__class__.__name__}' is immutable")
+
+    def __repr__(self) -> str:  # pragma: no cover
+        """Representation listing every slot except the cached `_hash`."""
+
+        # Skip `_hash` by name (as `__eq__`/`__ne__` do) rather than assuming it is the last slot.
+        r = ', '.join([f"{k}={getattr(self, k)!r}" for k in self.__slots__ if k != '_hash'])
+        return f"{self.__class__.__name__}({r})"
+
+    __str__ = __repr__
+
+    def pretty(self) -> None:  # pragma: no cover
+        """Pretty print."""
+
+        print(pretty(self))
+
+
+class ImmutableDict(Mapping[Any, Any]):
+    """
+    Hashable, immutable dictionary.
+
+    Wraps a private `dict` copy of the input and exposes it read-only through the
+    `Mapping` interface. The hash is computed once from the sorted items.
+    """
+
+    def __init__(
+        self,
+        arg: dict[Any, Any] | Iterable[tuple[Any, Any]]
+    ) -> None:
+        """
+        Initialize from a dictionary or an iterable of `(key, value)` pairs.
+
+        Raises `TypeError` when `_validate` rejects the input.
+        """
+
+        # A one-shot iterator (e.g. a generator) would be exhausted by `_validate` and
+        # leave `dict(arg)` empty, so materialize it before it is read twice.
+        if isinstance(arg, Iterator):
+            arg = tuple(arg)
+
+        self._validate(arg)
+        self._d = dict(arg)
+        # Types are hashed alongside values so equal-comparing values of different types differ.
+        self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
+
+    def _validate(self, arg: dict[Any, Any] | Iterable[tuple[Any, Any]]) -> None:
+        """Validate arguments: raise `TypeError` unless everything checked is hashable."""
+
+        if isinstance(arg, dict):
+            # Keys of an existing `dict` are necessarily hashable; only values need checking.
+            if not all(isinstance(v, Hashable) for v in arg.values()):
+                raise TypeError(f'{self.__class__.__name__} values must be hashable')
+        elif not all(isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg):
+            raise TypeError(f'{self.__class__.__name__} values must be hashable')
+
+    def __iter__(self) -> Iterator[Any]:
+        """Iterator."""
+
+        return iter(self._d)
+
+    def __len__(self) -> int:
+        """Length."""
+
+        return len(self._d)
+
+    def __getitem__(self, key: Any) -> Any:
+        """Get item: `namespace['key']`."""
+
+        return self._d[key]
+
+    def __hash__(self) -> int:
+        """Hash."""
+
+        return self._hash
+
+    def __repr__(self) -> str:  # pragma: no cover
+        """Representation."""
+
+        return f"{self._d!r}"
+
+    __str__ = __repr__
+
+
+class Namespaces(ImmutableDict):
+    """Immutable mapping of namespaces whose entries must be strings."""
+
+    def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
+        """Initialize."""
+
+        super().__init__(arg)
+
+    def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
+        """Validate arguments: raise `TypeError` when a checked entry is not a `str`."""
+
+        if isinstance(arg, dict):
+            # NOTE(review): only the values of a `dict` are checked here, not its keys.
+            if not all(isinstance(v, str) for v in arg.values()):
+                # The check is for `str`, so say so (the message used to claim "hashable").
+                raise TypeError(f'{self.__class__.__name__} values must be Unicode strings')
+        elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
+            raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings')
+
+
+class CustomSelectors(ImmutableDict):
+    """Immutable mapping of custom selectors whose entries must be strings."""
+
+    def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
+        """Initialize."""
+
+        super().__init__(arg)
+
+    def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
+        """Validate arguments: raise `TypeError` when a checked entry is not a `str`."""
+
+        if isinstance(arg, dict):
+            # NOTE(review): only the values of a `dict` are checked here, not its keys.
+            if not all(isinstance(v, str) for v in arg.values()):
+                # The check is for `str`, so say so (the message used to claim "hashable").
+                raise TypeError(f'{self.__class__.__name__} values must be Unicode strings')
+        elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
+            raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings')
+
+
+class Selector(Immutable):
+ """
+ Immutable record for a single parsed selector.
+
+ It carries the tag, ids, classes, attribute rules, nth rules, nested selector
+ lists, a related selector list with its relation type, contains rules, language
+ rules and a flags integer.
+ """
+
+ # Field order matters: `_pickle` rebuilds an instance by passing the slot values
+ # (all but the trailing `_hash`) positionally, so it must match the `__init__` signature.
+ __slots__ = (
+ 'tag', 'ids', 'classes', 'attributes', 'nth', 'selectors',
+ 'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
+ )
+
+ tag: SelectorTag | None
+ ids: tuple[str, ...]
+ classes: tuple[str, ...]
+ attributes: tuple[SelectorAttribute, ...]
+ nth: tuple[SelectorNth, ...]
+ selectors: tuple[SelectorList, ...]
+ relation: SelectorList
+ # presumably the combinator (e.g. '>') linking this selector to its relation -- confirm against the matcher
+ rel_type: str | None
+ contains: tuple[SelectorContains, ...]
+ lang: tuple[SelectorLang, ...]
+ # Integer built from the `SEL_*` constants of this module
+ flags: int
+
+ def __init__(
+ self,
+ tag: SelectorTag | None,
+ ids: tuple[str, ...],
+ classes: tuple[str, ...],
+ attributes: tuple[SelectorAttribute, ...],
+ nth: tuple[SelectorNth, ...],
+ selectors: tuple[SelectorList, ...],
+ relation: SelectorList,
+ rel_type: str | None,
+ contains: tuple[SelectorContains, ...],
+ lang: tuple[SelectorLang, ...],
+ flags: int
+ ):
+ """Initialize by storing every argument unchanged as a same-named field."""
+
+ # The keyword order below feeds the hash computed in `Immutable.__init__`.
+ super().__init__(
+ tag=tag,
+ ids=ids,
+ classes=classes,
+ attributes=attributes,
+ nth=nth,
+ selectors=selectors,
+ relation=relation,
+ rel_type=rel_type,
+ contains=contains,
+ lang=lang,
+ flags=flags
+ )
+
+
+class SelectorNull(Immutable):
+ """Null Selector: an `Immutable` with no fields of its own."""
+
+ def __init__(self) -> None:
+ """Initialize with no fields, so the hash is that of an empty tuple."""
+
+ super().__init__()
+
+
+class SelectorTag(Immutable):
+ """Selector tag: a tag name plus an optional prefix."""
+
+ # Slot order (minus the trailing `_hash`) must match the `__init__` parameter order for pickling.
+ __slots__ = ("name", "prefix", "_hash")
+
+ name: str
+ # `None` when no prefix is given (the parser's implied `*` tag uses `None`)
+ prefix: str | None
+
+ def __init__(self, name: str, prefix: str | None) -> None:
+ """Initialize with the tag `name` and its `prefix`."""
+
+ super().__init__(name=name, prefix=prefix)
+
+
+class SelectorAttribute(Immutable):
+ """Selector attribute rule: an attribute name, a prefix and up to two compiled patterns."""
+
+ # Slot order (minus the trailing `_hash`) must match the `__init__` parameter order for pickling.
+ __slots__ = ("attribute", "prefix", "pattern", "xml_type_pattern", "_hash")
+
+ attribute: str
+ prefix: str
+ # Compiled pattern for the attribute value, or `None`
+ pattern: Pattern[str] | None
+ # presumably an alternative pattern used for XML documents -- TODO confirm against the matcher
+ xml_type_pattern: Pattern[str] | None
+
+ def __init__(
+ self,
+ attribute: str,
+ prefix: str,
+ pattern: Pattern[str] | None,
+ xml_type_pattern: Pattern[str] | None
+ ) -> None:
+ """Initialize by storing every argument unchanged as a same-named field."""
+
+ super().__init__(
+ attribute=attribute,
+ prefix=prefix,
+ pattern=pattern,
+ xml_type_pattern=xml_type_pattern
+ )
+
+
+class SelectorContains(Immutable):
+ """Selector contains rule: a tuple of text strings and an `own` flag."""
+
+ # Slot order (minus the trailing `_hash`) must match the `__init__` parameter order for pickling.
+ __slots__ = ("text", "own", "_hash")
+
+ text: tuple[str, ...]
+ # presumably restricts matching to the element's own text -- TODO confirm against the matcher
+ own: bool
+
+ def __init__(self, text: Iterable[str], own: bool) -> None:
+ """Initialize; `text` may be any iterable of strings and is stored as a tuple."""
+
+ super().__init__(text=tuple(text), own=own)
+
+
+class SelectorNth(Immutable):
+ """Selector nth type: the parameters of one nth-style rule plus a selector list."""
+
+ # Slot order (minus the trailing `_hash`) must match the `__init__` parameter order for pickling.
+ __slots__ = ("a", "n", "b", "of_type", "last", "selectors", "_hash")
+
+ # presumably `a`, `n` and `b` encode an `an+b` expression -- TODO confirm against the parser
+ a: int
+ n: bool
+ b: int
+ of_type: bool
+ last: bool
+ selectors: SelectorList
+
+ def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: SelectorList) -> None:
+ """Initialize by storing every argument unchanged as a same-named field."""
+
+ super().__init__(
+ a=a,
+ n=n,
+ b=b,
+ of_type=of_type,
+ last=last,
+ selectors=selectors
+ )
+
+
+class SelectorLang(Immutable):
+ """Selector language rules: an immutable, iterable and indexable tuple of language strings."""
+
+ __slots__ = ("languages", "_hash",)
+
+ languages: tuple[str, ...]
+
+ def __init__(self, languages: Iterable[str]):
+ """Initialize; `languages` may be any iterable of strings and is stored as a tuple."""
+
+ super().__init__(languages=tuple(languages))
+
+ def __iter__(self) -> Iterator[str]:
+ """Iterate over the stored language strings."""
+
+ return iter(self.languages)
+
+ def __len__(self) -> int: # pragma: no cover
+ """Return the number of stored language strings."""
+
+ return len(self.languages)
+
+ def __getitem__(self, index: int) -> str: # pragma: no cover
+ """Return the language string at `index`."""
+
+ return self.languages[index]
+
+
+class SelectorList(Immutable):
+ """Selector list: an immutable tuple of selectors plus `is_not` and `is_html` flags."""
+
+ # Slot order (minus the trailing `_hash`) must match the `__init__` parameter order for pickling.
+ __slots__ = ("selectors", "is_not", "is_html", "_hash")
+
+ selectors: tuple[Selector | SelectorNull, ...]
+ is_not: bool
+ is_html: bool
+
+ def __init__(
+ self,
+ selectors: Iterable[Selector | SelectorNull] | None = None,
+ is_not: bool = False,
+ is_html: bool = False
+ ) -> None:
+ """Initialize; `selectors` is stored as a tuple, and `None` becomes an empty tuple."""
+
+ super().__init__(
+ selectors=tuple(selectors) if selectors is not None else (),
+ is_not=is_not,
+ is_html=is_html
+ )
+
+ def __iter__(self) -> Iterator[Selector | SelectorNull]:
+ """Iterate over the contained selectors."""
+
+ return iter(self.selectors)
+
+ def __len__(self) -> int:
+ """Return the number of contained selectors."""
+
+ return len(self.selectors)
+
+ def __getitem__(self, index: int) -> Selector | SelectorNull:
+ """Return the selector at `index`."""
+
+ return self.selectors[index]
+
+
+def _pickle(p: Any) -> Any:
+    """Return the `(callable, args)` reduction pair used to rebuild `p` when unpickling."""
+
+    # Every slot except the last one (the cached `_hash`) becomes a positional argument.
+    field_names = p.__slots__[:-1]
+    constructor = p.__base__()
+    state = tuple(getattr(p, field) for field in field_names)
+    return constructor, state
+
+
+def pickle_register(obj: Any) -> None:
+ """Allow object to be pickled by registering `_pickle` as its reduction function with `copyreg`."""
+
+ copyreg.pickle(obj, _pickle)
+
+
+# Register every selector structure class so instances can be pickled.
+pickle_register(Selector)
+pickle_register(SelectorNull)
+pickle_register(SelectorTag)
+pickle_register(SelectorAttribute)
+pickle_register(SelectorContains)
+pickle_register(SelectorNth)
+pickle_register(SelectorLang)
+pickle_register(SelectorList)
diff --git a/.venv/lib/python3.12/site-packages/soupsieve/pretty.py b/.venv/lib/python3.12/site-packages/soupsieve/pretty.py
new file mode 100644
index 00000000..193db05e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/soupsieve/pretty.py
@@ -0,0 +1,139 @@
+"""
+Format a pretty string of a `SoupSieve` object for easy debugging.
+
+This won't necessarily support all types and such, and definitely
+not support custom outputs.
+
+It is mainly geared towards our types as the `SelectorList`
+object is a beast to look at without some indentation and newlines.
+The format and various output types is fairly known (though it
+hasn't been tested extensively to make sure we aren't missing corners).
+
+Example:
+-------
+```
+>>> import soupsieve as sv
+>>> sv.compile('this > that.class[name=value]').selectors.pretty()
+SelectorList(
+ selectors=(
+ Selector(
+ tag=SelectorTag(
+ name='that',
+ prefix=None),
+ ids=(),
+ classes=(
+ 'class',
+ ),
+ attributes=(
+ SelectorAttribute(
+ attribute='name',
+ prefix='',
+ pattern=re.compile(
+ '^value$'),
+ xml_type_pattern=None),
+ ),
+ nth=(),
+ selectors=(),
+ relation=SelectorList(
+ selectors=(
+ Selector(
+ tag=SelectorTag(
+ name='this',
+ prefix=None),
+ ids=(),
+ classes=(),
+ attributes=(),
+ nth=(),
+ selectors=(),
+ relation=SelectorList(
+ selectors=(),
+ is_not=False,
+ is_html=False),
+ rel_type='>',
+ contains=(),
+ lang=(),
+ flags=0),
+ ),
+ is_not=False,
+ is_html=False),
+ rel_type=None,
+ contains=(),
+ lang=(),
+ flags=0),
+ ),
+ is_not=False,
+ is_html=False)
+```
+
+"""
+from __future__ import annotations
+import re
+from typing import Any
+
+# Token patterns used to scan the `str()` form of an object.
+RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d\.]+\(')
+RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]+=')
+RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}')
+RE_LSTRT = re.compile(r'\[')
+RE_DSTRT = re.compile(r'\{')
+RE_TSTRT = re.compile(r'\(')
+RE_LEND = re.compile(r'\]')
+RE_DEND = re.compile(r'\}')
+RE_TEND = re.compile(r'\)')
+RE_INT = re.compile(r'\d+')
+RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d]+')
+RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"')
+RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'")
+RE_SEP = re.compile(r'\s*(,)\s*')
+RE_DSEP = re.compile(r'\s*(:)\s*')
+
+# Tokens are tried in insertion order at each position; the first match wins.
+TOKENS = {
+    'class': RE_CLASS,
+    'param': RE_PARAM,
+    'empty': RE_EMPTY,
+    'lstrt': RE_LSTRT,
+    'dstrt': RE_DSTRT,
+    'tstrt': RE_TSTRT,
+    'lend': RE_LEND,
+    'dend': RE_DEND,
+    'tend': RE_TEND,
+    'sqstr': RE_SQSTR,
+    'sep': RE_SEP,
+    'dsep': RE_DSEP,
+    'int': RE_INT,
+    'kword': RE_KWORD,
+    'dqstr': RE_DQSTR
+}
+
+
+def pretty(obj: Any) -> str:  # pragma: no cover
+    """
+    Make the object output string pretty.
+
+    Scans `str(obj)` token by token, starting a new indented line after each opening
+    bracket or `name(` and after each comma. Text that no token recognizes (for
+    example `-`, `.` or a one-letter name) is copied through unchanged.
+    """
+
+    sel = str(obj)
+    index = 0
+    end = len(sel) - 1
+    indent = 0
+    output = []
+
+    while index <= end:
+        for name, token in TOKENS.items():
+            m = token.match(sel, index)
+            if not m:
+                continue
+
+            index = m.end(0)
+            if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
+                indent += 4
+                output.append(f'{m.group(0)}\n{" " * indent}')
+            elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
+                output.append(m.group(0))
+            elif name in ('lend', 'dend', 'tend'):
+                indent -= 4
+                output.append(m.group(0))
+            elif name in ('sep',):
+                output.append(f'{m.group(1)}\n{" " * indent}')
+            elif name in ('dsep',):
+                output.append(f'{m.group(1)} ')
+            break
+        else:
+            # No token matched here. Emit the character verbatim and move on; without
+            # this the index would never advance and the loop would spin forever.
+            output.append(sel[index])
+            index += 1
+
+    return ''.join(output)
diff --git a/.venv/lib/python3.12/site-packages/soupsieve/py.typed b/.venv/lib/python3.12/site-packages/soupsieve/py.typed
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/soupsieve/py.typed
diff --git a/.venv/lib/python3.12/site-packages/soupsieve/util.py b/.venv/lib/python3.12/site-packages/soupsieve/util.py
new file mode 100644
index 00000000..9b2e64df
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/soupsieve/util.py
@@ -0,0 +1,117 @@
+"""Utility."""
+from __future__ import annotations
+from functools import wraps, lru_cache
+import warnings
+import re
+from typing import Callable, Any
+
+DEBUG = 0x00001
+
+RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$')
+
+UC_A = ord('A')
+UC_Z = ord('Z')
+
+
+@lru_cache(maxsize=512)
+def lower(string: str) -> str:
+    """Lowercase only the ASCII letters `A`-`Z`, leaving every other character untouched."""
+
+    return ''.join(
+        chr(ord(char) + 32) if UC_A <= ord(char) <= UC_Z else char
+        for char in string
+    )
+
+
+class SelectorSyntaxError(Exception):
+    """
+    Syntax error in a CSS selector.
+
+    When both the pattern and the failing index are supplied, `context`, `line` and
+    `col` describe the error location and the message is extended with that context;
+    otherwise all three attributes are `None`.
+    """
+
+    def __init__(self, msg: str, pattern: str | None = None, index: int | None = None) -> None:
+        """Initialize."""
+
+        context = line = col = None
+
+        if not (pattern is None or index is None):
+            # Format pattern to show line and column position
+            context, line, col = get_pattern_context(pattern, index)
+            msg = f'{msg}\n line {line}:\n{context}'
+
+        self.line = line
+        self.col = col
+        self.context = context
+
+        super().__init__(msg)
+
+
+def deprecated(message: str, stacklevel: int = 2) -> Callable[..., Any]:  # pragma: no cover
+    """
+    Build a decorator that emits a `DeprecationWarning` on every call of the wrapped callable.
+
+    Usage:
+
+    @deprecated("This method will be removed in version X; use Y instead.")
+    def some_method():
+        pass
+    """
+
+    def decorate(func: Callable[..., Any]) -> Callable[..., Any]:
+        @wraps(func)
+        def guarded(*args: Any, **kwargs: Any) -> Any:
+            notice = f"'{func.__name__}' is deprecated. {message}"
+            warnings.warn(notice, category=DeprecationWarning, stacklevel=stacklevel)
+            return func(*args, **kwargs)
+
+        return guarded
+
+    return decorate
+
+
+def warn_deprecated(message: str, stacklevel: int = 2) -> None:  # pragma: no cover
+    """Emit `message` as a `DeprecationWarning` attributed `stacklevel` frames up the stack."""
+
+    warnings.warn(message, DeprecationWarning, stacklevel)
+
+
+def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
+ """
+ Get the pattern context.
+
+ Renders `pattern` line by line, marking the line that contains `index` and placing
+ a `^` on the following line. Returns a `(text, line, col)` tuple: the rendered
+ text, the 1-based number of the marked line and the 1-based column of `index`
+ within that line.
+ """
+
+ # NOTE(review): the whitespace-only string literals below may have been altered by the
+ # rendering this text was extracted from -- confirm their widths against the packaged file.
+ last = 0
+ current_line = 1
+ col = 1
+ text = [] # type: list[str]
+ line = 1
+ offset = None # type: int | None
+
+ # Split pattern by newline and handle the text before the newline
+ # (`RE_PATTERN_LINE_SPLIT` matches `\r\n`, a lone `\n` or `\r`, or the end of the string)
+ for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):
+ linetext = pattern[last:m.start(0)]
+ if not len(m.group(0)) and not len(text):
+ # End-of-string match with nothing rendered yet: a single-line pattern, shown without a marker
+ indent = ''
+ offset = -1
+ col = index - last + 1
+ elif last <= index < m.end(0):
+ # `index` falls on this line (or on its line terminator): mark the line
+ indent = '--> '
+ offset = (-1 if index > m.start(0) else 0) + 3
+ col = index - last + 1
+ else:
+ # Any other line: rendered with no caret underneath
+ indent = ' '
+ offset = None
+ if len(text):
+ # Regardless of whether we are presented with `\r\n`, `\r`, or `\n`,
+ # we will render the output with just `\n`. We will still log the column
+ # correctly though.
+ text.append('\n')
+ text.append(f'{indent}{linetext}')
+ if offset is not None:
+ # Add the caret line under the marked line and remember its line number
+ text.append('\n')
+ text.append(' ' * (col + offset) + '^')
+ line = current_line
+
+ current_line += 1
+ last = m.end(0)
+
+ return ''.join(text), line, col