about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/lxml/html
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/lxml/html')
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/html/ElementSoup.py10
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/html/__init__.py1923
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/html/_diffcommand.py86
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/html/_html5builder.py100
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/html/_setmixin.py56
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/html/builder.py133
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/html/clean.py21
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/html/defs.py135
-rwxr-xr-x.venv/lib/python3.12/site-packages/lxml/html/diff.cpython-312-x86_64-linux-gnu.sobin0 -> 360632 bytes
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/html/diff.py878
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/html/formfill.py299
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/html/html5parser.py260
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/html/soupparser.py314
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/html/usedoctest.py13
14 files changed, 4228 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lxml/html/ElementSoup.py b/.venv/lib/python3.12/site-packages/lxml/html/ElementSoup.py
new file mode 100644
index 00000000..c35365d0
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/html/ElementSoup.py
@@ -0,0 +1,10 @@
+__doc__ = """Legacy interface to the BeautifulSoup HTML parser.
+"""
+
+__all__ = ["parse", "convert_tree"]
+
+from .soupparser import convert_tree, parse as _parse
+
def parse(file, beautifulsoup=None, makeelement=None):
    """Parse *file* through the BeautifulSoup-based soup parser and
    return the root element (unlike ``soupparser.parse``, which returns
    an ElementTree).  *beautifulsoup* and *makeelement* are passed
    through unchanged.
    """
    root = _parse(file, beautifulsoup=beautifulsoup, makeelement=makeelement)
    # _parse() returns an ElementTree; the legacy API returned the element
    return root.getroot()
diff --git a/.venv/lib/python3.12/site-packages/lxml/html/__init__.py b/.venv/lib/python3.12/site-packages/lxml/html/__init__.py
new file mode 100644
index 00000000..ec55d678
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/html/__init__.py
@@ -0,0 +1,1923 @@
+# Copyright (c) 2004 Ian Bicking. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# 3. Neither the name of Ian Bicking nor the names of its contributors may
+# be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IAN BICKING OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""The ``lxml.html`` tool set for HTML handling.
+"""
+
+
+__all__ = [
+    'document_fromstring', 'fragment_fromstring', 'fragments_fromstring', 'fromstring',
+    'tostring', 'Element', 'defs', 'open_in_browser', 'submit_form',
+    'find_rel_links', 'find_class', 'make_links_absolute',
+    'resolve_base_href', 'iterlinks', 'rewrite_links', 'parse']
+
+
+import copy
+import re
+
+from collections.abc import MutableMapping, MutableSet
+from functools import partial
+from urllib.parse import urljoin
+
+from .. import etree
+from . import defs
+from ._setmixin import SetMixin
+
+
def __fix_docstring(s):
    """Strip Py2-style ``u''`` prefixes from string literals in doctest
    text *s*.  Empty or None input is returned unchanged.
    """
    # TODO: remove and clean up doctests
    if not s:
        return s
    return re.sub(r"^(\s*)u'", r"\1'", s, flags=re.M)
+
+
XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"

# Pre-compiled XPath expressions; each matches both plain HTML tags and
# their XHTML-namespaced counterparts (bound to the 'x' prefix).
_rel_links_xpath = etree.XPath("descendant-or-self::a[@rel]|descendant-or-self::x:a[@rel]",
                               namespaces={'x':XHTML_NAMESPACE})
_options_xpath = etree.XPath("descendant-or-self::option|descendant-or-self::x:option",
                             namespaces={'x':XHTML_NAMESPACE})
_forms_xpath = etree.XPath("descendant-or-self::form|descendant-or-self::x:form",
                           namespaces={'x':XHTML_NAMESPACE})
#_class_xpath = etree.XPath(r"descendant-or-self::*[regexp:match(@class, concat('\b', $class_name, '\b'))]", {'regexp': 'http://exslt.org/regular-expressions'})
# Whole-word match of $class_name inside the space-separated @class value.
_class_xpath = etree.XPath("descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), concat(' ', $class_name, ' '))]")
_id_xpath = etree.XPath("descendant-or-self::*[@id=$id]")
_collect_string_content = etree.XPath("string()")
# Matches url(...) tokens in CSS, quoted with ", with ', or unquoted.
_iter_css_urls = re.compile(r'url\(('+'["][^"]*["]|'+"['][^']*[']|"+r'[^)]*)\)', re.I).finditer
_iter_css_imports = re.compile(r'@import "(.*?)"').finditer
_label_xpath = etree.XPath("//label[@for=$id]|//x:label[@for=$id]",
                           namespaces={'x':XHTML_NAMESPACE})
# Splits the space-separated <object archive="..."> attribute value.
_archive_re = re.compile(r'[^ ]+')
# Extracts the URL part of a <meta http-equiv=refresh> content value,
# e.g. "5; url=http://example.com" -> "http://example.com".
_parse_meta_refresh_url = re.compile(
    r'[^;=]*;\s*(?:url\s*=\s*)?(?P<url>.*)$', re.I).search
+
+
def _unquote_match(s, pos):
    """Strip one pair of matching surrounding quotes from *s*, if any.

    Returns ``(string, pos)``; *pos* is advanced by one when a leading
    quote was removed, so it still points at the first content character.
    """
    first, last = s[:1], s[-1:]
    if first == last and first in ('"', "'"):
        return s[1:-1], pos + 1
    return s, pos
+
+
def _transform_result(typ, result):
    """Serialise *result* back into the caller's input type *typ*.

    bytes input is answered with a UTF-8 serialisation, str input with a
    unicode serialisation; any other type gets *result* back unchanged.
    """
    if issubclass(typ, str):
        return tostring(result, encoding='unicode')
    if issubclass(typ, bytes):
        return tostring(result, encoding='utf-8')
    return result
+
+
def _nons(tag):
    """Return *tag* without its Clark-notation XHTML namespace prefix.

    Non-string tags (comments, PIs) are passed through untouched.
    """
    if isinstance(tag, str):
        ns_end = len(XHTML_NAMESPACE) + 1
        if tag[0] == '{' and tag[1:ns_end] == XHTML_NAMESPACE:
            return tag.split('}')[-1]
    return tag
+
+
class Classes(MutableSet):
    """Set-like proxy over an element's whitespace-separated 'class'
    attribute (usually reached through ``element.classes``).

    All mutations write the joined class list straight back into the
    underlying attribute dictionary; removing the last class deletes
    the 'class' attribute entirely.  Class names must be non-empty and
    must not contain whitespace.
    """
    def __init__(self, attributes):
        self._attributes = attributes
        self._get_class_value = partial(attributes.get, 'class', '')

    @staticmethod
    def _validate(value):
        # A class name must be a non-empty, whitespace-free token.
        if not value or re.search(r'\s', value):
            raise ValueError("Invalid class name: %r" % value)

    def add(self, value):
        """
        Add a class.

        This has no effect if the class is already present.
        """
        self._validate(value)
        names = self._get_class_value().split()
        if value not in names:
            names.append(value)
            self._attributes['class'] = ' '.join(names)

    def discard(self, value):
        """
        Remove a class if it is currently present.

        If the class is not present, do nothing.
        """
        self._validate(value)
        names = [name for name in self._get_class_value().split()
                 if name != value]
        if names:
            self._attributes['class'] = ' '.join(names)
        elif 'class' in self._attributes:
            del self._attributes['class']

    def remove(self, value):
        """
        Remove a class; it must currently be present.

        If the class is not present, raise a KeyError.
        """
        self._validate(value)
        # MutableSet.remove() raises KeyError when discard() is a no-op.
        super().remove(value)

    def __contains__(self, name):
        value = self._get_class_value()
        # Cheap substring test first, exact whole-token test second.
        return name in value and name in value.split()

    def __iter__(self):
        return iter(self._get_class_value().split())

    def __len__(self):
        return len(self._get_class_value().split())

    # non-standard methods

    def update(self, values):
        """
        Add all names from 'values'.
        """
        names = self._get_class_value().split()
        changed = False
        for value in values:
            if value not in names:
                names.append(value)
                changed = True
        if changed:
            self._attributes['class'] = ' '.join(names)

    def toggle(self, value):
        """
        Add a class name if it isn't there yet, or remove it if it exists.

        Returns true if the class was added (and is now enabled) and
        false if it was removed (and is now disabled).
        """
        self._validate(value)
        names = self._get_class_value().split()
        if value in names:
            names.remove(value)
            enabled = False
        else:
            names.append(value)
            enabled = True
        if names:
            self._attributes['class'] = ' '.join(names)
        else:
            del self._attributes['class']
        return enabled
+
+
class HtmlMixin:
    """Mixin implementing the HTML-specific API shared by all lxml.html
    node classes: class and label attribute access, tree surgery
    helpers (``drop_tree``/``drop_tag``), element lookup, and the link
    iteration/rewriting machinery.
    """

    def set(self, key, value=None):
        """set(self, key, value=None)

        Sets an element attribute.  If no value is provided, or if the value is None,
        creates a 'boolean' attribute without value, e.g. "<form novalidate></form>"
        for ``form.set('novalidate')``.
        """
        super().set(key, value)

    @property
    def classes(self):
        """
        A set-like wrapper around the 'class' attribute.
        """
        return Classes(self.attrib)

    @classes.setter
    def classes(self, classes):
        assert isinstance(classes, Classes)  # only allow "el.classes |= ..." etc.
        value = classes._get_class_value()
        if value:
            self.set('class', value)
        elif self.get('class') is not None:
            del self.attrib['class']

    @property
    def base_url(self):
        """
        Returns the base URL, given when the page was parsed.

        Use with ``urlparse.urljoin(el.base_url, href)`` to get
        absolute URLs.
        """
        return self.getroottree().docinfo.URL

    @property
    def forms(self):
        """
        Return a list of all the forms
        """
        return _forms_xpath(self)

    @property
    def body(self):
        """
        Return the <body> element.  Can be called from a child element
        to get the document's body.
        """
        return self.xpath('//body|//x:body', namespaces={'x':XHTML_NAMESPACE})[0]

    @property
    def head(self):
        """
        Returns the <head> element.  Can be called from a child
        element to get the document's head.
        """
        return self.xpath('//head|//x:head', namespaces={'x':XHTML_NAMESPACE})[0]

    @property
    def label(self):
        """
        Get or set any <label> element associated with this element.

        Association is via ``<label for="...">`` matching this
        element's 'id' attribute; returns None if there is no id or
        no matching label.
        """
        id = self.get('id')
        if not id:
            return None
        result = _label_xpath(self, id=id)
        if not result:
            return None
        else:
            return result[0]

    @label.setter
    def label(self, label):
        # Associating a label requires this element to carry an id
        # and the assigned element to actually be a <label>.
        id = self.get('id')
        if not id:
            raise TypeError(
                "You cannot set a label for an element (%r) that has no id"
                % self)
        if _nons(label.tag) != 'label':
            raise TypeError(
                "You can only assign label to a label element (not %r)"
                % label)
        label.set('for', id)

    @label.deleter
    def label(self):
        # Deleting only breaks the association ('for' attribute);
        # the <label> element itself stays in the tree.
        label = self.label
        if label is not None:
            del label.attrib['for']

    def drop_tree(self):
        """
        Removes this element from the tree, including its children and
        text.  The tail text is joined to the previous element or
        parent.
        """
        parent = self.getparent()
        assert parent is not None
        if self.tail:
            previous = self.getprevious()
            if previous is None:
                parent.text = (parent.text or '') + self.tail
            else:
                previous.tail = (previous.tail or '') + self.tail
        parent.remove(self)

    def drop_tag(self):
        """
        Remove the tag, but not its children or text.  The children and text
        are merged into the parent.

        Example::

            >>> h = fragment_fromstring('<div>Hello <b>World!</b></div>')
            >>> h.find('.//b').drop_tag()
            >>> print(tostring(h, encoding='unicode'))
            <div>Hello World!</div>
        """
        parent = self.getparent()
        assert parent is not None
        previous = self.getprevious()
        if self.text and isinstance(self.tag, str):
            # not a Comment, etc.
            if previous is None:
                parent.text = (parent.text or '') + self.text
            else:
                previous.tail = (previous.tail or '') + self.text
        if self.tail:
            # Attach the tail to whatever now precedes the merge point:
            # our last child, the previous sibling, or the parent's text.
            if len(self):
                last = self[-1]
                last.tail = (last.tail or '') + self.tail
            elif previous is None:
                parent.text = (parent.text or '') + self.tail
            else:
                previous.tail = (previous.tail or '') + self.tail
        index = parent.index(self)
        # Splice our children into the parent at our own position.
        parent[index:index+1] = self[:]

    def find_rel_links(self, rel):
        """
        Find any links like ``<a rel="{rel}">...</a>``; returns a list of elements.

        The comparison is case-insensitive.
        """
        rel = rel.lower()
        return [el for el in _rel_links_xpath(self)
                if el.get('rel').lower() == rel]

    def find_class(self, class_name):
        """
        Find any elements with the given class name.
        """
        return _class_xpath(self, class_name=class_name)

    def get_element_by_id(self, id, *default):
        """
        Get the first element in a document with the given id.  If none is
        found, return the default argument if provided or raise KeyError
        otherwise.

        Note that there can be more than one element with the same id,
        and this isn't uncommon in HTML documents found in the wild.
        Browsers return only the first match, and this function does
        the same.
        """
        try:
            # FIXME: should this check for multiple matches?
            # browsers just return the first one
            return _id_xpath(self, id=id)[0]
        except IndexError:
            if default:
                return default[0]
            else:
                raise KeyError(id)

    def text_content(self):
        """
        Return the text content of the tag (and the text in any children).
        """
        return _collect_string_content(self)

    def cssselect(self, expr, translator='html'):
        """
        Run the CSS expression on this element and its children,
        returning a list of the results.

        Equivalent to lxml.cssselect.CSSSelect(expr, translator='html')(self)
        -- note that pre-compiling the expression can provide a substantial
        speedup.
        """
        # Do the import here to make the dependency optional.
        from lxml.cssselect import CSSSelector
        return CSSSelector(expr, translator=translator)(self)

    ########################################
    ## Link functions
    ########################################

    def make_links_absolute(self, base_url=None, resolve_base_href=True,
                            handle_failures=None):
        """
        Make all links in the document absolute, given the
        ``base_url`` for the document (the full URL where the document
        came from), or if no ``base_url`` is given, then the ``.base_url``
        of the document.

        If ``resolve_base_href`` is true, then any ``<base href>``
        tags in the document are used *and* removed from the document.
        If it is false then any such tag is ignored.

        If ``handle_failures`` is None (default), a failure to process
        a URL will abort the processing.  If set to 'ignore', errors
        are ignored.  If set to 'discard', failing URLs will be removed.

        Raises TypeError when no base URL can be determined and
        ValueError for an unknown ``handle_failures`` value.
        """
        if base_url is None:
            base_url = self.base_url
            if base_url is None:
                raise TypeError(
                    "No base_url given, and the document has no base_url")
        if resolve_base_href:
            self.resolve_base_href()

        # Build the per-link callback according to the failure policy.
        if handle_failures == 'ignore':
            def link_repl(href):
                try:
                    return urljoin(base_url, href)
                except ValueError:
                    return href
        elif handle_failures == 'discard':
            def link_repl(href):
                try:
                    return urljoin(base_url, href)
                except ValueError:
                    return None
        elif handle_failures is None:
            def link_repl(href):
                return urljoin(base_url, href)
        else:
            raise ValueError(
                "unexpected value for handle_failures: %r" % handle_failures)

        self.rewrite_links(link_repl)

    def resolve_base_href(self, handle_failures=None):
        """
        Find any ``<base href>`` tag in the document, and apply its
        values to all links found in the document.  Also remove the
        tag once it has been applied.

        If ``handle_failures`` is None (default), a failure to process
        a URL will abort the processing.  If set to 'ignore', errors
        are ignored.  If set to 'discard', failing URLs will be removed.
        """
        base_href = None
        basetags = self.xpath('//base[@href]|//x:base[@href]',
                              namespaces={'x': XHTML_NAMESPACE})
        for b in basetags:
            # The last <base href> wins; all of them are removed.
            base_href = b.get('href')
            b.drop_tree()
        if not base_href:
            return
        self.make_links_absolute(base_href, resolve_base_href=False,
                                 handle_failures=handle_failures)

    def iterlinks(self):
        """
        Yield (element, attribute, link, pos), where attribute may be None
        (indicating the link is in the text).  ``pos`` is the position
        where the link occurs; often 0, but sometimes something else in
        the case of links in stylesheets or style tags.

        Note: <base href> is *not* taken into account in any way.  The
        link you get is exactly the link in the document.

        Note: multiple links inside of a single text string or
        attribute value are returned in reversed order.  This makes it
        possible to replace or delete them from the text string value
        based on their reported text positions.  Otherwise, a
        modification at one text position can change the positions of
        links reported later on.
        """
        link_attrs = defs.link_attrs
        for el in self.iter(etree.Element):
            attribs = el.attrib
            tag = _nons(el.tag)
            if tag == 'object':
                codebase = None
                ## <object> tags have attributes that are relative to
                ## codebase
                if 'codebase' in attribs:
                    codebase = el.get('codebase')
                    yield (el, 'codebase', codebase, 0)
                for attrib in ('classid', 'data'):
                    if attrib in attribs:
                        value = el.get(attrib)
                        if codebase is not None:
                            value = urljoin(codebase, value)
                        yield (el, attrib, value, 0)
                if 'archive' in attribs:
                    # 'archive' holds a space-separated list of URLs.
                    for match in _archive_re.finditer(el.get('archive')):
                        value = match.group(0)
                        if codebase is not None:
                            value = urljoin(codebase, value)
                        yield (el, 'archive', value, match.start())
            else:
                for attrib in link_attrs:
                    if attrib in attribs:
                        yield (el, attrib, attribs[attrib], 0)
            if tag == 'meta':
                http_equiv = attribs.get('http-equiv', '').lower()
                if http_equiv == 'refresh':
                    content = attribs.get('content', '')
                    match = _parse_meta_refresh_url(content)
                    url = (match.group('url') if match else content).strip()
                    # unexpected content means the redirect won't work, but we might
                    # as well be permissive and return the entire string.
                    if url:
                        url, pos = _unquote_match(
                            url, match.start('url') if match else content.find(url))
                        yield (el, 'content', url, pos)
            elif tag == 'param':
                valuetype = el.get('valuetype') or ''
                if valuetype.lower() == 'ref':
                    ## FIXME: while it's fine we *find* this link,
                    ## according to the spec we aren't supposed to
                    ## actually change the value, including resolving
                    ## it.  It can also still be a link, even if it
                    ## doesn't have a valuetype="ref" (which seems to be the norm)
                    ## http://www.w3.org/TR/html401/struct/objects.html#adef-valuetype
                    yield (el, 'value', el.get('value'), 0)
            elif tag == 'style' and el.text:
                urls = [
                    # (start_pos, url)
                    _unquote_match(match.group(1), match.start(1))[::-1]
                    for match in _iter_css_urls(el.text)
                    ] + [
                    (match.start(1), match.group(1))
                    for match in _iter_css_imports(el.text)
                    ]
                if urls:
                    # sort by start pos to bring both match sets back into order
                    # and reverse the list to report correct positions despite
                    # modifications
                    urls.sort(reverse=True)
                    for start, url in urls:
                        yield (el, None, url, start)
            if 'style' in attribs:
                urls = list(_iter_css_urls(attribs['style']))
                if urls:
                    # return in reversed order to simplify in-place modifications
                    for match in urls[::-1]:
                        url, start = _unquote_match(match.group(1), match.start(1))
                        yield (el, 'style', url, start)

    def rewrite_links(self, link_repl_func, resolve_base_href=True,
                      base_href=None):
        """
        Rewrite all the links in the document.  For each link
        ``link_repl_func(link)`` will be called, and the return value
        will replace the old link.

        Note that links may not be absolute (unless you first called
        ``make_links_absolute()``), and may be internal (e.g.,
        ``'#anchor'``).  They can also be values like
        ``'mailto:email'`` or ``'javascript:expr'``.

        If you give ``base_href`` then all links passed to
        ``link_repl_func()`` will take that into account.

        If the ``link_repl_func`` returns None, the attribute or
        tag text will be removed completely.
        """
        if base_href is not None:
            # FIXME: this can be done in one pass with a wrapper
            # around link_repl_func
            self.make_links_absolute(
                base_href, resolve_base_href=resolve_base_href)
        elif resolve_base_href:
            self.resolve_base_href()

        # iterlinks() reports links in reversed in-text order, so the
        # positions of links not yet rewritten stay valid (see iterlinks).
        for el, attrib, link, pos in self.iterlinks():
            new_link = link_repl_func(link.strip())
            if new_link == link:
                # unchanged: nothing to write back
                continue
            if new_link is None:
                # Remove the attribute or element content
                if attrib is None:
                    el.text = ''
                else:
                    del el.attrib[attrib]
                continue

            if attrib is None:
                # Link embedded in text (e.g. inside a <style> tag).
                new = el.text[:pos] + new_link + el.text[pos+len(link):]
                el.text = new
            else:
                cur = el.get(attrib)
                if not pos and len(cur) == len(link):
                    new = new_link  # most common case
                else:
                    new = cur[:pos] + new_link + cur[pos+len(link):]
                el.set(attrib, new)
+
+
class _MethodFunc:
    """
    Wraps an HtmlMixin method as a module-level function accepting
    either an element or an HTML string.  In-place methods (which
    return None) yield a re-serialised document in the input's type;
    any other return value is passed through unchanged.
    """
    def __init__(self, name, copy=False, source_class=HtmlMixin):
        self.name = name
        self.copy = copy
        # inherit the wrapped method's docstring
        self.__doc__ = getattr(source_class, name).__doc__

    def __call__(self, doc, *args, **kw):
        input_type = type(doc)
        if not isinstance(doc, (str, bytes)):
            # Element input: honour an explicit 'copy' keyword, falling
            # back to the default configured for this method.
            make_a_copy = kw.pop('copy') if 'copy' in kw else self.copy
            if make_a_copy:
                doc = copy.deepcopy(doc)
        else:
            if 'copy' in kw:
                raise TypeError(
                    "The keyword 'copy' can only be used with element inputs to %s, not a string input" % self.name)
            doc = fromstring(doc, **kw)
        result = getattr(doc, self.name)(*args, **kw)
        # FIXME: this None test is a bit sloppy
        if result is not None:
            return result
        # In-place method: hand back the (possibly copied) document,
        # serialised to match the input type.
        return _transform_result(input_type, doc)
+
+
# Module-level convenience wrappers around the HtmlMixin methods.
# copy=True marks the in-place methods: when called with an element,
# the document is deep-copied before modification (unless overridden
# with an explicit copy= keyword).
find_rel_links = _MethodFunc('find_rel_links', copy=False)
find_class = _MethodFunc('find_class', copy=False)
make_links_absolute = _MethodFunc('make_links_absolute', copy=True)
resolve_base_href = _MethodFunc('resolve_base_href', copy=True)
iterlinks = _MethodFunc('iterlinks', copy=False)
rewrite_links = _MethodFunc('rewrite_links', copy=True)
+
+
class HtmlComment(HtmlMixin, etree.CommentBase):
    """An HTML comment node carrying the HtmlMixin API."""
    pass
+
+
class HtmlElement(HtmlMixin, etree.ElementBase):
    """The default element class for HTML documents, combining the
    generic etree element with the HtmlMixin API."""
    pass
+
+
class HtmlProcessingInstruction(HtmlMixin, etree.PIBase):
    """A processing instruction node carrying the HtmlMixin API."""
    pass
+
+
class HtmlEntity(HtmlMixin, etree.EntityBase):
    """An entity reference node carrying the HtmlMixin API."""
    pass
+
+
class HtmlElementClassLookup(etree.CustomElementClassLookup):
    """A lookup scheme for HTML Element classes.

    To create a lookup instance with different Element classes, pass a tag
    name mapping of Element classes in the ``classes`` keyword argument and/or
    a tag name mapping of Mixin classes in the ``mixins`` keyword argument.
    The special key '*' denotes a Mixin class that should be mixed into all
    Element classes.
    """
    # Filled in elsewhere with the per-tag default element classes.
    _default_element_classes = {}

    def __init__(self, classes=None, mixins=None):
        etree.CustomElementClassLookup.__init__(self)
        if classes is None:
            classes = self._default_element_classes.copy()
        if mixins:
            # Collect mixin classes per tag name; '*' fans out to every
            # tag currently mapped in 'classes'.
            mixers = {}
            for name, value in mixins:
                targets = classes.keys() if name == '*' else (name,)
                for n in targets:
                    mixers.setdefault(n, []).append(value)
            # Synthesise a new class per tag, mixins first in the MRO.
            for name, mix_bases in mixers.items():
                cur = classes.get(name, HtmlElement)
                classes[name] = type(cur.__name__, tuple(mix_bases) + (cur,), {})
        self._element_classes = classes

    def lookup(self, node_type, document, namespace, name):
        if node_type == 'element':
            return self._element_classes.get(name.lower(), HtmlElement)
        if node_type == 'comment':
            return HtmlComment
        if node_type == 'PI':
            return HtmlProcessingInstruction
        if node_type == 'entity':
            return HtmlEntity
        # Otherwise fall back to the normal class lookup.
        return None
+
+
+################################################################################
+# parsing
+################################################################################
+
# Heuristics used by the fromstring() family to decide whether input
# is a full document (starts with <html> or a doctype) or a fragment.
_looks_like_full_html_unicode = re.compile(
    r'^\s*<(?:html|!doctype)', re.I).match
_looks_like_full_html_bytes = re.compile(
    br'^\s*<(?:html|!doctype)', re.I).match
+
+
def document_fromstring(html, parser=None, ensure_head_body=False, **kw):
    """Parse *html* as a complete document and return the root element.

    If ``ensure_head_body`` is true, a missing <head> or <body> element
    is created.  Extra keyword arguments are passed to
    ``etree.fromstring()``.  Raises ParserError for empty documents.
    """
    if parser is None:
        parser = html_parser
    value = etree.fromstring(html, parser, **kw)
    if value is None:
        raise etree.ParserError(
            "Document is empty")
    if ensure_head_body and value.find('head') is None:
        value.insert(0, Element('head'))
    if ensure_head_body and value.find('body') is None:
        value.append(Element('body'))
    return value
+
+
def fragments_fromstring(html, no_leading_text=False, base_url=None,
                         parser=None, **kw):
    """Parses several HTML elements, returning a list of elements.

    The first item in the list may be a string.
    If no_leading_text is true, then it will be an error if there is
    leading text, and it will always be a list of only elements.

    base_url will set the document's base_url attribute
    (and the tree's docinfo.URL).
    """
    if parser is None:
        parser = html_parser
    # FIXME: check what happens when you give html with a body, head, etc.
    # Wrap bare fragments in a dummy document so the parser accepts them.
    if isinstance(html, bytes):
        if not _looks_like_full_html_bytes(html):
            # can't use %-formatting in early Py3 versions
            html = (b'<html><body>' + html +
                    b'</body></html>')
    else:
        if not _looks_like_full_html_unicode(html):
            html = '<html><body>%s</body></html>' % html
    doc = document_fromstring(html, parser=parser, base_url=base_url, **kw)
    assert _nons(doc.tag) == 'html'
    bodies = [e for e in doc if _nons(e.tag) == 'body']
    assert len(bodies) == 1, ("too many bodies: %r in %r" % (bodies, html))
    body = bodies[0]
    elements = []
    if no_leading_text and body.text and body.text.strip():
        raise etree.ParserError(
            "There is leading text: %r" % body.text)
    if body.text and body.text.strip():
        elements.append(body.text)
    elements.extend(body)
    # FIXME: removing the reference to the parent artificial document
    # would be nice
    return elements
+
+
def fragment_fromstring(html, create_parent=False, base_url=None,
                        parser=None, **kw):
    """
    Parses a single HTML element; it is an error if there is more than
    one element, or if anything but whitespace precedes or follows the
    element.

    If ``create_parent`` is true (or is a tag name) then a parent node
    will be created to encapsulate the HTML in a single element.  In this
    case, leading or trailing text is also allowed, as are multiple elements
    as result of the parsing.

    Passing a ``base_url`` will set the document's ``base_url`` attribute
    (and the tree's docinfo.URL).
    """
    if parser is None:
        parser = html_parser

    # When a wrapper parent is requested, leading text (and multiple
    # elements) are acceptable rather than errors.
    elements = fragments_fromstring(
        html, parser=parser, no_leading_text=not bool(create_parent),
        base_url=base_url, **kw)

    if create_parent:
        # create_parent may be a tag name; a bare truthy value means 'div'.
        tag = create_parent if isinstance(create_parent, str) else 'div'
        new_root = Element(tag)
        if elements:
            if isinstance(elements[0], str):
                new_root.text = elements[0]
                elements = elements[1:]
            new_root.extend(elements)
        return new_root

    if not elements:
        raise etree.ParserError('No elements found')
    if len(elements) > 1:
        raise etree.ParserError(
            "Multiple elements found (%s)"
            % ', '.join([_element_name(e) for e in elements]))
    result = elements[0]
    if result.tail and result.tail.strip():
        raise etree.ParserError(
            "Element followed by text: %r" % result.tail)
    result.tail = None
    return result
+
+
def fromstring(html, base_url=None, parser=None, **kw):
    """
    Parse the html, returning a single element/document.

    This tries to minimally parse the chunk of text, without knowing if it
    is a fragment or a document.

    base_url will set the document's base_url attribute (and the tree's docinfo.URL)
    """
    if parser is None:
        parser = html_parser
    if isinstance(html, bytes):
        is_full_html = _looks_like_full_html_bytes(html)
    else:
        is_full_html = _looks_like_full_html_unicode(html)
    doc = document_fromstring(html, parser=parser, base_url=base_url, **kw)
    if is_full_html:
        return doc
    # otherwise, lets parse it out...
    bodies = doc.findall('body')
    if not bodies:
        bodies = doc.findall('{%s}body' % XHTML_NAMESPACE)
    if bodies:
        body = bodies[0]
        if len(bodies) > 1:
            # Somehow there are multiple bodies, which is bad, but just
            # smash them into one body
            for other_body in bodies[1:]:
                if other_body.text:
                    # Re-attach the extra body's leading text: as tail of
                    # the merged body's last child, or as the body's text
                    # if it has no children yet.
                    if len(body):
                        body[-1].tail = (body[-1].tail or '') + other_body.text
                    else:
                        body.text = (body.text or '') + other_body.text
                body.extend(other_body)
                # We'll ignore tail
                # I guess we are ignoring attributes too
                other_body.drop_tree()
    else:
        body = None
    heads = doc.findall('head')
    if not heads:
        heads = doc.findall('{%s}head' % XHTML_NAMESPACE)
    if heads:
        # Well, we have some sort of structure, so lets keep it all
        head = heads[0]
        if len(heads) > 1:
            # Merge duplicate heads into the first one, as with bodies.
            for other_head in heads[1:]:
                head.extend(other_head)
                # We don't care about text or tail in a head
                other_head.drop_tree()
        return doc
    if body is None:
        return doc
    if (len(body) == 1 and (not body.text or not body.text.strip())
        and (not body[-1].tail or not body[-1].tail.strip())):
        # The body has just one element, so it was probably a single
        # element passed in
        return body[0]
    # Now we have a body which represents a bunch of tags which have the
    # content that was passed in.  We will create a fake container, which
    # is the body tag, except <body> implies too much structure.
    if _contains_block_level_tag(body):
        body.tag = 'div'
    else:
        body.tag = 'span'
    return body
+
+
def parse(filename_or_url, parser=None, base_url=None, **kw):
    """
    Parse a filename, URL, or file-like object into an HTML document
    tree.  Note: this returns a tree, not an element.  Use
    ``parse(...).getroot()`` to get the document root.

    You can override the base URL with the ``base_url`` keyword.  This
    is most useful when parsing from a file-like object.
    """
    # Default to the module-level HTML parser when none is given.
    active_parser = html_parser if parser is None else parser
    return etree.parse(filename_or_url, active_parser, base_url=base_url, **kw)
+
+
def _contains_block_level_tag(el):
    """True if *el* or any descendant element is a block-level tag."""
    # FIXME: I could do this with XPath, but would that just be
    # unnecessarily slow?
    return any(_nons(descendant.tag) in defs.block_tags
               for descendant in el.iter(etree.Element))
+
+
def _element_name(el):
    """Return a short descriptive name for *el*, used in error messages."""
    # Strings and comments get fixed labels; elements use their bare tag.
    if isinstance(el, str):
        return 'string'
    if isinstance(el, etree.CommentBase):
        return 'comment'
    return _nons(el.tag)
+
+
+################################################################################
+# form handling
+################################################################################
+
class FormElement(HtmlElement):
    """
    Represents a <form> element.
    """

    @property
    def inputs(self):
        """
        Returns an accessor for all the input elements in the form.

        See `InputGetter` for more information about the object.
        """
        return InputGetter(self)

    @property
    def fields(self):
        """
        Dictionary-like object that represents all the fields in this
        form.  You can set values in this dictionary to effect the
        form.
        """
        return FieldsDict(self.inputs)

    @fields.setter
    def fields(self, value):
        # Replace all field values: fields named in ``value`` are set,
        # and every other pre-existing field is reset to None.
        fields = self.fields
        # InputGetter.keys() returns a list, so .remove() below works.
        prev_keys = fields.keys()
        for key, value in value.items():
            if key in prev_keys:
                prev_keys.remove(key)
            fields[key] = value
        for key in prev_keys:
            if key is None:
                # Case of an unnamed input; these aren't really
                # expressed in form_values() anyway.
                continue
            fields[key] = None

    def _name(self):
        # Human-readable identifier for this form (used in reprs): its
        # name attribute, '#'+id, or its index among all forms in the body.
        if self.get('name'):
            return self.get('name')
        elif self.get('id'):
            return '#' + self.get('id')
        iter_tags = self.body.iter
        forms = list(iter_tags('form'))
        if not forms:
            forms = list(iter_tags('{%s}form' % XHTML_NAMESPACE))
        return str(forms.index(self))

    def form_values(self):
        """
        Return a list of tuples of the field values for the form.
        This is suitable to be passed to ``urllib.urlencode()``.
        """
        results = []
        for el in self.inputs:
            name = el.name
            # Unnamed or disabled controls are never submitted.
            if not name or 'disabled' in el.attrib:
                continue
            tag = _nons(el.tag)
            if tag == 'textarea':
                results.append((name, el.value))
            elif tag == 'select':
                value = el.value
                if el.multiple:
                    # A multi-select contributes one pair per selected option.
                    for v in value:
                        results.append((name, v))
                elif value is not None:
                    results.append((name, el.value))
            else:
                assert tag == 'input', (
                    "Unexpected tag: %r" % el)
                # Unchecked checkboxes/radios contribute nothing.
                if el.checkable and not el.checked:
                    continue
                # These input types are only meaningful inside a browser.
                if el.type in ('submit', 'image', 'reset', 'file'):
                    continue
                value = el.value
                if value is not None:
                    results.append((name, el.value))
        return results

    @property
    def action(self):
        """
        Get/set the form's ``action`` attribute.
        """
        base_url = self.base_url
        action = self.get('action')
        if base_url and action is not None:
            # Resolve a relative action against the document's base URL.
            return urljoin(base_url, action)
        else:
            return action

    @action.setter
    def action(self, value):
        self.set('action', value)

    @action.deleter
    def action(self):
        attrib = self.attrib
        if 'action' in attrib:
            del attrib['action']

    @property
    def method(self):
        """
        Get/set the form's method.  Always returns a capitalized
        string, and defaults to ``'GET'``
        """
        return self.get('method', 'GET').upper()

    @method.setter
    def method(self, value):
        self.set('method', value.upper())
+
+
# Register FormElement as the element class used for <form> tags.
HtmlElementClassLookup._default_element_classes['form'] = FormElement
+
+
def submit_form(form, extra_values=None, open_http=None):
    """
    Helper function to submit a form.  Returns a file-like object, as from
    ``urllib.urlopen()``.  This object also has a ``.geturl()`` function,
    which shows the URL if there were any redirects.

    You can use this like::

        form = doc.forms[0]
        form.inputs['foo'].value = 'bar' # etc
        response = form.submit()
        doc = parse(response)
        doc.make_links_absolute(response.geturl())

    To change the HTTP requester, pass a function as ``open_http`` keyword
    argument that opens the URL for you.  The function must have the following
    signature::

        open_http(method, URL, values)

    The action is one of 'GET' or 'POST', the URL is the target URL as a
    string, and the values are a sequence of ``(name, value)`` tuples with the
    form data.
    """
    values = form.form_values()
    if extra_values:
        # Accept either a mapping or a sequence of (name, value) pairs.
        if hasattr(extra_values, 'items'):
            extra_values = extra_values.items()
        values.extend(extra_values)
    # Fall back to the document URL when the form has no action.
    url = form.action or form.base_url
    opener = open_http if open_http is not None else open_http_urllib
    return opener(form.method, url, values)
+
+
def open_http_urllib(method, url, values):
    """Default HTTP opener used by ``submit_form()``.

    Sends *values* (a sequence of ``(name, value)`` pairs) to *url* with
    the given *method* ('GET' appends a query string; anything else posts
    an ASCII-encoded urlencoded body) and returns the ``urlopen`` response.

    Raises ValueError if *url* is empty.
    """
    if not url:
        raise ValueError("cannot submit, no URL provided")
    ## FIXME: should test that it's not a relative URL or something
    # This package targets Python 3 only, so the historical Python 2
    # `from urllib import urlencode, urlopen` fallback was removed.
    from urllib.request import urlopen
    from urllib.parse import urlencode
    if method == 'GET':
        # Append the form data to any existing query string.
        url += '&' if '?' in url else '?'
        url += urlencode(values)
        data = None
    else:
        # urlencode() returns str on Python 3; urlopen needs bytes.
        data = urlencode(values).encode('ASCII')
    return urlopen(url, data)
+
+
class FieldsDict(MutableMapping):
    """Dictionary-like view over a form's inputs, mapping field names to
    their current values.  Keys cannot be deleted."""

    def __init__(self, inputs):
        self.inputs = inputs

    def __getitem__(self, item):
        return self.inputs[item].value

    def __setitem__(self, item, value):
        self.inputs[item].value = value

    def __delitem__(self, item):
        # Fields cannot be removed through this view.
        raise KeyError(
            "You cannot remove keys from ElementDict")

    def keys(self):
        return self.inputs.keys()

    def __contains__(self, item):
        return item in self.inputs

    def __iter__(self):
        return iter(self.inputs.keys())

    def __len__(self):
        return len(self.inputs)

    def __repr__(self):
        return f'<{self.__class__.__name__} for form {self.inputs.form._name()}>'
+
+
class InputGetter:
    """
    An accessor that represents all the input fields in a form.

    Individual fields are available by name via
    ``form.inputs['field_name']``.  Several checkboxes or radio buttons
    sharing one name come back grouped, as a `CheckboxGroup` or
    `RadioGroup` (both of which also allow value setting).  Use
    ``.keys()`` and ``.items()`` to process all fields this way.

    Iterating yields every individual input element, so checkboxes and
    radio buttons appear one by one rather than grouped.
    """

    def __init__(self, form):
        self.form = form

    def __repr__(self):
        return '<%s for form %s>' % (
            self.__class__.__name__,
            self.form._name())

    ## FIXME: there should be more methods, and it's unclear if this is
    ## a dictionary-like object or list-like object

    def __getitem__(self, name):
        matches = [field for field in self if field.name == name]
        if not matches:
            raise KeyError("No input element with the name %r" % name)
        if len(matches) > 1:
            input_type = matches[0].get('type')
            if input_type == 'radio':
                group = RadioGroup(matches)
                group.name = name
                return group
            if input_type == 'checkbox':
                group = CheckboxGroup(matches)
                group.name = name
                return group
        # I don't like throwing away elements like this
        return matches[0]

    def __contains__(self, name):
        return any(field.name == name for field in self)

    def keys(self):
        """
        Returns all unique field names, in document order.

        :return: A list of all unique field names.
        """
        names = []
        seen = {None}  # unnamed fields are excluded from keys()
        for field in self:
            if field.name not in seen:
                seen.add(field.name)
                names.append(field.name)
        return names

    def items(self):
        """
        Returns all fields with their names, similar to dict.items().

        :return: A list of (name, field) tuples.
        """
        pairs = []
        seen = set()
        for field in self:
            name = field.name
            if name not in seen:
                seen.add(name)
                pairs.append((name, self[name]))
        return pairs

    def __iter__(self):
        return self.form.iter('select', 'input', 'textarea')

    def __len__(self):
        return sum(1 for _ in self)
+
+
class InputMixin:
    """
    Mix-in shared by all input elements (input, select, and textarea).
    """

    @property
    def name(self):
        """
        Get/set the name of the element
        """
        return self.get('name')

    @name.setter
    def name(self, value):
        self.set('name', value)

    @name.deleter
    def name(self):
        # Deleting an absent name is a silent no-op.
        if 'name' in self.attrib:
            del self.attrib['name']

    def __repr__(self):
        type_attr = getattr(self, 'type', None)
        type_part = ' type=%r' % type_attr if type_attr else ''
        return '<%s %x name=%r%s>' % (
            type(self).__name__, id(self), self.name, type_part)
+
+
class TextareaElement(InputMixin, HtmlElement):
    """
    ``<textarea>`` element.  Exposes the element name via ``.name`` and
    its textual contents via the read/write ``.value`` property.
    """

    @property
    def value(self):
        """
        Get/set the value (which is the contents of this element)
        """
        # XHTML textareas serialise children as XML, plain HTML as HTML.
        if self.tag.startswith("{%s}" % XHTML_NAMESPACE):
            serialisation_method = 'xml'
        else:
            serialisation_method = 'html'
        # Child elements inside a textarea are rare; serialise any that
        # exist back into the value.
        parts = [self.text or '']
        for child in self:
            parts.append(etree.tostring(
                child, method=serialisation_method, encoding='unicode'))
        return ''.join(parts)

    @value.setter
    def value(self, value):
        del self[:]
        self.text = value

    @value.deleter
    def value(self):
        self.text = ''
        del self[:]
+
+
# Register TextareaElement as the element class used for <textarea> tags.
HtmlElementClassLookup._default_element_classes['textarea'] = TextareaElement
+
+
class SelectElement(InputMixin, HtmlElement):
    """
    ``<select>`` element.  You can get the name with ``.name``.

    ``.value`` will be the value of the selected option, unless this
    is a multi-select element (``<select multiple>``), in which case
    it will be a set-like object.  In either case ``.value_options``
    gives the possible values.

    The boolean attribute ``.multiple`` shows if this is a
    multi-select.
    """
    @property
    def value(self):
        """
        Get/set the value of this select (the selected option).

        If this is a multi-select, this is a set-like object that
        represents all the selected options.
        """
        if self.multiple:
            return MultipleSelectOptions(self)
        options = _options_xpath(self)

        # The last option carrying a 'selected' attribute wins; with no
        # explicit selection, fall back to the first non-disabled option.
        try:
            selected_option = next(el for el in reversed(options) if el.get('selected') is not None)
        except StopIteration:
            try:
                selected_option = next(el for el in options if el.get('disabled') is None)
            except StopIteration:
                return None
        value = selected_option.get('value')
        if value is None:
            # An option without a value attribute uses its stripped text.
            value = (selected_option.text or '').strip()
        return value

    @value.setter
    def value(self, value):
        if self.multiple:
            if isinstance(value, str):
                raise TypeError("You must pass in a sequence")
            values = self.value
            values.clear()
            values.update(value)
            return
        checked_option = None
        if value is not None:
            # Locate the matching option before clearing any selection,
            # so an invalid value leaves the element unchanged.
            for el in _options_xpath(self):
                opt_value = el.get('value')
                if opt_value is None:
                    opt_value = (el.text or '').strip()
                if opt_value == value:
                    checked_option = el
                    break
            else:
                raise ValueError(
                    "There is no option with the value of %r" % value)
        for el in _options_xpath(self):
            if 'selected' in el.attrib:
                del el.attrib['selected']
        if checked_option is not None:
            checked_option.set('selected', '')

    @value.deleter
    def value(self):
        # FIXME: should del be allowed at all?
        if self.multiple:
            self.value.clear()
        else:
            self.value = None

    @property
    def value_options(self):
        """
        All the possible values this select can have (the ``value``
        attribute of all the ``<option>`` elements.
        """
        options = []
        for el in _options_xpath(self):
            value = el.get('value')
            if value is None:
                value = (el.text or '').strip()
            options.append(value)
        return options

    @property
    def multiple(self):
        """
        Boolean attribute: is there a ``multiple`` attribute on this element.
        """
        return 'multiple' in self.attrib

    @multiple.setter
    def multiple(self, value):
        if value:
            self.set('multiple', '')
        elif 'multiple' in self.attrib:
            del self.attrib['multiple']
+
+
# Register SelectElement as the element class used for <select> tags.
HtmlElementClassLookup._default_element_classes['select'] = SelectElement
+
+
class MultipleSelectOptions(SetMixin):
    """
    Represents all the selected options in a ``<select multiple>`` element.

    You can add to this set-like option to select an option, or remove
    to unselect the option.
    """

    def __init__(self, select):
        self.select = select

    @property
    def options(self):
        """
        Iterator of all the ``<option>`` elements.
        """
        return iter(_options_xpath(self.select))

    @staticmethod
    def _option_value(option):
        # An <option> without a value attribute uses its stripped text
        # content as its value (same rule as SelectElement.value).
        value = option.get('value')
        if value is None:
            value = (option.text or '').strip()
        return value

    def __iter__(self):
        for option in self.options:
            if 'selected' in option.attrib:
                yield self._option_value(option)

    def add(self, item):
        """Select the option whose value is *item*.

        Raises ValueError if no option has that value.
        """
        for option in self.options:
            if self._option_value(option) == item:
                option.set('selected', '')
                break
        else:
            raise ValueError(
                "There is no option with the value %r" % item)

    def remove(self, item):
        """Unselect the option whose value is *item*.

        Raises ValueError if no option has that value, or if that
        option is not currently selected.
        """
        for option in self.options:
            if self._option_value(option) == item:
                if 'selected' in option.attrib:
                    del option.attrib['selected']
                else:
                    raise ValueError(
                        "The option %r is not currently selected" % item)
                break
        else:
            # Fixed: message previously read "There is not option ..." —
            # now grammatical and consistent with add().
            raise ValueError(
                "There is no option with the value %r" % item)

    def __repr__(self):
        return '<%s {%s} for select name=%r>' % (
            self.__class__.__name__,
            ', '.join([repr(v) for v in self]),
            self.select.name)
+
+
class RadioGroup(list):
    """
    A list of ``<input type=radio>`` elements sharing one name.

    Besides behaving as a plain list, the ``.value`` property
    checks/unchecks inputs, and ``.value_options`` lists the possible
    values.
    """

    @property
    def value(self):
        """
        Get/set the value, which checks the radio with that value (and
        unchecks any other value).
        """
        for input_el in self:
            if 'checked' in input_el.attrib:
                return input_el.get('value')
        return None

    @value.setter
    def value(self, value):
        target = None
        if value is not None:
            # Find the matching radio before clearing anything, so an
            # invalid value leaves the group untouched.
            matches = [el for el in self if el.get('value') == value]
            if not matches:
                raise ValueError("There is no radio input with the value %r" % value)
            target = matches[0]
        for input_el in self:
            if 'checked' in input_el.attrib:
                del input_el.attrib['checked']
        if target is not None:
            target.set('checked', '')

    @value.deleter
    def value(self):
        self.value = None

    @property
    def value_options(self):
        """
        Returns a list of all the possible values.
        """
        return [radio.get('value') for radio in self]

    def __repr__(self):
        return '%s(%s)' % (type(self).__name__, list.__repr__(self))
+
+
class CheckboxGroup(list):
    """
    A list of checkboxes (``<input type=checkbox>``) sharing one name.

    Besides behaving as a plain list, the ``.value`` property returns a
    set-like object that can be added to or removed from to check and
    uncheck boxes, and ``.value_options`` lists the possible values.
    """

    @property
    def value(self):
        """
        Return a set-like object that can be modified to check or
        uncheck individual checkboxes according to their value.
        """
        return CheckboxValues(self)

    @value.setter
    def value(self, value):
        box_values = self.value
        box_values.clear()
        if not hasattr(value, '__iter__'):
            raise ValueError(
                "A CheckboxGroup (name=%r) must be set to a sequence (not %r)"
                % (self[0].name, value))
        box_values.update(value)

    @value.deleter
    def value(self):
        self.value.clear()

    @property
    def value_options(self):
        """
        Returns a list of all the possible values.
        """
        return [checkbox.get('value') for checkbox in self]

    def __repr__(self):
        return '%s(%s)' % (type(self).__name__, list.__repr__(self))
+
+
class CheckboxValues(SetMixin):
    """
    Set-like view of the values of the checked checkboxes in a group of
    checkboxes sharing one name.
    """

    def __init__(self, group):
        self.group = group

    def __iter__(self):
        checked = [box.get('value')
                   for box in self.group
                   if 'checked' in box.attrib]
        return iter(checked)

    def add(self, value):
        """Check the box with the given value (KeyError if absent)."""
        for box in self.group:
            if box.get('value') == value:
                box.set('checked', '')
                break
        else:
            raise KeyError("No checkbox with value %r" % value)

    def remove(self, value):
        """Uncheck the box with the given value.

        Raises KeyError if no such box exists or it is already unchecked.
        """
        for box in self.group:
            if box.get('value') != value:
                continue
            if 'checked' not in box.attrib:
                raise KeyError(
                    "The checkbox with value %r was already unchecked" % value)
            del box.attrib['checked']
            break
        else:
            raise KeyError(
                "No checkbox with value %r" % value)

    def __repr__(self):
        return '<%s {%s} for checkboxes name=%r>' % (
            self.__class__.__name__,
            ', '.join([repr(v) for v in self]),
            self.group.name)
+
+
class InputElement(InputMixin, HtmlElement):
    """
    Represents an ``<input>`` element.

    ``.type`` gives the input type (lower-cased, defaulting to
    ``'text'``), and ``.value`` gives/sets the value.

    Checkboxes and radios report ``input.checkable == True`` (all
    other types False) and expose a boolean ``.checked`` attribute.
    """

    ## FIXME: I'm a little uncomfortable with the use of .checked
    @property
    def value(self):
        """
        Get/set the value of this element, using the ``value`` attribute.

        Also, if this is a checkbox and it has no value, this defaults
        to ``'on'``.  If it is a checkbox or radio that is not
        checked, this returns None.
        """
        if not self.checkable:
            return self.get('value')
        # Checkable inputs only report a value while checked; a missing
        # value attribute defaults to 'on'.
        if not self.checked:
            return None
        return self.get('value') or 'on'

    @value.setter
    def value(self, value):
        if not self.checkable:
            self.set('value', value)
            return
        if value:
            self.checked = True
            if isinstance(value, str):
                self.set('value', value)
        else:
            self.checked = False

    @value.deleter
    def value(self):
        if self.checkable:
            self.checked = False
        elif 'value' in self.attrib:
            del self.attrib['value']

    @property
    def type(self):
        """
        Return the type of this element (using the type attribute).
        """
        return self.get('type', 'text').lower()

    @type.setter
    def type(self, value):
        self.set('type', value)

    @property
    def checkable(self):
        """
        Boolean: can this element be checked?
        """
        return self.type in ('checkbox', 'radio')

    @property
    def checked(self):
        """
        Boolean attribute to get/set the presence of the ``checked``
        attribute.

        You can only use this on checkable input types.
        """
        if not self.checkable:
            raise AttributeError('Not a checkable input type')
        return 'checked' in self.attrib

    @checked.setter
    def checked(self, value):
        if not self.checkable:
            raise AttributeError('Not a checkable input type')
        if value:
            self.set('checked', '')
        elif 'checked' in self.attrib:
            del self.attrib['checked']
+
+
# Register InputElement as the element class used for <input> tags.
HtmlElementClassLookup._default_element_classes['input'] = InputElement
+
+
class LabelElement(HtmlElement):
    """
    Represents a ``<label>`` element.

    Label elements are linked to other elements with their ``for``
    attribute.  You can access this element with ``label.for_element``.
    """
    @property
    def for_element(self):
        """
        Get/set the element this label points to.  Return None if it
        can't be found.
        """
        id = self.get('for')
        if not id:
            return None
        return self.body.get_element_by_id(id)

    @for_element.setter
    def for_element(self, other):
        id = other.get('id')
        if not id:
            raise TypeError(
                "Element %r has no id attribute" % other)
        self.set('for', id)

    @for_element.deleter
    def for_element(self):
        # Bug fix: this previously deleted the label's own 'id' attribute;
        # the link this property manages is the 'for' attribute (see the
        # getter/setter above).
        attrib = self.attrib
        if 'for' in attrib:
            del attrib['for']
+
+
# Register LabelElement as the element class used for <label> tags.
HtmlElementClassLookup._default_element_classes['label'] = LabelElement
+
+
+############################################################
+## Serialization
+############################################################
+
def html_to_xhtml(html):
    """Convert all tags in an HTML tree to XHTML by moving them to the
    XHTML namespace.
    """
    try:
        html = html.getroot()
    except AttributeError:
        # Already an element rather than an ElementTree.
        pass
    xhtml_prefix = "{%s}" % XHTML_NAMESPACE
    for element in html.iter(etree.Element):
        # Only namespace-less tags are moved; already-namespaced ones stay.
        if element.tag[0] != '{':
            element.tag = xhtml_prefix + element.tag
+
+
def xhtml_to_html(xhtml):
    """Convert all tags in an XHTML tree to HTML by removing their
    XHTML namespace.
    """
    try:
        xhtml = xhtml.getroot()
    except AttributeError:
        # Already an element rather than an ElementTree.
        pass
    ns_prefix = "{%s}" % XHTML_NAMESPACE
    strip_len = len(ns_prefix)
    # Only visit elements that actually carry the XHTML namespace.
    for element in xhtml.iter(ns_prefix + "*"):
        element.tag = element.tag[strip_len:]
+
+
# This isn't a general match, but it's a match for what libxml2
# specifically serialises; used by tostring() below to strip the
# injected <meta http-equiv="Content-Type"> tag again:
__str_replace_meta_content_type = re.compile(
    r'<meta http-equiv="Content-Type"[^>]*>').sub
__bytes_replace_meta_content_type = re.compile(
    br'<meta http-equiv="Content-Type"[^>]*>').sub
+
+
def tostring(doc, pretty_print=False, include_meta_content_type=False,
             encoding=None, method="html", with_tail=True, doctype=None):
    """Return an HTML string representation of the document.

    Note: if include_meta_content_type is true this will create a
    ``<meta http-equiv="Content-Type" ...>`` tag in the head;
    regardless of the value of include_meta_content_type any existing
    ``<meta http-equiv="Content-Type" ...>`` tag will be removed

    The ``encoding`` argument controls the output encoding (defaults to
    ASCII, with &#...; character references for any characters outside
    of ASCII).  Note that you can pass the name ``'unicode'`` as
    ``encoding`` argument to serialise to a Unicode string.

    The ``method`` argument defines the output method.  It defaults to
    'html', but can also be 'xml' for xhtml output, or 'text' to
    serialise to plain text without markup.

    To leave out the tail text of the top-level element that is being
    serialised, pass ``with_tail=False``.

    The ``doctype`` option allows passing in a plain string that will
    be serialised before the XML tree.  Note that passing in non
    well-formed content here will make the XML output non well-formed.
    Also, an existing doctype in the document tree will not be removed
    when serialising an ElementTree instance.

    Example::

        >>> from lxml import html
        >>> root = html.fragment_fromstring('<p>Hello<br>world!</p>')

        >>> html.tostring(root)
        b'<p>Hello<br>world!</p>'
        >>> html.tostring(root, method='html')
        b'<p>Hello<br>world!</p>'

        >>> html.tostring(root, method='xml')
        b'<p>Hello<br/>world!</p>'

        >>> html.tostring(root, method='text')
        b'Helloworld!'

        >>> html.tostring(root, method='text', encoding='unicode')
        u'Helloworld!'

        >>> root = html.fragment_fromstring('<div><p>Hello<br>world!</p>TAIL</div>')
        >>> html.tostring(root[0], method='text', encoding='unicode')
        u'Helloworld!TAIL'

        >>> html.tostring(root[0], method='text', encoding='unicode', with_tail=False)
        u'Helloworld!'

        >>> doc = html.document_fromstring('<p>Hello<br>world!</p>')
        >>> html.tostring(doc, method='html', encoding='unicode')
        u'<html><body><p>Hello<br>world!</p></body></html>'

        >>> print(html.tostring(doc, method='html', encoding='unicode',
        ...          doctype='<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'
        ...                  ' "http://www.w3.org/TR/html4/strict.dtd">'))
        <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
        <html><body><p>Hello<br>world!</p></body></html>
    """
    html = etree.tostring(doc, method=method, pretty_print=pretty_print,
                          encoding=encoding, with_tail=with_tail,
                          doctype=doctype)
    if method == 'html' and not include_meta_content_type:
        # HTML serialisation adds a Content-Type meta tag; strip it out
        # again unless the caller asked to keep it.  The regex used
        # depends on whether we serialised to str or bytes.
        if isinstance(html, str):
            html = __str_replace_meta_content_type('', html)
        else:
            html = __bytes_replace_meta_content_type(b'', html)
    return html
+
+
+tostring.__doc__ = __fix_docstring(tostring.__doc__)
+
+
+def open_in_browser(doc, encoding=None):
+    """
+    Open the HTML document in a web browser, saving it to a temporary
+    file to open it.  Note that this does not delete the file after
+    use.  This is mainly meant for debugging.
+    """
+    import os
+    import webbrowser
+    import tempfile
+    if not isinstance(doc, etree._ElementTree):
+        doc = etree.ElementTree(doc)
+    handle, fn = tempfile.mkstemp(suffix='.html')
+    f = os.fdopen(handle, 'wb')
+    try:
+        doc.write(f, method="html", encoding=encoding or doc.docinfo.encoding or "UTF-8")
+    finally:
+        # we leak the file itself here, but we should at least close it
+        f.close()
+    url = 'file://' + fn.replace(os.path.sep, '/')
+    print(url)
+    webbrowser.open(url)
+
+
+################################################################################
+# configure Element class lookup
+################################################################################
+
+class HTMLParser(etree.HTMLParser):
+    """An HTML parser that is configured to return lxml.html Element
+    objects.
+    """
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.set_element_class_lookup(HtmlElementClassLookup())
+
+
+class XHTMLParser(etree.XMLParser):
+    """An XML parser that is configured to return lxml.html Element
+    objects.
+
+    Note that this parser is not really XHTML aware unless you let it
+    load a DTD that declares the HTML entities.  To do this, make sure
+    you have the XHTML DTDs installed in your catalogs, and create the
+    parser like this::
+
+        >>> parser = XHTMLParser(load_dtd=True)
+
+    If you additionally want to validate the document, use this::
+
+        >>> parser = XHTMLParser(dtd_validation=True)
+
+    For catalog support, see http://www.xmlsoft.org/catalog.html.
+    """
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.set_element_class_lookup(HtmlElementClassLookup())
+
+
+def Element(*args, **kw):
+    """Create a new HTML Element.
+
+    This can also be used for XHTML documents.
+    """
+    v = html_parser.makeelement(*args, **kw)
+    return v
+
+
+html_parser = HTMLParser()
+xhtml_parser = XHTMLParser()
diff --git a/.venv/lib/python3.12/site-packages/lxml/html/_diffcommand.py b/.venv/lib/python3.12/site-packages/lxml/html/_diffcommand.py
new file mode 100644
index 00000000..b045a2b1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/html/_diffcommand.py
@@ -0,0 +1,86 @@
+import optparse
+import sys
+import re
+import os
+from .diff import htmldiff
+
+description = """\
+"""
+
+parser = optparse.OptionParser(
+    usage="%prog [OPTIONS] FILE1 FILE2\n"
+    "%prog --annotate [OPTIONS] INFO1 FILE1 INFO2 FILE2 ...",
+    description=description,
+    )
+
+parser.add_option(
+    '-o', '--output',
+    metavar="FILE",
+    dest="output",
+    default="-",
+    help="File to write the difference to",
+    )
+
+parser.add_option(
+    '-a', '--annotation',
+    action="store_true",
+    dest="annotation",
+    help="Do an annotation")
+
+def main(args=None):
+    if args is None:
+        args = sys.argv[1:]
+    options, args = parser.parse_args(args)
+    if options.annotation:
+        return annotate(options, args)
+    if len(args) != 2:
+        print('Error: you must give two files')
+        parser.print_help()
+        sys.exit(1)
+    file1, file2 = args
+    input1 = read_file(file1)
+    input2 = read_file(file2)
+    body1 = split_body(input1)[1]
+    pre, body2, post = split_body(input2)
+    result = htmldiff(body1, body2)
+    result = pre + result + post
+    if options.output == '-':
+        if not result.endswith('\n'):
+            result += '\n'
+        sys.stdout.write(result)
+    else:
+        with open(options.output, 'wb') as f:
+            f.write(result)
+
+def read_file(filename):
+    if filename == '-':
+        c = sys.stdin.read()
+    elif not os.path.exists(filename):
+        raise OSError(
+            "Input file %s does not exist" % filename)
+    else:
+        with open(filename, 'rb') as f:
+            c = f.read()
+    return c
+
+body_start_re = re.compile(
+    r"<body.*?>", re.I|re.S)
+body_end_re = re.compile(
+    r"</body.*?>", re.I|re.S)
+    
+def split_body(html):
+    pre = post = ''
+    match = body_start_re.search(html)
+    if match:
+        pre = html[:match.end()]
+        html = html[match.end():]
+    match = body_end_re.search(html)
+    if match:
+        post = html[match.start():]
+        html = html[:match.start()]
+    return pre, html, post
+
+def annotate(options, args):
+    print("Not yet implemented")
+    sys.exit(1)
+    
diff --git a/.venv/lib/python3.12/site-packages/lxml/html/_html5builder.py b/.venv/lib/python3.12/site-packages/lxml/html/_html5builder.py
new file mode 100644
index 00000000..a88ed944
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/html/_html5builder.py
@@ -0,0 +1,100 @@
+"""
+Legacy module - don't use in new code!
+
+html5lib now has its own proper implementation.
+
+This module implements a tree builder for html5lib that generates lxml
+html element trees.  This module uses camelCase as it follows the
+html5lib style guide.
+"""
+
+from html5lib.treebuilders import _base, etree as etree_builders
+from lxml import html, etree
+
+
+class DocumentType:
+
+    def __init__(self, name, publicId, systemId):
+        self.name = name
+        self.publicId = publicId
+        self.systemId = systemId
+
+class Document:
+
+    def __init__(self):
+        self._elementTree = None
+        self.childNodes = []
+
+    def appendChild(self, element):
+        self._elementTree.getroot().addnext(element._element)
+
+
+class TreeBuilder(_base.TreeBuilder):
+    documentClass = Document
+    doctypeClass = DocumentType
+    elementClass = None
+    commentClass = None
+    fragmentClass = Document
+
+    def __init__(self, *args, **kwargs):
+        html_builder = etree_builders.getETreeModule(html, fullTree=False)
+        etree_builder = etree_builders.getETreeModule(etree, fullTree=False)
+        self.elementClass = html_builder.Element
+        self.commentClass = etree_builder.Comment
+        _base.TreeBuilder.__init__(self, *args, **kwargs)
+
+    def reset(self):
+        _base.TreeBuilder.reset(self)
+        self.rootInserted = False
+        self.initialComments = []
+        self.doctype = None
+
+    def getDocument(self):
+        return self.document._elementTree
+
+    def getFragment(self):
+        fragment = []
+        element = self.openElements[0]._element
+        if element.text:
+            fragment.append(element.text)
+        fragment.extend(element.getchildren())
+        if element.tail:
+            fragment.append(element.tail)
+        return fragment
+
+    def insertDoctype(self, name, publicId, systemId):
+        doctype = self.doctypeClass(name, publicId, systemId)
+        self.doctype = doctype
+
+    def insertComment(self, data, parent=None):
+        if not self.rootInserted:
+            self.initialComments.append(data)
+        else:
+            _base.TreeBuilder.insertComment(self, data, parent)
+
+    def insertRoot(self, name):
+        buf = []
+        if self.doctype and self.doctype.name:
+            buf.append('<!DOCTYPE %s' % self.doctype.name)
+            if self.doctype.publicId is not None or self.doctype.systemId is not None:
+                buf.append(' PUBLIC "%s" "%s"' % (self.doctype.publicId,
+                                                  self.doctype.systemId))
+            buf.append('>')
+        buf.append('<html></html>')
+        root = html.fromstring(''.join(buf))
+
+        # Append the initial comments:
+        for comment in self.initialComments:
+            root.addprevious(etree.Comment(comment))
+
+        # Create the root document and add the ElementTree to it
+        self.document = self.documentClass()
+        self.document._elementTree = root.getroottree()
+
+        # Add the root element to the internal child/open data structures
+        root_element = self.elementClass(name)
+        root_element._element = root
+        self.document.childNodes.append(root_element)
+        self.openElements.append(root_element)
+
+        self.rootInserted = True
diff --git a/.venv/lib/python3.12/site-packages/lxml/html/_setmixin.py b/.venv/lib/python3.12/site-packages/lxml/html/_setmixin.py
new file mode 100644
index 00000000..0be2bac4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/html/_setmixin.py
@@ -0,0 +1,56 @@
+try:
+    from collections.abc import MutableSet
+except ImportError:
+    from collections.abc import MutableSet
+
+
+class SetMixin(MutableSet):
+
+    """
+    Mix-in for sets.  You must define __iter__, add, remove
+    """
+
+    def __len__(self):
+        length = 0
+        for item in self:
+            length += 1
+        return length
+
+    def __contains__(self, item):
+        for has_item in self:
+            if item == has_item:
+                return True
+        return False
+
+    issubset = MutableSet.__le__
+    issuperset = MutableSet.__ge__
+
+    union = MutableSet.__or__
+    intersection = MutableSet.__and__
+    difference = MutableSet.__sub__
+    symmetric_difference = MutableSet.__xor__
+
+    def copy(self):
+        return set(self)
+
+    def update(self, other):
+        self |= other
+
+    def intersection_update(self, other):
+        self &= other
+
+    def difference_update(self, other):
+        self -= other
+
+    def symmetric_difference_update(self, other):
+        self ^= other
+
+    def discard(self, item):
+        try:
+            self.remove(item)
+        except KeyError:
+            pass
+
+    @classmethod
+    def _from_iterable(cls, it):
+        return set(it)
diff --git a/.venv/lib/python3.12/site-packages/lxml/html/builder.py b/.venv/lib/python3.12/site-packages/lxml/html/builder.py
new file mode 100644
index 00000000..8a074ecf
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/html/builder.py
@@ -0,0 +1,133 @@
+# --------------------------------------------------------------------
+# The ElementTree toolkit is
+# Copyright (c) 1999-2004 by Fredrik Lundh
+# --------------------------------------------------------------------
+
+"""
+A set of HTML generator tags for building HTML documents.
+
+Usage::
+
+    >>> from lxml.html.builder import *
+    >>> html = HTML(
+    ...            HEAD( TITLE("Hello World") ),
+    ...            BODY( CLASS("main"),
+    ...                  H1("Hello World !")
+    ...            )
+    ...        )
+
+    >>> import lxml.etree
+    >>> print lxml.etree.tostring(html, pretty_print=True)
+    <html>
+      <head>
+        <title>Hello World</title>
+      </head>
+      <body class="main">
+        <h1>Hello World !</h1>
+      </body>
+    </html>
+
+"""
+
+from lxml.builder import ElementMaker
+from lxml.html import html_parser
+
+E = ElementMaker(makeelement=html_parser.makeelement)
+
+# elements
+A = E.a  #: anchor
+ABBR = E.abbr  #: abbreviated form (e.g., WWW, HTTP, etc.)
+ACRONYM = E.acronym  #: 
+ADDRESS = E.address  #: information on author
+APPLET = E.applet  #: Java applet (DEPRECATED)
+AREA = E.area  #: client-side image map area
+B = E.b  #: bold text style
+BASE = E.base  #: document base URI
+BASEFONT = E.basefont  #: base font size (DEPRECATED)
+BDO = E.bdo  #: I18N BiDi over-ride
+BIG = E.big  #: large text style
+BLOCKQUOTE = E.blockquote  #: long quotation
+BODY = E.body  #: document body
+BR = E.br  #: forced line break
+BUTTON = E.button  #: push button
+CAPTION = E.caption  #: table caption
+CENTER = E.center  #: shorthand for DIV align=center (DEPRECATED)
+CITE = E.cite  #: citation
+CODE = E.code  #: computer code fragment
+COL = E.col  #: table column
+COLGROUP = E.colgroup  #: table column group
+DD = E.dd  #: definition description
+DEL = getattr(E, 'del')  #: deleted text
+DFN = E.dfn  #: instance definition
+DIR = E.dir  #: directory list (DEPRECATED)
+DIV = E.div  #: generic language/style container
+DL = E.dl  #: definition list
+DT = E.dt  #: definition term
+EM = E.em  #: emphasis
+FIELDSET = E.fieldset  #: form control group
+FONT = E.font  #: local change to font (DEPRECATED)
+FORM = E.form  #: interactive form
+FRAME = E.frame  #: subwindow
+FRAMESET = E.frameset  #: window subdivision
+H1 = E.h1  #: heading
+H2 = E.h2  #: heading
+H3 = E.h3  #: heading
+H4 = E.h4  #: heading
+H5 = E.h5  #: heading
+H6 = E.h6  #: heading
+HEAD = E.head  #: document head
+HR = E.hr  #: horizontal rule
+HTML = E.html  #: document root element
+I = E.i  #: italic text style
+IFRAME = E.iframe  #: inline subwindow
+IMG = E.img  #: Embedded image
+INPUT = E.input  #: form control
+INS = E.ins  #: inserted text
+ISINDEX = E.isindex  #: single line prompt (DEPRECATED)
+KBD = E.kbd  #: text to be entered by the user
+LABEL = E.label  #: form field label text
+LEGEND = E.legend  #: fieldset legend
+LI = E.li  #: list item
+LINK = E.link  #: a media-independent link
+MAP = E.map  #: client-side image map
+MENU = E.menu  #: menu list (DEPRECATED)
+META = E.meta  #: generic metainformation
+NOFRAMES = E.noframes  #: alternate content container for non frame-based rendering
+NOSCRIPT = E.noscript  #: alternate content container for non script-based rendering
+OBJECT = E.object  #: generic embedded object
+OL = E.ol  #: ordered list
+OPTGROUP = E.optgroup  #: option group
+OPTION = E.option  #: selectable choice
+P = E.p  #: paragraph
+PARAM = E.param  #: named property value
+PRE = E.pre  #: preformatted text
+Q = E.q  #: short inline quotation
+S = E.s  #: strike-through text style (DEPRECATED)
+SAMP = E.samp  #: sample program output, scripts, etc.
+SCRIPT = E.script  #: script statements
+SELECT = E.select  #: option selector
+SMALL = E.small  #: small text style
+SPAN = E.span  #: generic language/style container
+STRIKE = E.strike  #: strike-through text (DEPRECATED)
+STRONG = E.strong  #: strong emphasis
+STYLE = E.style  #: style info
+SUB = E.sub  #: subscript
+SUP = E.sup  #: superscript
+TABLE = E.table  #: 
+TBODY = E.tbody  #: table body
+TD = E.td  #: table data cell
+TEXTAREA = E.textarea  #: multi-line text field
+TFOOT = E.tfoot  #: table footer
+TH = E.th  #: table header cell
+THEAD = E.thead  #: table header
+TITLE = E.title  #: document title
+TR = E.tr  #: table row
+TT = E.tt  #: teletype or monospaced text style
+U = E.u  #: underlined text style (DEPRECATED)
+UL = E.ul  #: unordered list
+VAR = E.var  #: instance of a variable or program argument
+
+# attributes (only reserved words are included here)
+ATTR = dict
+def CLASS(v): return {'class': v}
+def FOR(v): return {'for': v}
diff --git a/.venv/lib/python3.12/site-packages/lxml/html/clean.py b/.venv/lib/python3.12/site-packages/lxml/html/clean.py
new file mode 100644
index 00000000..d4b9e96d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/html/clean.py
@@ -0,0 +1,21 @@
+# cython: language_level=3str
+
+"""Backward-compatibility module for lxml_html_clean"""
+
+try:
+    from lxml_html_clean import *
+
+    __all__ = [
+        "clean_html",
+        "clean",
+        "Cleaner",
+        "autolink",
+        "autolink_html",
+        "word_break",
+        "word_break_html",
+    ]
+except ImportError:
+    raise ImportError(
+        "lxml.html.clean module is now a separate project lxml_html_clean.\n"
+        "Install lxml[html_clean] or lxml_html_clean directly."
+    ) from None
diff --git a/.venv/lib/python3.12/site-packages/lxml/html/defs.py b/.venv/lib/python3.12/site-packages/lxml/html/defs.py
new file mode 100644
index 00000000..2058ea33
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/html/defs.py
@@ -0,0 +1,135 @@
+# FIXME: this should all be confirmed against what a DTD says
+# (probably in a test; this may not match the DTD exactly, but we
+# should document just how it differs).
+
+"""
+Data taken from https://www.w3.org/TR/html401/index/elements.html
+and https://www.w3.org/community/webed/wiki/HTML/New_HTML5_Elements
+for html5_tags.
+"""
+
+empty_tags = frozenset([
+    'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
+    'img', 'input', 'isindex', 'link', 'meta', 'param', 'source', 'track'])
+
+deprecated_tags = frozenset([
+    'applet', 'basefont', 'center', 'dir', 'font', 'isindex',
+    'menu', 's', 'strike', 'u'])
+
+# archive actually takes a space-separated list of URIs
+link_attrs = frozenset([
+    'action', 'archive', 'background', 'cite', 'classid',
+    'codebase', 'data', 'href', 'longdesc', 'profile', 'src',
+    'usemap',
+    # Not standard:
+    'dynsrc', 'lowsrc',
+    # HTML5 formaction
+    'formaction'
+    ])
+
+# Not in the HTML 4 spec:
+# onerror, onresize
+event_attrs = frozenset([
+    'onblur', 'onchange', 'onclick', 'ondblclick', 'onerror',
+    'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload',
+    'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover',
+    'onmouseup', 'onreset', 'onresize', 'onselect', 'onsubmit',
+    'onunload',
+    ])
+
+safe_attrs = frozenset([
+    'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align',
+    'alt', 'axis', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff',
+    'charset', 'checked', 'cite', 'class', 'clear', 'cols', 'colspan',
+    'color', 'compact', 'coords', 'datetime', 'dir', 'disabled', 'enctype',
+    'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace', 'id',
+    'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method',
+    'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly',
+    'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape',
+    'size', 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
+    'type', 'usemap', 'valign', 'value', 'vspace', 'width'])
+
+# From http://htmlhelp.com/reference/html40/olist.html
+top_level_tags = frozenset([
+    'html', 'head', 'body', 'frameset',
+    ])
+
+head_tags = frozenset([
+    'base', 'isindex', 'link', 'meta', 'script', 'style', 'title',
+    ])
+
+general_block_tags = frozenset([
+    'address',
+    'blockquote',
+    'center',
+    'del',
+    'div',
+    'h1',
+    'h2',
+    'h3',
+    'h4',
+    'h5',
+    'h6',
+    'hr',
+    'ins',
+    'isindex',
+    'noscript',
+    'p',
+    'pre',
+    ])
+
+list_tags = frozenset([
+    'dir', 'dl', 'dt', 'dd', 'li', 'menu', 'ol', 'ul',
+    ])
+
+table_tags = frozenset([
+    'table', 'caption', 'colgroup', 'col',
+    'thead', 'tfoot', 'tbody', 'tr', 'td', 'th',
+    ])
+
+# just this one from
+# http://www.georgehernandez.com/h/XComputers/HTML/2BlockLevel.htm
+block_tags = general_block_tags | list_tags | table_tags | frozenset([
+    # Partial form tags
+    'fieldset', 'form', 'legend', 'optgroup', 'option',
+    ])
+
+form_tags = frozenset([
+    'form', 'button', 'fieldset', 'legend', 'input', 'label',
+    'select', 'optgroup', 'option', 'textarea',
+    ])
+
+special_inline_tags = frozenset([
+    'a', 'applet', 'basefont', 'bdo', 'br', 'embed', 'font', 'iframe',
+    'img', 'map', 'area', 'object', 'param', 'q', 'script',
+    'span', 'sub', 'sup',
+    ])
+
+phrase_tags = frozenset([
+    'abbr', 'acronym', 'cite', 'code', 'del', 'dfn', 'em',
+    'ins', 'kbd', 'samp', 'strong', 'var',
+    ])
+
+font_style_tags = frozenset([
+    'b', 'big', 'i', 's', 'small', 'strike', 'tt', 'u',
+    ])
+
+frame_tags = frozenset([
+    'frameset', 'frame', 'noframes',
+    ])
+    
+html5_tags = frozenset([
+    'article', 'aside', 'audio', 'canvas', 'command', 'datalist',
+    'details', 'embed', 'figcaption', 'figure', 'footer', 'header',
+    'hgroup', 'keygen', 'mark', 'math', 'meter', 'nav', 'output',
+    'progress', 'rp', 'rt', 'ruby', 'section', 'source', 'summary',
+    'svg', 'time', 'track', 'video', 'wbr'
+    ])
+
+# These tags aren't standard
+nonstandard_tags = frozenset(['blink', 'marquee'])
+
+
+tags = (top_level_tags | head_tags | general_block_tags | list_tags
+        | table_tags | form_tags | special_inline_tags | phrase_tags
+        | font_style_tags | nonstandard_tags | html5_tags)
diff --git a/.venv/lib/python3.12/site-packages/lxml/html/diff.cpython-312-x86_64-linux-gnu.so b/.venv/lib/python3.12/site-packages/lxml/html/diff.cpython-312-x86_64-linux-gnu.so
new file mode 100755
index 00000000..d43b3fef
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/html/diff.cpython-312-x86_64-linux-gnu.so
Binary files differdiff --git a/.venv/lib/python3.12/site-packages/lxml/html/diff.py b/.venv/lib/python3.12/site-packages/lxml/html/diff.py
new file mode 100644
index 00000000..56d28057
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/html/diff.py
@@ -0,0 +1,878 @@
+# cython: language_level=3
+
+
+import difflib
+from lxml import etree
+from lxml.html import fragment_fromstring
+import re
+
+__all__ = ['html_annotate', 'htmldiff']
+
+try:
+    from html import escape as html_escape
+except ImportError:
+    from cgi import escape as html_escape
+try:
+    _unicode = unicode
+except NameError:
+    # Python 3
+    _unicode = str
+try:
+    basestring
+except NameError:
+    # Python 3
+    basestring = str
+
+############################################################
+## Annotation
+############################################################
+
+def default_markup(text, version):
+    return '<span title="%s">%s</span>' % (
+        html_escape(_unicode(version), 1), text)
+
+def html_annotate(doclist, markup=default_markup):
+    """
+    doclist should be ordered from oldest to newest, like::
+
+        >>> version1 = 'Hello World'
+        >>> version2 = 'Goodbye World'
+        >>> print(html_annotate([(version1, 'version 1'),
+        ...                      (version2, 'version 2')]))
+        <span title="version 2">Goodbye</span> <span title="version 1">World</span>
+
+    The documents must be *fragments* (str/UTF8 or unicode), not
+    complete documents
+
+    The markup argument is a function to markup the spans of words.
+    This function is called like markup('Hello', 'version 2'), and
+    returns HTML.  The first argument is text and never includes any
+    markup.  The default uses a span with a title:
+
+        >>> print(default_markup('Some Text', 'by Joe'))
+        <span title="by Joe">Some Text</span>
+    """
+    # The basic strategy we have is to split the documents up into
+    # logical tokens (which are words with attached markup).  We then
+    # do diffs of each of the versions to track when a token first
+    # appeared in the document; the annotation attached to the token
+    # is the version where it first appeared.
+    tokenlist = [tokenize_annotated(doc, version)
+                 for doc, version in doclist]
+    cur_tokens = tokenlist[0]
+    for tokens in tokenlist[1:]:
+        html_annotate_merge_annotations(cur_tokens, tokens)
+        cur_tokens = tokens
+
+    # After we've tracked all the tokens, we can combine spans of text
+    # that are adjacent and have the same annotation
+    cur_tokens = compress_tokens(cur_tokens)
+    # And finally add markup
+    result = markup_serialize_tokens(cur_tokens, markup)
+    return ''.join(result).strip()
+
+def tokenize_annotated(doc, annotation): 
+    """Tokenize a document and add an annotation attribute to each token
+    """
+    tokens = tokenize(doc, include_hrefs=False)
+    for tok in tokens: 
+        tok.annotation = annotation
+    return tokens
+
+def html_annotate_merge_annotations(tokens_old, tokens_new): 
+    """Merge the annotations from tokens_old into tokens_new, when the
+    tokens in the new document already existed in the old document.
+    """
+    s = InsensitiveSequenceMatcher(a=tokens_old, b=tokens_new)
+    commands = s.get_opcodes()
+
+    for command, i1, i2, j1, j2 in commands:
+        if command == 'equal': 
+            eq_old = tokens_old[i1:i2]
+            eq_new = tokens_new[j1:j2]
+            copy_annotations(eq_old, eq_new)
+
+def copy_annotations(src, dest): 
+    """
+    Copy annotations from the tokens listed in src to the tokens in dest
+    """
+    assert len(src) == len(dest)
+    for src_tok, dest_tok in zip(src, dest): 
+        dest_tok.annotation = src_tok.annotation
+
+def compress_tokens(tokens):
+    """
+    Combine adjacent tokens when there is no HTML between the tokens, 
+    and they share an annotation
+    """
+    result = [tokens[0]] 
+    for tok in tokens[1:]: 
+        if (not result[-1].post_tags and 
+            not tok.pre_tags and 
+            result[-1].annotation == tok.annotation): 
+            compress_merge_back(result, tok)
+        else: 
+            result.append(tok)
+    return result
+
+def compress_merge_back(tokens, tok): 
+    """ Merge tok into the last element of tokens (modifying the list of
+    tokens in-place).  """
+    last = tokens[-1]
+    if type(last) is not token or type(tok) is not token: 
+        tokens.append(tok)
+    else:
+        text = _unicode(last)
+        if last.trailing_whitespace:
+            text += last.trailing_whitespace
+        text += tok
+        merged = token(text,
+                       pre_tags=last.pre_tags,
+                       post_tags=tok.post_tags,
+                       trailing_whitespace=tok.trailing_whitespace)
+        merged.annotation = last.annotation
+        tokens[-1] = merged
+    
+def markup_serialize_tokens(tokens, markup_func):
+    """
+    Serialize the list of tokens into a list of text chunks, calling
+    markup_func around text to add annotations.
+    """
+    for token in tokens:
+        yield from token.pre_tags
+        html = token.html()
+        html = markup_func(html, token.annotation)
+        if token.trailing_whitespace:
+            html += token.trailing_whitespace
+        yield html
+        yield from token.post_tags
+
+
+############################################################
+## HTML Diffs
+############################################################
+
+def htmldiff(old_html, new_html):
+    ## FIXME: this should take parsed documents too, and use their body
+    ## or other content.
+    """ Do a diff of the old and new document.  The documents are HTML
+    *fragments* (str/UTF8 or unicode), they are not complete documents
+    (i.e., no <html> tag).
+
+    Returns HTML with <ins> and <del> tags added around the
+    appropriate text.  
+
+    Markup is generally ignored, with the markup from new_html
+    preserved, and possibly some markup from old_html (though it is
+    considered acceptable to lose some of the old markup).  Only the
+    words in the HTML are diffed.  The exception is <img> tags, which
+    are treated like words, and the href attribute of <a> tags, which
+    are noted inside the tag itself when there are changes.
+    """ 
+    old_html_tokens = tokenize(old_html)
+    new_html_tokens = tokenize(new_html)
+    result = htmldiff_tokens(old_html_tokens, new_html_tokens)
+    result = ''.join(result).strip()
+    return fixup_ins_del_tags(result)
+
+def htmldiff_tokens(html1_tokens, html2_tokens):
+    """ Does a diff on the tokens themselves, returning a list of text
+    chunks (not tokens).
+    """
+    # There are several passes as we do the differences.  The tokens
+    # isolate the portion of the content we care to diff; difflib does
+    # all the actual hard work at that point.  
+    #
+    # Then we must create a valid document from pieces of both the old
+    # document and the new document.  We generally prefer to take
+    # markup from the new document, and only do a best effort attempt
+    # to keep markup from the old document; anything that we can't
+    # resolve we throw away.  Also we try to put the deletes as close
+    # to the location where we think they would have been -- because
+    # we are only keeping the markup from the new document, it can be
+    # fuzzy where in the new document the old text would have gone.
+    # Again we just do a best effort attempt.
+    s = InsensitiveSequenceMatcher(a=html1_tokens, b=html2_tokens)
+    commands = s.get_opcodes()
+    result = []
+    for command, i1, i2, j1, j2 in commands:
+        if command == 'equal':
+            result.extend(expand_tokens(html2_tokens[j1:j2], equal=True))
+            continue
+        if command == 'insert' or command == 'replace':
+            ins_tokens = expand_tokens(html2_tokens[j1:j2])
+            merge_insert(ins_tokens, result)
+        if command == 'delete' or command == 'replace':
+            del_tokens = expand_tokens(html1_tokens[i1:i2])
+            merge_delete(del_tokens, result)
+    # If deletes were inserted directly as <del> then we'd have an
+    # invalid document at this point.  Instead we put in special
+    # markers, and when the complete diffed document has been created
+    # we try to move the deletes around and resolve any problems.
+    result = cleanup_delete(result)
+
+    return result
+
+def expand_tokens(tokens, equal=False):
+    """Given a list of tokens, return a generator of the chunks of
+    text for the data in the tokens.
+    """
+    for token in tokens:
+        yield from token.pre_tags
+        if not equal or not token.hide_when_equal:
+            if token.trailing_whitespace:
+                yield token.html() + token.trailing_whitespace
+            else:
+                yield token.html()
+        yield from token.post_tags
+
+def merge_insert(ins_chunks, doc):
+    """ doc is the already-handled document (as a list of text chunks);
+    here we add <ins>ins_chunks</ins> to the end of that.  """
+    # Though we don't throw away unbalanced_start or unbalanced_end
+    # (we assume there is accompanying markup later or earlier in the
+    # document), we only put <ins> around the balanced portion.
+    unbalanced_start, balanced, unbalanced_end = split_unbalanced(ins_chunks)
+    # Unmatched open tags go before the <ins> so nesting stays valid.
+    doc.extend(unbalanced_start)
+    if doc and not doc[-1].endswith(' '):
+        # Fix up the case where the word before the insert didn't end
+        # with a space.
+        doc[-1] += ' '
+    doc.append('<ins>')
+    if balanced and balanced[-1].endswith(' '):
+        # We move space outside of </ins>
+        balanced[-1] = balanced[-1][:-1]
+    doc.extend(balanced)
+    doc.append('</ins> ')
+    # Unmatched close tags go after the </ins>.
+    doc.extend(unbalanced_end)
+
+# These are sentinels to represent the start and end of a <del>
+# segment, until we do the cleanup phase to turn them into proper
+# markup:
+class DEL_START:
+    # Sentinel marking where a pending <del> region begins.
+    pass
+class DEL_END:
+    # Sentinel marking where a pending <del> region ends.
+    pass
+
+class NoDeletes(Exception):
+    """ Raised when the document no longer contains any pending deletes
+    (DEL_START/DEL_END); signals cleanup_delete() that it is done. """
+
+def merge_delete(del_chunks, doc):
+    """ Adds the text chunks in del_chunks to the document doc (another
+    list of text chunks) with marker to show it is a delete.
+    cleanup_delete later resolves these markers into <del> tags."""
+    doc.append(DEL_START)
+    doc.extend(del_chunks)
+    doc.append(DEL_END)
+
+def cleanup_delete(chunks):
+    """ Cleans up any DEL_START/DEL_END markers in the document, replacing
+    them with <del></del>.  To do this while keeping the document
+    valid, it may need to drop some tags (either start or end tags).
+
+    It may also move the del into adjacent tags to try to move it to a
+    similar location where it was originally located (e.g., moving a
+    delete into preceding <div> tag, if the del looks like (DEL_START,
+    'Text</div>', DEL_END)"""
+    while 1:
+        # Find a pending DEL_START/DEL_END, splitting the document
+        # into stuff-preceding-DEL_START, stuff-inside, and
+        # stuff-following-DEL_END
+        try:
+            pre_delete, delete, post_delete = split_delete(chunks)
+        except NoDeletes:
+            # Nothing found, we've cleaned up the entire doc
+            break
+        # The stuff-inside-DEL_START/END may not be well balanced
+        # markup.  First we figure out what unbalanced portions there are:
+        unbalanced_start, balanced, unbalanced_end = split_unbalanced(delete)
+        # Then we move the span forward and/or backward based on these
+        # unbalanced portions:
+        locate_unbalanced_start(unbalanced_start, pre_delete, post_delete)
+        locate_unbalanced_end(unbalanced_end, pre_delete, post_delete)
+        doc = pre_delete
+        if doc and not doc[-1].endswith(' '):
+            # Fix up case where the word before us didn't have a trailing space
+            doc[-1] += ' '
+        doc.append('<del>')
+        if balanced and balanced[-1].endswith(' '):
+            # We move space outside of </del>
+            balanced[-1] = balanced[-1][:-1]
+        doc.extend(balanced)
+        doc.append('</del> ')
+        doc.extend(post_delete)
+        # Restart the scan on the rebuilt document; more DEL_START
+        # markers may remain further on.
+        chunks = doc
+    return chunks
+
+def split_unbalanced(chunks):
+    """Return (unbalanced_start, balanced, unbalanced_end), where each is
+    a list of text and tag chunks.
+
+    unbalanced_start is a list of all the tags that are opened, but
+    not closed in this span.  Similarly, unbalanced_end is a list of
+    tags that are closed but were not opened.  Extracting these might
+    mean some reordering of the chunks."""
+    start = []
+    end = []
+    tag_stack = []
+    balanced = []
+    for chunk in chunks:
+        if not chunk.startswith('<'):
+            # Plain text is always balanced.
+            balanced.append(chunk)
+            continue
+        endtag = chunk[1] == '/'
+        name = chunk.split()[0].strip('<>/')
+        if name in empty_tags:
+            # Void elements never need a close tag.
+            balanced.append(chunk)
+            continue
+        if endtag:
+            if tag_stack and tag_stack[-1][0] == name:
+                # Proper close of the innermost open tag: fill in the
+                # placeholder slot that was reserved for the open tag.
+                balanced.append(chunk)
+                name, pos, tag = tag_stack.pop()
+                balanced[pos] = tag
+            elif tag_stack:
+                # Mismatched close tag: everything still open becomes
+                # unbalanced-start, and this close is unbalanced-end.
+                start.extend([tag for name, pos, tag in tag_stack])
+                tag_stack = []
+                end.append(chunk)
+            else:
+                end.append(chunk)
+        else:
+            # Open tag: reserve a slot in ``balanced`` in case a
+            # matching close tag shows up later.
+            tag_stack.append((name, len(balanced), chunk))
+            balanced.append(None)
+    # Any tags still open at the end are unbalanced-start.
+    start.extend(
+        [chunk for name, pos, chunk in tag_stack])
+    balanced = [chunk for chunk in balanced if chunk is not None]
+    return start, balanced, end
+
+def split_delete(chunks):
+    """ Returns (stuff_before_DEL_START, stuff_inside_DEL_START_END,
+    stuff_after_DEL_END).  Returns the first case found (there may be
+    more DEL_STARTs in stuff_after_DEL_END).  Raises NoDeletes if
+    there's no DEL_START found. """
+    try:
+        pos = chunks.index(DEL_START)
+    except ValueError:
+        raise NoDeletes
+    pos2 = chunks.index(DEL_END)
+    return chunks[:pos], chunks[pos+1:pos2], chunks[pos2+1:]
+
+def locate_unbalanced_start(unbalanced_start, pre_delete, post_delete):
+    """ pre_delete and post_delete implicitly point to a place in the
+    document (where the two were split).  This moves that point (by
+    popping items from one and pushing them onto the other).  It moves
+    the point to try to find a place where unbalanced_start applies.
+
+    As an example::
+
+        >>> unbalanced_start = ['<div>']
+        >>> doc = ['<p>', 'Text', '</p>', '<div>', 'More Text', '</div>']
+        >>> pre, post = doc[:3], doc[3:]
+        >>> pre, post
+        (['<p>', 'Text', '</p>'], ['<div>', 'More Text', '</div>'])
+        >>> locate_unbalanced_start(unbalanced_start, pre, post)
+        >>> pre, post
+        (['<p>', 'Text', '</p>', '<div>'], ['More Text', '</div>'])
+
+    As you can see, we moved the point so that the dangling <div> that
+    we found will be effectively replaced by the div in the original
+    document.  If this doesn't work out, we just throw away
+    unbalanced_start without doing anything.
+    """
+    # Walk forward one tag at a time while the next chunk is a start
+    # tag matching the next dangling open tag.
+    while 1:
+        if not unbalanced_start:
+            # We have totally succeeded in finding the position
+            break
+        finding = unbalanced_start[0]
+        finding_name = finding.split()[0].strip('<>')
+        if not post_delete:
+            break
+        next = post_delete[0]
+        if next is DEL_START or not next.startswith('<'):
+            # Reached a word, we can't move the delete text forward
+            break
+        if next[1] == '/':
+            # Reached a closing tag, can we go further?  Maybe not...
+            break
+        name = next.split()[0].strip('<>')
+        if name == 'ins':
+            # Can't move into an insert
+            break
+        assert name != 'del', (
+            "Unexpected delete tag: %r" % next)
+        if name == finding_name:
+            unbalanced_start.pop(0)
+            pre_delete.append(post_delete.pop(0))
+        else:
+            # Found a tag that doesn't match
+            break
+
+def locate_unbalanced_end(unbalanced_end, pre_delete, post_delete):
+    """ like locate_unbalanced_start, except handling end tags and
+    possibly moving the point earlier in the document.  """
+    # Walk backward one tag at a time while the preceding chunk is a
+    # close tag matching the last dangling close tag.
+    while 1:
+        if not unbalanced_end:
+            # Success
+            break
+        finding = unbalanced_end[-1]
+        finding_name = finding.split()[0].strip('<>/')
+        if not pre_delete:
+            break
+        next = pre_delete[-1]
+        if next is DEL_END or not next.startswith('</'):
+            # A word or a start tag
+            break
+        name = next.split()[0].strip('<>/')
+        if name == 'ins' or name == 'del':
+            # Can't move into an insert or delete
+            break
+        if name == finding_name:
+            unbalanced_end.pop()
+            post_delete.insert(0, pre_delete.pop())
+        else:
+            # Found a tag that doesn't match
+            break
+
+class token(_unicode):
+    """ Represents a diffable token, generally a word that is displayed to
+    the user.  Opening tags are attached to this token when they are
+    adjacent (pre_tags) and closing tags that follow the word
+    (post_tags).  Some exceptions occur when there are empty tags
+    adjacent to a word, so there may be close tags in pre_tags, or
+    open tags in post_tags.
+
+    We also keep track of whether the word was originally followed by
+    whitespace, even though we do not want to treat the word as
+    equivalent to a similar word that does not have a trailing
+    space."""
+
+    # When this is true, the token will be eliminated from the
+    # displayed diff if no change has occurred:
+    hide_when_equal = False
+
+    def __new__(cls, text, pre_tags=None, post_tags=None, trailing_whitespace=""):
+        # Subclasses the (unicode) string type, so the token's text IS
+        # the object; extra state is attached as attributes.
+        obj = _unicode.__new__(cls, text)
+
+        # Each token gets its own fresh list; the None defaults avoid
+        # sharing one mutable default list between instances.
+        if pre_tags is not None:
+            obj.pre_tags = pre_tags
+        else:
+            obj.pre_tags = []
+
+        if post_tags is not None:
+            obj.post_tags = post_tags
+        else:
+            obj.post_tags = []
+
+        obj.trailing_whitespace = trailing_whitespace
+
+        return obj
+
+    def __repr__(self):
+        return 'token(%s, %r, %r, %r)' % (_unicode.__repr__(self), self.pre_tags,
+                                          self.post_tags, self.trailing_whitespace)
+
+    def html(self):
+        # The markup representation; plain tokens are just their text.
+        return _unicode(self)
+
+class tag_token(token):
+
+    """ Represents a token that is actually a tag.  Currently this is just
+    the <img> tag, which takes up visible space just like a word but
+    is only represented in a document by a tag.  """
+
+    def __new__(cls, tag, data, html_repr, pre_tags=None, 
+                post_tags=None, trailing_whitespace=""):
+        obj = token.__new__(cls, "%s: %s" % (type, data), 
+                            pre_tags=pre_tags, 
+                            post_tags=post_tags, 
+                            trailing_whitespace=trailing_whitespace)
+        obj.tag = tag
+        obj.data = data
+        obj.html_repr = html_repr
+        return obj
+
+    def __repr__(self):
+        return 'tag_token(%s, %s, html_repr=%s, post_tags=%r, pre_tags=%r, trailing_whitespace=%r)' % (
+            self.tag, 
+            self.data, 
+            self.html_repr, 
+            self.pre_tags, 
+            self.post_tags, 
+            self.trailing_whitespace)
+    def html(self):
+        return self.html_repr
+
+class href_token(token):
+
+    """ Represents the href in an anchor tag.  Unlike other words, we only
+    show the href when it changes.  """
+
+    # Suppress this token in unchanged regions (see expand_tokens).
+    hide_when_equal = True
+
+    def html(self):
+        return ' Link: %s' % self
+
+def tokenize(html, include_hrefs=True):
+    """
+    Parse the given HTML and returns token objects (words with attached tags).
+
+    This parses only the content of a page; anything in the head is
+    ignored, and the <head> and <body> elements are themselves
+    optional.  The content is then parsed by lxml, which ensures the
+    validity of the resulting parsed document (though lxml may make
+    incorrect guesses when the markup is particular bad).
+
+    <ins> and <del> tags are also eliminated from the document, as
+    that gets confusing.
+
+    If include_hrefs is true, then the href attribute of <a> tags is
+    included as a special kind of diffable token."""
+    # Accept either markup or an already-parsed element.
+    if etree.iselement(html):
+        body_el = html
+    else:
+        body_el = parse_html(html, cleanup=True)
+    # Then we split the document into text chunks for each tag, word, and end tag:
+    chunks = flatten_el(body_el, skip_tag=True, include_hrefs=include_hrefs)
+    # Finally re-joining them into token objects:
+    return fixup_chunks(chunks)
+
+def parse_html(html, cleanup=True):
+    """
+    Parses an HTML fragment, returning an lxml element.  Note that the HTML will be
+    wrapped in a <div> tag that was not in the original document.
+
+    If cleanup is true, make sure there's no <head> or <body>, and get
+    rid of any <ins> and <del> tags.
+    """
+    if cleanup:
+        # This removes any extra markup or structure like <head>:
+        html = cleanup_html(html)
+    # create_parent=True supplies the wrapping <div> mentioned above.
+    return fragment_fromstring(html, create_parent=True)
+
+# Patterns used by cleanup_html() to strip page structure and any
+# pre-existing <ins>/<del> markup:
+_body_re = re.compile(r'<body.*?>', re.I|re.S)
+_end_body_re = re.compile(r'</body.*?>', re.I|re.S)
+_ins_del_re = re.compile(r'</?(ins|del).*?>', re.I|re.S)
+
+def cleanup_html(html):
+    """ This 'cleans' the HTML, meaning that any page structure is removed
+    (only the contents of <body> are used, if there is any <body).
+    Also <ins> and <del> tags are removed.  """
+    match = _body_re.search(html)
+    if match:
+        html = html[match.end():]
+    match = _end_body_re.search(html)
+    if match:
+        html = html[:match.start()]
+    html = _ins_del_re.sub('', html)
+    return html
+    
+
+# Matches a single whitespace character at the end of a string.
+# NOTE(review): not referenced anywhere in this visible chunk; verify
+# whether it is still used before removing.
+end_whitespace_re = re.compile(r'[ \t\n\r]$')
+
+def split_trailing_whitespace(word):
+    """
+    This function takes a word, such as 'test\n\n' and returns ('test','\n\n')
+    """
+    stripped_length = len(word.rstrip())
+    return word[0:stripped_length], word[stripped_length:]
+
+
+def fixup_chunks(chunks):
+    """
+    This function takes a list of chunks and produces a list of tokens.
+
+    Start tags accumulate in ``tag_accum`` until the next word, which
+    claims them as its pre_tags; end tags attach to the previous word's
+    post_tags (or to the accumulator, if one is pending).
+    """
+    tag_accum = []
+    cur_word = None
+    result = []
+    for chunk in chunks:
+        if isinstance(chunk, tuple):
+            # Tuples are special chunks emitted by flatten_el: either
+            # ('img', src, markup) or ('href', url).
+            if chunk[0] == 'img':
+                src = chunk[1]
+                tag, trailing_whitespace = split_trailing_whitespace(chunk[2])
+                cur_word = tag_token('img', src, html_repr=tag,
+                                     pre_tags=tag_accum,
+                                     trailing_whitespace=trailing_whitespace)
+                tag_accum = []
+                result.append(cur_word)
+
+            elif chunk[0] == 'href':
+                href = chunk[1]
+                cur_word = href_token(href, pre_tags=tag_accum, trailing_whitespace=" ")
+                tag_accum = []
+                result.append(cur_word)
+            continue
+
+        if is_word(chunk):
+            chunk, trailing_whitespace = split_trailing_whitespace(chunk)
+            cur_word = token(chunk, pre_tags=tag_accum, trailing_whitespace=trailing_whitespace)
+            tag_accum = []
+            result.append(cur_word)
+
+        elif is_start_tag(chunk):
+            tag_accum.append(chunk)
+
+        elif is_end_tag(chunk):
+            if tag_accum:
+                tag_accum.append(chunk)
+            else:
+                assert cur_word, (
+                    "Weird state, cur_word=%r, result=%r, chunks=%r of %r"
+                    % (cur_word, result, chunk, chunks))
+                cur_word.post_tags.append(chunk)
+        else:
+            assert False
+
+    if not result:
+        # No words at all: emit a single empty token carrying the tags.
+        return [token('', pre_tags=tag_accum)]
+    else:
+        # Leftover tags attach after the last word.
+        result[-1].post_tags.extend(tag_accum)
+
+    return result
+
+
+# All the tags in HTML that don't require end tags:
+empty_tags = (
+    'param', 'img', 'area', 'br', 'basefont', 'input',
+    'base', 'meta', 'link', 'col')
+
+# Tags that start a block-level box; used by _contains_block_level_tag
+# when deciding whether <ins>/<del> must be pushed inside an element:
+block_level_tags = (
+    'address',
+    'blockquote',
+    'center',
+    'dir',
+    'div',
+    'dl',
+    'fieldset',
+    'form',
+    'h1',
+    'h2',
+    'h3',
+    'h4',
+    'h5',
+    'h6',
+    'hr',
+    'isindex',
+    'menu',
+    'noframes',
+    'noscript',
+    'ol',
+    'p',
+    'pre',
+    'table',
+    'ul',
+    )
+
+# Block-level tags that only occur inside other block structure
+# (table sections, list items, frames):
+block_level_container_tags = (
+    'dd',
+    'dt',
+    'frameset',
+    'li',
+    'tbody',
+    'td',
+    'tfoot',
+    'th',
+    'thead',
+    'tr',
+    )
+
+
+def flatten_el(el, include_hrefs, skip_tag=False):
+    """ Takes an lxml element el, and generates all the text chunks for
+    that tag.  Each start tag is a chunk, each word is a chunk, and each
+    end tag is a chunk.
+
+    If skip_tag is true, then the outermost container tag is
+    not returned (just its contents)."""
+    if not skip_tag:
+        if el.tag == 'img':
+            # Images are emitted as a special tuple chunk so they can
+            # become tag_tokens (see fixup_chunks).
+            yield ('img', el.get('src'), start_tag(el))
+        else:
+            yield start_tag(el)
+    if el.tag in empty_tags and not el.text and not len(el) and not el.tail:
+        # Void element with no content at all: nothing more to emit.
+        return
+    start_words = split_words(el.text)
+    for word in start_words:
+        yield html_escape(word)
+    for child in el:
+        yield from flatten_el(child, include_hrefs=include_hrefs)
+    if el.tag == 'a' and el.get('href') and include_hrefs:
+        # Emit the link target as a special tuple chunk (href_token).
+        yield ('href', el.get('href'))
+    if not skip_tag:
+        yield end_tag(el)
+        end_words = split_words(el.tail)
+        for word in end_words:
+            yield html_escape(word)
+
+split_words_re = re.compile(r'\S+(?:\s+|$)', re.U)
+
+def split_words(text):
+    """ Splits some text into words. Includes trailing whitespace
+    on each word when appropriate.  """
+    if not text or not text.strip():
+        return []
+
+    words = split_words_re.findall(text)
+    return words
+
+start_whitespace_re = re.compile(r'^[ \t\n\r]')
+
+def start_tag(el):
+    """
+    The text representation of the start tag for a tag.
+    """
+    return '<%s%s>' % (
+        el.tag, ''.join([' %s="%s"' % (name, html_escape(value, True))
+                         for name, value in el.attrib.items()]))
+
+def end_tag(el):
+    """ The text representation of an end tag for a tag.  Includes
+    trailing whitespace when appropriate.  """
+    if el.tail and start_whitespace_re.search(el.tail):
+        extra = ' '
+    else:
+        extra = ''
+    return '</%s>%s' % (el.tag, extra)
+
+def is_word(tok):
+    # A chunk is a plain word unless it starts with '<'.
+    return not tok.startswith('<')
+
+def is_end_tag(tok):
+    # End-tag chunks look like '</name>'.
+    return tok.startswith('</')
+
+def is_start_tag(tok):
+    # Start-tag chunks begin with '<' but not '</'.
+    return tok.startswith('<') and not tok.startswith('</')
+
+def fixup_ins_del_tags(html):
+    """ Given an html string, move any <ins> or <del> tags inside of any
+    block-level elements, e.g. transform <ins><p>word</p></ins> to
+    <p><ins>word</ins></p> """
+    doc = parse_html(html, cleanup=False)
+    _fixup_ins_del_tags(doc)
+    # skip_outer drops the wrapping <div> that parse_html added.
+    html = serialize_html_fragment(doc, skip_outer=True)
+    return html
+
+def serialize_html_fragment(el, skip_outer=False):
+    """ Serialize a single lxml element as HTML.  The serialized form
+    includes the elements tail.
+
+    If skip_outer is true, then don't serialize the outermost tag
+    """
+    assert not isinstance(el, basestring), (
+        "You should pass in an element, not a string like %r" % el)
+    html = etree.tostring(el, method="html", encoding=_unicode)
+    if skip_outer:
+        # Get rid of the extra starting tag:
+        html = html[html.find('>')+1:]
+        # Get rid of the extra end tag:
+        html = html[:html.rfind('<')]
+        return html.strip()
+    else:
+        return html
+
+def _fixup_ins_del_tags(doc):
+    """fixup_ins_del_tags that works on an lxml document in-place
+    """
+    for tag in ['ins', 'del']:
+        for el in doc.xpath('descendant-or-self::%s' % tag):
+            if not _contains_block_level_tag(el):
+                # Inline-only content: the ins/del can stay where it is.
+                continue
+            # Push the ins/del markup down into the block children,
+            # then dissolve the now-redundant outer element.
+            _move_el_inside_block(el, tag=tag)
+            el.drop_tag()
+            #_merge_element_contents(el)
+
+def _contains_block_level_tag(el):
+    """True if the element contains any block-level elements, like <p>, <td>, etc.
+    """
+    if el.tag in block_level_tags or el.tag in block_level_container_tags:
+        return True
+    for child in el:
+        if _contains_block_level_tag(child):
+            return True
+    return False
+
+def _move_el_inside_block(el, tag):
+    """ helper for _fixup_ins_del_tags; actually takes the <ins> etc tags
+    and moves them inside any block-level tags.  """
+    for child in el:
+        if _contains_block_level_tag(child):
+            break
+    else:
+        # No block-level tags in any child
+        children_tag = etree.Element(tag)
+        children_tag.text = el.text
+        el.text = None
+        children_tag.extend(list(el))
+        el[:] = [children_tag]
+        return
+    # Mixed content: recurse into block-level children, and wrap all
+    # inline pieces (non-block children, tails, leading text) in new
+    # <tag> elements.
+    for child in list(el):
+        if _contains_block_level_tag(child):
+            _move_el_inside_block(child, tag)
+            if child.tail:
+                tail_tag = etree.Element(tag)
+                tail_tag.text = child.tail
+                child.tail = None
+                el.insert(el.index(child)+1, tail_tag)
+        else:
+            child_tag = etree.Element(tag)
+            el.replace(child, child_tag)
+            child_tag.append(child)
+    if el.text:
+        text_tag = etree.Element(tag)
+        text_tag.text = el.text
+        el.text = None
+        el.insert(0, text_tag)
+            
+def _merge_element_contents(el):
+    """
+    Removes an element, but merges its contents into its place, e.g.,
+    given <p>Hi <i>there!</i></p>, if you remove the <i> element you get
+    <p>Hi there!</p>
+    """
+    parent = el.getparent()
+    text = el.text or ''
+    if el.tail:
+        if not len(el):
+            text += el.tail
+        else:
+            if el[-1].tail:
+                el[-1].tail += el.tail
+            else:
+                el[-1].tail = el.tail
+    index = parent.index(el)
+    if text:
+        if index == 0:
+            previous = None
+        else:
+            previous = parent[index-1]
+        if previous is None:
+            if parent.text:
+                parent.text += text
+            else:
+                parent.text = text
+        else:
+            if previous.tail:
+                previous.tail += text
+            else:
+                previous.tail = text
+    parent[index:index+1] = el.getchildren()
+
+class InsensitiveSequenceMatcher(difflib.SequenceMatcher):
+    """
+    Acts like SequenceMatcher, but tries not to find very small equal
+    blocks amidst large spans of changes
+    """
+
+    threshold = 2
+    
+    def get_matching_blocks(self):
+        size = min(len(self.b), len(self.b))
+        threshold = min(self.threshold, size / 4)
+        actual = difflib.SequenceMatcher.get_matching_blocks(self)
+        return [item for item in actual
+                if item[2] > threshold
+                or not item[2]]
+
+if __name__ == '__main__':
+    # Command-line entry point: delegate to the diff command helper.
+    from lxml.html import _diffcommand
+    _diffcommand.main()
+    
diff --git a/.venv/lib/python3.12/site-packages/lxml/html/formfill.py b/.venv/lib/python3.12/site-packages/lxml/html/formfill.py
new file mode 100644
index 00000000..9741c28b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/html/formfill.py
@@ -0,0 +1,299 @@
+from lxml.etree import XPath, ElementBase
+from lxml.html import fromstring, XHTML_NAMESPACE
+from lxml.html import _forms_xpath, _options_xpath, _nons, _transform_result
+from lxml.html import defs
+import copy
+
+try:
+    basestring
+except NameError:
+    # Python 3
+    basestring = str
+
+__all__ = ['FormNotFound', 'fill_form', 'fill_form_html',
+           'insert_errors', 'insert_errors_html',
+           'DefaultErrorCreator']
+
+class FormNotFound(LookupError):
+    """
+    Raised when no form can be found (by id, name, or index).
+    """
+
+# Pre-compiled XPath helpers covering both plain HTML and
+# XHTML-namespaced documents.
+# NOTE(review): '[name=$name]' tests a *child element* called 'name',
+# not the form's name attribute ('[@name=$name]') — verify intent.
+_form_name_xpath = XPath('descendant-or-self::form[name=$name]|descendant-or-self::x:form[name=$name]', namespaces={'x':XHTML_NAMESPACE})
+_input_xpath = XPath('|'.join(['descendant-or-self::'+_tag for _tag in ('input','select','textarea','x:input','x:select','x:textarea')]),
+                               namespaces={'x':XHTML_NAMESPACE})
+_label_for_xpath = XPath('//label[@for=$for_id]|//x:label[@for=$for_id]',
+                               namespaces={'x':XHTML_NAMESPACE})
+_name_xpath = XPath('descendant-or-self::*[@name=$name]')
+
+def fill_form(
+    el,
+    values,
+    form_id=None,
+    form_index=None,
+    ):
+    """Fill the form found in ``el`` in place, using the ``values``
+    mapping of field name -> value(s).  The form is located by
+    ``form_id`` or ``form_index`` (first form when neither is given)."""
+    el = _find_form(el, form_id=form_id, form_index=form_index)
+    _fill_form(el, values)
+
+def fill_form_html(html, values, form_id=None, form_index=None):
+    """Like fill_form(), but accepts a markup string or an element,
+    fills a copy, and returns the result as the input's type."""
+    result_type = type(html)
+    if isinstance(html, basestring):
+        doc = fromstring(html)
+    else:
+        # Work on a copy so the caller's tree is not mutated.
+        doc = copy.deepcopy(html)
+    fill_form(doc, values, form_id=form_id, form_index=form_index)
+    return _transform_result(result_type, doc)
+
+def _fill_form(el, values):
+    """Apply ``values`` to every named input/select/textarea in ``el``.
+
+    Multi-valued controls (checkbox/radio groups, multi-selects) take a
+    list; repeated single-valued names consume list items in order.
+    """
+    # Tracks how many times each single-valued name has been seen.
+    counts = {}
+    if hasattr(values, 'mixed'):
+        # For Paste request parameters
+        values = values.mixed()
+    inputs = _input_xpath(el)
+    for input in inputs:
+        name = input.get('name')
+        if not name:
+            continue
+        if _takes_multiple(input):
+            value = values.get(name, [])
+            if not isinstance(value, (list, tuple)):
+                value = [value]
+            _fill_multiple(input, value)
+        elif name not in values:
+            continue
+        else:
+            index = counts.get(name, 0)
+            counts[name] = index + 1
+            value = values[name]
+            if isinstance(value, (list, tuple)):
+                try:
+                    value = value[index]
+                except IndexError:
+                    # More inputs than supplied values: leave untouched.
+                    continue
+            elif index > 0:
+                # A scalar value only fills the first input of a name.
+                continue
+            _fill_single(input, value)
+
+def _takes_multiple(input):
+    if _nons(input.tag) == 'select' and input.get('multiple'):
+        # FIXME: multiple="0"?
+        return True
+    type = input.get('type', '').lower()
+    if type in ('radio', 'checkbox'):
+        return True
+    return False
+
+def _fill_multiple(input, value):
+    """Check/select the parts of a multi-valued control whose values
+    appear in the ``value`` list."""
+    type = input.get('type', '').lower()
+    if type == 'checkbox':
+        v = input.get('value')
+        if v is None:
+            # Value-less checkbox: interpret the supplied value as a
+            # boolean (or the literal string 'on').
+            if not value:
+                result = False
+            else:
+                result = value[0]
+                if isinstance(value, basestring):
+                    # The only valid "on" value for an unnamed checkbox is 'on'
+                    result = result == 'on'
+            _check(input, result)
+        else:
+            _check(input, v in value)
+    elif type == 'radio':
+        v = input.get('value')
+        _check(input, v in value)
+    else:
+        assert _nons(input.tag) == 'select'
+        for option in _options_xpath(input):
+            v = option.get('value')
+            if v is None:
+                # This seems to be the default, at least on IE
+                # FIXME: but I'm not sure
+                v = option.text_content()
+            _select(option, v in value)
+
+def _check(el, check):
+    if check:
+        el.set('checked', '')
+    else:
+        if 'checked' in el.attrib:
+            del el.attrib['checked']
+
+def _select(el, select):
+    if select:
+        el.set('selected', '')
+    else:
+        if 'selected' in el.attrib:
+            del el.attrib['selected']
+
+def _fill_single(input, value):
+    # Textareas hold their value as text content; every other control
+    # uses the ``value`` attribute.
+    if _nons(input.tag) == 'textarea':
+        input.text = value
+    else:
+        input.set('value', value)
+
+def _find_form(el, form_id=None, form_index=None):
+    if form_id is None and form_index is None:
+        forms = _forms_xpath(el)
+        for form in forms:
+            return form
+        raise FormNotFound(
+            "No forms in page")
+    if form_id is not None:
+        form = el.get_element_by_id(form_id)
+        if form is not None:
+            return form
+        forms = _form_name_xpath(el, name=form_id)
+        if forms:
+            return forms[0]
+        else:
+            raise FormNotFound(
+                "No form with the name or id of %r (forms: %s)"
+                % (id, ', '.join(_find_form_ids(el))))               
+    if form_index is not None:
+        forms = _forms_xpath(el)
+        try:
+            return forms[form_index]
+        except IndexError:
+            raise FormNotFound(
+                "There is no form with the index %r (%i forms found)"
+                % (form_index, len(forms)))
+
+def _find_form_ids(el):
+    forms = _forms_xpath(el)
+    if not forms:
+        yield '(no forms)'
+        return
+    for index, form in enumerate(forms):
+        if form.get('id'):
+            if form.get('name'):
+                yield '%s or %s' % (form.get('id'),
+                                     form.get('name'))
+            else:
+                yield form.get('id')
+        elif form.get('name'):
+            yield form.get('name')
+        else:
+            yield '(unnamed form %s)' % index
+
+############################################################
+## Error filling
+############################################################
+
+class DefaultErrorCreator:
+    """Default callable used by insert_errors() to build and attach an
+    error element next to (or inside) a form control."""
+
+    # Place the error before (True) or after (False) the target.
+    insert_before = True
+    # For block-level targets, insert the error inside the element
+    # rather than as a sibling.
+    block_inside = True
+    # Tag and CSS classes used for the generated error element.
+    error_container_tag = 'div'
+    error_message_class = 'error-message'
+    error_block_class = 'error-block'
+    # Fallback text when no message is supplied.
+    default_message = "Invalid"
+
+    def __init__(self, **kw):
+        # Any of the class attributes above may be overridden by name.
+        for name, value in kw.items():
+            if not hasattr(self, name):
+                raise TypeError(
+                    "Unexpected keyword argument: %s" % name)
+            setattr(self, name, value)
+
+    def __call__(self, el, is_block, message):
+        """Create the error element for ``el`` and splice it into the tree."""
+        error_el = el.makeelement(self.error_container_tag)
+        if self.error_message_class:
+            error_el.set('class', self.error_message_class)
+        if is_block and self.error_block_class:
+            error_el.set('class', error_el.get('class', '')+' '+self.error_block_class)
+        if message is None or message == '':
+            message = self.default_message
+        if isinstance(message, ElementBase):
+            error_el.append(message)
+        else:
+            assert isinstance(message, basestring), (
+                "Bad message; should be a string or element: %r" % message)
+            error_el.text = message or self.default_message
+        if is_block and self.block_inside:
+            if self.insert_before:
+                # Take over the element's leading text as our tail so
+                # the document text stays in order.
+                error_el.tail = el.text
+                el.text = None
+                el.insert(0, error_el)
+            else:
+                el.append(error_el)
+        else:
+            # Insert as a sibling of the target element.
+            parent = el.getparent()
+            pos = parent.index(el)
+            if self.insert_before:
+                parent.insert(pos, error_el)
+            else:
+                error_el.tail = el.tail
+                el.tail = None
+                parent.insert(pos+1, error_el)
+
+# Module-level default used by insert_errors() when no custom creator
+# is supplied.
+default_error_creator = DefaultErrorCreator()
+    
+
+def insert_errors(
+    el,
+    errors,
+    form_id=None,
+    form_index=None,
+    error_class="error",
+    error_creator=default_error_creator,
+    ):
+    """Attach error markup to the form found in ``el`` (in place).
+
+    ``errors`` maps a field name (or '#id', or None for the whole form)
+    to a message; ``error_class`` is added to the affected controls and
+    their labels, and ``error_creator`` builds the error element."""
+    el = _find_form(el, form_id=form_id, form_index=form_index)
+    for name, error in errors.items():
+        if error is None:
+            continue
+        for error_el, message in _find_elements_for_name(el, name, error):
+            assert isinstance(message, (basestring, type(None), ElementBase)), (
+                "Bad message: %r" % message)
+            _insert_error(error_el, message, error_class, error_creator)
+
+def insert_errors_html(html, values, **kw):
+    """Like insert_errors(), but accepts a markup string or an element,
+    modifies a copy, and returns the result as the input's type."""
+    result_type = type(html)
+    if isinstance(html, basestring):
+        doc = fromstring(html)
+    else:
+        # Work on a copy so the caller's tree is not mutated.
+        doc = copy.deepcopy(html)
+    insert_errors(doc, values, **kw)
+    return _transform_result(result_type, doc)
+
+def _insert_error(el, error, error_class, error_creator):
+    """Mark ``el`` (and its labels) with ``error_class`` and delegate
+    creation of the error element to ``error_creator``."""
+    tag = _nons(el.tag)
+    # Void elements and textareas cannot contain the error element.
+    is_block = tag not in defs.empty_tags and tag != 'textarea'
+    if tag != 'form' and error_class:
+        _add_class(el, error_class)
+    el_id = el.get('id')
+    if el_id:
+        for label in _label_for_xpath(el, for_id=el_id):
+            _add_class(label, error_class)
+    error_creator(el, is_block, error)
+
+def _add_class(el, class_name):
+    if el.get('class'):
+        el.set('class', el.get('class')+' '+class_name)
+    else:
+        el.set('class', class_name)
+
+def _find_elements_for_name(form, name, error):
+    """Yield (element, message) pairs for the controls in ``form``
+    matching ``name`` (None = whole form, '#x' = element id, otherwise
+    the control's name attribute)."""
+    if name is None:
+        # An error for the entire form
+        yield form, error
+        return
+    if name.startswith('#'):
+        # By id
+        el = form.get_element_by_id(name[1:])
+        if el is not None:
+            yield el, error
+        return
+    els = _name_xpath(form, name=name)
+    if not els:
+        # FIXME: should this raise an exception?
+        return
+    if not isinstance(error, (list, tuple)):
+        # A single message applies to the first matching control.
+        yield els[0], error
+        return
+    # FIXME: if error is longer than els, should it raise an error?
+    for el, err in zip(els, error):
+        if err is None:
+            continue
+        yield el, err
diff --git a/.venv/lib/python3.12/site-packages/lxml/html/html5parser.py b/.venv/lib/python3.12/site-packages/lxml/html/html5parser.py
new file mode 100644
index 00000000..2f7be156
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/html/html5parser.py
@@ -0,0 +1,260 @@
+"""
+An interface to html5lib that mimics the lxml.html interface.
+"""
+import sys
+import string
+
+from html5lib import HTMLParser as _HTMLParser
+from html5lib.treebuilders.etree_lxml import TreeBuilder
+from lxml import etree
+from lxml.html import Element, XHTML_NAMESPACE, _contains_block_level_tag
+
+# python3 compatibility
+try:
+    _strings = basestring
+except NameError:
+    _strings = (bytes, str)
+try:
+    from urllib2 import urlopen
+except ImportError:
+    from urllib.request import urlopen
+try:
+    from urlparse import urlparse
+except ImportError:
+    from urllib.parse import urlparse
+
+
class HTMLParser(_HTMLParser):
    """An html5lib HTML parser with lxml as tree."""

    def __init__(self, strict=False, **kwargs):
        # Pin the tree builder to html5lib's lxml etree TreeBuilder so
        # that parsing produces lxml elements instead of the default tree.
        _HTMLParser.__init__(self, strict=strict, tree=TreeBuilder, **kwargs)
+
+
# XHTMLParser is not available in every html5lib version; when it cannot
# be imported, neither the wrapper class nor the shared ``xhtml_parser``
# instance below is defined.
try:
    from html5lib import XHTMLParser as _XHTMLParser
except ImportError:
    pass
else:
    class XHTMLParser(_XHTMLParser):
        """An html5lib XHTML Parser with lxml as tree."""

        def __init__(self, strict=False, **kwargs):
            # Pin the tree builder to the lxml etree TreeBuilder, as above.
            _XHTMLParser.__init__(self, strict=strict, tree=TreeBuilder, **kwargs)

    # Shared default XHTML parser instance.
    xhtml_parser = XHTMLParser()
+
+
+def _find_tag(tree, tag):
+    elem = tree.find(tag)
+    if elem is not None:
+        return elem
+    return tree.find('{%s}%s' % (XHTML_NAMESPACE, tag))
+
+
def document_fromstring(html, guess_charset=None, parser=None):
    """Parse a whole HTML document from a string and return the root
    element of the resulting tree.

    If `guess_charset` is true, or if the input is a byte string and
    `guess_charset` is not given, the `chardet` library will perform
    charset guessing on the string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    if parser is None:
        parser = html_parser

    if guess_charset is None and isinstance(html, bytes):
        # Default to charset detection for byte input.  html5lib does not
        # accept useChardet as an argument if it detected the html
        # argument would produce unicode objects, so only pass the option
        # when it was requested or implied.
        guess_charset = True
    options = {'useChardet': guess_charset} if guess_charset is not None else {}
    return parser.parse(html, **options).getroot()
+
+
def fragments_fromstring(html, no_leading_text=False,
                         guess_charset=None, parser=None):
    """Parses several HTML elements, returning a list of elements.

    The first item in the list may be a string.  If no_leading_text is true,
    then it will be an error if there is leading text, and it will always be
    a list of only elements.

    If `guess_charset` is true, the `chardet` library will perform charset
    guessing on the string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    if parser is None:
        parser = html_parser

    if guess_charset is None and isinstance(html, bytes):
        # html5lib does not accept useChardet as an argument, if it
        # detected the html argument would produce unicode objects.
        guess_charset = False
    options = {'useChardet': guess_charset} if guess_charset is not None else {}

    children = parser.parseFragment(html, 'div', **options)
    if no_leading_text and children and isinstance(children[0], _strings):
        leading = children[0]
        if leading.strip():
            raise etree.ParserError('There is leading text: %r' %
                                    leading)
        del children[0]
    return children
+
+
def fragment_fromstring(html, create_parent=False,
                        guess_charset=None, parser=None):
    """Parses a single HTML element; it is an error if there is more than
    one element, or if anything but whitespace precedes or follows the
    element.

    If 'create_parent' is true (or is a tag name) then a parent node
    will be created to encapsulate the HTML in a single element.  In
    this case, leading or trailing text is allowed.

    If `guess_charset` is true, the `chardet` library will perform charset
    guessing on the string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    # Leading text is only tolerated when it can be stored on a wrapper.
    allow_leading_text = bool(create_parent)
    elements = fragments_fromstring(
        html, guess_charset=guess_charset, parser=parser,
        no_leading_text=not allow_leading_text)

    if create_parent:
        tag = create_parent if isinstance(create_parent, _strings) else 'div'
        new_root = Element(tag)
        if elements:
            if isinstance(elements[0], _strings):
                new_root.text = elements[0]
                del elements[0]
            new_root.extend(elements)
        return new_root

    if not elements:
        raise etree.ParserError('No elements found')
    if len(elements) > 1:
        raise etree.ParserError('Multiple elements found')
    result = elements[0]
    if result.tail and result.tail.strip():
        raise etree.ParserError('Element followed by text: %r' % result.tail)
    result.tail = None
    return result
+
+
def fromstring(html, guess_charset=None, parser=None):
    """Parse the html, returning a single element/document.

    This tries to minimally parse the chunk of text, without knowing if it
    is a fragment or a document.

    'base_url' will set the document's base_url attribute (and the tree's
    docinfo.URL)

    If `guess_charset` is true, or if the input is not Unicode but a
    byte string, the `chardet` library will perform charset guessing
    on the string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')
    doc = document_fromstring(html, parser=parser,
                              guess_charset=guess_charset)

    # If the input starts with a doctype or <html>, it was a full document.
    prefix = html[:50]
    if isinstance(prefix, bytes):
        # Allow text comparison in python3.
        # Decode as ascii, that also covers latin-1 and utf-8 for the
        # characters we need.
        prefix = prefix.decode('ascii', 'replace')
    prefix = prefix.lstrip().lower()
    if prefix.startswith(('<html', '<!doctype')):
        return doc

    # A non-empty head also indicates a full document.
    head = _find_tag(doc, 'head')
    if len(head):
        return doc

    body = _find_tag(doc, 'body')

    # A body with a single element and only whitespace around it was
    # probably a single element passed in.
    if len(body) == 1:
        leading = body.text
        trailing = body[-1].tail
        if ((not leading or not leading.strip())
                and (not trailing or not trailing.strip())):
            return body[0]

    # Otherwise the body holds a bag of tags for the content passed in.
    # Re-label it as a fake container, since <body> implies too much
    # structure.
    body.tag = 'div' if _contains_block_level_tag(body) else 'span'
    return body
+
+
def parse(filename_url_or_file, guess_charset=None, parser=None):
    """Parse a filename, URL, or file-like object into an HTML document
    tree.  Note: this returns a tree, not an element.  Use
    ``parse(...).getroot()`` to get the document root.

    If ``guess_charset`` is true, the ``useChardet`` option is passed into
    html5lib to enable character detection.  This option is on by default
    when parsing from URLs, off by default when parsing from file(-like)
    objects (which tend to return Unicode more often than not), and on by
    default when parsing from a file path (which is read in binary mode).
    """
    if parser is None:
        parser = html_parser
    # Only close streams that this function opened itself; a caller's
    # file-like object is left open for the caller to manage.
    close_fp = False
    if not isinstance(filename_url_or_file, _strings):
        fp = filename_url_or_file
        if guess_charset is None:
            # assume that file-like objects return Unicode more often than bytes
            guess_charset = False
    elif _looks_like_url(filename_url_or_file):
        fp = urlopen(filename_url_or_file)
        close_fp = True
        if guess_charset is None:
            # assume that URLs return bytes
            guess_charset = True
    else:
        fp = open(filename_url_or_file, 'rb')
        close_fp = True
        if guess_charset is None:
            guess_charset = True

    options = {}
    # html5lib does not accept useChardet as an argument, if it
    # detected the html argument would produce unicode objects.
    if guess_charset:
        options['useChardet'] = guess_charset
    try:
        return parser.parse(fp, **options)
    finally:
        if close_fp:
            # Fix: the file/URL handle opened above used to be leaked.
            fp.close()
+
+
+def _looks_like_url(str):
+    scheme = urlparse(str)[0]
+    if not scheme:
+        return False
+    elif (sys.platform == 'win32' and
+            scheme in string.ascii_letters
+            and len(scheme) == 1):
+        # looks like a 'normal' absolute path
+        return False
+    else:
+        return True
+
+
+html_parser = HTMLParser()
diff --git a/.venv/lib/python3.12/site-packages/lxml/html/soupparser.py b/.venv/lib/python3.12/site-packages/lxml/html/soupparser.py
new file mode 100644
index 00000000..b288a8a1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/html/soupparser.py
@@ -0,0 +1,314 @@
+"""External interface to the BeautifulSoup HTML parser.
+"""
+
+__all__ = ["fromstring", "parse", "convert_tree"]
+
+import re
+from lxml import etree, html
+
+try:
+    from bs4 import (
+        BeautifulSoup, Tag, Comment, ProcessingInstruction, NavigableString,
+        Declaration, Doctype)
+    _DECLARATION_OR_DOCTYPE = (Declaration, Doctype)
+except ImportError:
+    from BeautifulSoup import (
+        BeautifulSoup, Tag, Comment, ProcessingInstruction, NavigableString,
+        Declaration)
+    _DECLARATION_OR_DOCTYPE = Declaration
+
+
def fromstring(data, beautifulsoup=None, makeelement=None, **bsargs):
    """Parse a string of HTML data into an Element tree using the
    BeautifulSoup parser.

    Returns the root ``<html>`` Element of the tree.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a different Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.  Remaining keyword arguments are passed on to BeautifulSoup.
    """
    return _parse(data, beautifulsoup, makeelement, **bsargs)
+
+
def parse(file, beautifulsoup=None, makeelement=None, **bsargs):
    """Parse a file into an ElementTree using the BeautifulSoup parser.

    ``file`` may be a file name or an open file(-like) object.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a different Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.  Remaining keyword arguments are passed on to BeautifulSoup.
    """
    if not hasattr(file, 'read'):
        # Fix: the file opened here used to be leaked; close it once the
        # soup has been built.
        with open(file) as f:
            root = _parse(f, beautifulsoup, makeelement, **bsargs)
    else:
        root = _parse(file, beautifulsoup, makeelement, **bsargs)
    return etree.ElementTree(root)
+
+
def convert_tree(beautiful_soup_tree, makeelement=None):
    """Convert a BeautifulSoup tree to a list of Element trees.

    Returns a list instead of a single root Element to support
    HTML-like soup with more than one root element.

    You can pass a different Element factory through the `makeelement`
    keyword.
    """
    root = _convert_tree(beautiful_soup_tree, makeelement)
    # list(root) replaces the deprecated Element.getchildren(), which was
    # removed from xml.etree.ElementTree in Python 3.9 and is deprecated
    # in lxml.
    children = list(root)
    for child in children:
        root.remove(child)
    return children
+
+
+# helpers
+
def _parse(source, beautifulsoup, makeelement, **bsargs):
    """Parse *source* with BeautifulSoup and convert the soup into an
    lxml tree rooted at a single ``<html>`` element."""
    if beautifulsoup is None:
        beautifulsoup = BeautifulSoup
    if hasattr(beautifulsoup, "HTML_ENTITIES"):  # bs3
        bsargs.setdefault('convertEntities', 'html')
    if hasattr(beautifulsoup, "DEFAULT_BUILDER_FEATURES"):  # bs4
        bsargs.setdefault('features', 'html.parser')  # use Python html parser
    soup = beautifulsoup(source, **bsargs)
    root = _convert_tree(soup, makeelement)
    # from ET: wrap the document in a html root element, if necessary
    if len(root) == 1 and root[0].tag == "html":
        return root[0]
    root.tag = "html"
    return root
+
+
# Matches an HTML DOCTYPE declaration (optionally PUBLIC) and captures the
# quoted public identifier and system URI when present.  The surrounding
# quotes are kept in the captured groups and stripped by the caller.
_parse_doctype_declaration = re.compile(
    r'(?:\s|[<!])*DOCTYPE\s*HTML'
    r'(?:\s+PUBLIC)?(?:\s+(\'[^\']*\'|"[^"]*"))?'
    r'(?:\s+(\'[^\']*\'|"[^"]*"))?',
    re.IGNORECASE).match
+
+
+class _PseudoTag:
+    # Minimal imitation of BeautifulSoup.Tag
+    def __init__(self, contents):
+        self.name = 'html'
+        self.attrs = []
+        self.contents = contents
+
+    def __iter__(self):
+        return self.contents.__iter__()
+
+
def _convert_tree(beautiful_soup_tree, makeelement):
    """Convert a whole BeautifulSoup tree into one lxml tree and return
    its root element.

    Nodes before/after the root element (doctype, comments, processing
    instructions) are attached as lxml siblings of the root; a DOCTYPE
    declaration, if found, is copied into the tree's docinfo.
    """
    if makeelement is None:
        makeelement = html.html_parser.makeelement

    # Split the tree into three parts:
    # i) everything before the root element: document type
    # declaration, comments, processing instructions, whitespace
    # ii) the root(s),
    # iii) everything after the root: comments, processing
    # instructions, whitespace
    first_element_idx = last_element_idx = None
    html_root = declaration = None
    for i, e in enumerate(beautiful_soup_tree):
        if isinstance(e, Tag):
            if first_element_idx is None:
                first_element_idx = i
            last_element_idx = i
            if html_root is None and e.name and e.name.lower() == 'html':
                html_root = e
        elif declaration is None and isinstance(e, _DECLARATION_OR_DOCTYPE):
            declaration = e

    # For a nice, well-formatted document, the variable roots below is
    # a list consisting of a single <html> element. However, the document
    # may be a soup like '<meta><head><title>Hello</head><body>Hi
    # all<\p>'. In this example roots is a list containing meta, head
    # and body elements.
    if first_element_idx is None:
        pre_root = post_root = []
        roots = beautiful_soup_tree.contents
    else:
        pre_root = beautiful_soup_tree.contents[:first_element_idx]
        roots = beautiful_soup_tree.contents[first_element_idx:last_element_idx+1]
        post_root = beautiful_soup_tree.contents[last_element_idx+1:]

    # Reorganize so that there is one <html> root...
    if html_root is not None:
        # ... use existing one if possible, ...
        i = roots.index(html_root)
        html_root.contents = roots[:i] + html_root.contents + roots[i+1:]
    else:
        # ... otherwise create a new one.
        html_root = _PseudoTag(roots)

    convert_node = _init_node_converters(makeelement)

    # Process pre_root: walk backwards so each converted node can be
    # inserted immediately before the previous one.
    res_root = convert_node(html_root)
    prev = res_root
    for e in reversed(pre_root):
        converted = convert_node(e)
        if converted is not None:
            prev.addprevious(converted)
            prev = converted

    # ditto for post_root
    prev = res_root
    for e in post_root:
        converted = convert_node(e)
        if converted is not None:
            prev.addnext(converted)
            prev = converted

    if declaration is not None:
        try:
            # bs4 provides full Doctype string
            doctype_string = declaration.output_ready()
        except AttributeError:
            doctype_string = declaration.string

        match = _parse_doctype_declaration(doctype_string)
        if not match:
            # Something is wrong if we end up in here. Since soupparser should
            # tolerate errors, do not raise Exception, just let it pass.
            pass
        else:
            external_id, sys_uri = match.groups()
            docinfo = res_root.getroottree().docinfo
            # strip quotes and update DOCTYPE values (any of None, '', '...')
            docinfo.public_id = external_id and external_id[1:-1]
            docinfo.system_url = sys_uri and sys_uri[1:-1]

    return res_root
+
+
def _init_node_converters(makeelement):
    """Build and return a ``convert_node(bs_node, parent=None)`` function
    that converts a BeautifulSoup node (and, for tags, its subtree) into
    lxml nodes, dispatching on the node's concrete type.

    Converter lookups are cached per concrete type in ``converters``;
    subclasses fall back to the first matching registered base type.
    """
    converters = {}
    ordered_node_types = []

    def converter(*types):
        # Decorator: register a handler for the given node types, keeping
        # registration order for the subclass fallback below.
        def add(handler):
            for t in types:
                converters[t] = handler
                ordered_node_types.append(t)
            return handler
        return add

    def find_best_converter(node):
        # Fallback for unregistered concrete types: first registered base
        # type wins (converters are tried in order of their definition).
        for t in ordered_node_types:
            if isinstance(node, t):
                return converters[t]
        return None

    def convert_node(bs_node, parent=None):
        # duplicated in convert_tag() below
        try:
            handler = converters[type(bs_node)]
        except KeyError:
            handler = converters[type(bs_node)] = find_best_converter(bs_node)
        if handler is None:
            return None
        return handler(bs_node, parent)

    def map_attrs(bs_attrs):
        # bs4 may report multi-valued attributes (e.g. class) as lists.
        if isinstance(bs_attrs, dict):  # bs4
            attribs = {}
            for k, v in bs_attrs.items():
                if isinstance(v, list):
                    v = " ".join(v)
                attribs[k] = unescape(v)
        else:
            attribs = {k: unescape(v) for k, v in bs_attrs}
        return attribs

    def append_text(parent, text):
        # lxml stores text either on the parent (.text) or on the tail of
        # the last child, depending on whether children exist.
        if len(parent) == 0:
            parent.text = (parent.text or '') + text
        else:
            parent[-1].tail = (parent[-1].tail or '') + text

    # converters are tried in order of their definition

    @converter(Tag, _PseudoTag)
    def convert_tag(bs_node, parent):
        attrs = bs_node.attrs
        if parent is not None:
            attribs = map_attrs(attrs) if attrs else None
            res = etree.SubElement(parent, bs_node.name, attrib=attribs)
        else:
            attribs = map_attrs(attrs) if attrs else {}
            res = makeelement(bs_node.name, attrib=attribs)

        for child in bs_node:
            # avoid double recursion by inlining convert_node(), see above
            try:
                handler = converters[type(child)]
            except KeyError:
                pass
            else:
                if handler is not None:
                    handler(child, res)
                continue
            convert_node(child, res)
        return res

    @converter(Comment)
    def convert_comment(bs_node, parent):
        res = html.HtmlComment(bs_node)
        if parent is not None:
            parent.append(res)
        return res

    @converter(ProcessingInstruction)
    def convert_pi(bs_node, parent):
        if bs_node.endswith('?'):
            # The PI is of XML style (<?as df?>) but BeautifulSoup
            # interpreted it as being SGML style (<?as df>). Fix.
            bs_node = bs_node[:-1]
        res = etree.ProcessingInstruction(*bs_node.split(' ', 1))
        if parent is not None:
            parent.append(res)
        return res

    @converter(NavigableString)
    def convert_text(bs_node, parent):
        # Text nodes are merged into the parent; nothing to return.
        if parent is not None:
            append_text(parent, unescape(bs_node))
        return None

    return convert_node
+
+
# copied from ET's ElementSoup

# Python 2/3 compatible import of the named-entity table.
try:
    from html.entities import name2codepoint  # Python 3
except ImportError:
    from htmlentitydefs import name2codepoint


# Bound ``sub`` method for replacing named character references ("&amp;").
handle_entities = re.compile(r"&(\w+);").sub


# ``unichr`` only exists on Python 2; alias it to ``chr`` on Python 3.
try:
    unichr
except NameError:
    # Python 3
    unichr = chr
+
+
def unescape(string):
    """Replace named character references in *string* with the characters
    they stand for; unknown names are left untouched.  Returns '' for
    falsy input."""
    if not string:
        return ''
    # work around oddities in BeautifulSoup's entity handling
    def _replace(match):
        name = match.group(1)
        try:
            return unichr(name2codepoint[name])
        except KeyError:
            return match.group(0)  # use as is
    return handle_entities(_replace, string)
diff --git a/.venv/lib/python3.12/site-packages/lxml/html/usedoctest.py b/.venv/lib/python3.12/site-packages/lxml/html/usedoctest.py
new file mode 100644
index 00000000..f352a1cc
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/html/usedoctest.py
@@ -0,0 +1,13 @@
"""Doctest module for HTML comparison.

Usage::

   >>> import lxml.html.usedoctest
   >>> # now do your HTML doctests ...

See `lxml.doctestcompare`.
"""

from lxml import doctestcompare

# Importing this module installs the HTML-aware doctest output checker.
# NOTE(review): del_module=__name__ presumably unloads this module again
# so the side effect can be re-triggered — confirm in lxml.doctestcompare.
doctestcompare.temp_install(html=True, del_module=__name__)