aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/bs4/tests/test_element.py
blob: 0861eb1c5f3dd4a0269c8ff87b537dfc5c5f8042 (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""Tests of classes in element.py.

The really big classes -- Tag, PageElement, and NavigableString --
are tested in separate files.
"""

import pytest
from bs4.element import (
    HTMLAttributeDict,
    XMLAttributeDict,
    CharsetMetaAttributeValue,
    ContentMetaAttributeValue,
    NamespacedAttribute,
    ResultSet,
)

class TestNamedspacedAttribute:
    """Equality behaviour of NamespacedAttribute against strings and peers."""

    def test_name_may_be_none_or_missing(self):
        """With only a prefix, the attribute compares equal to that prefix.

        The name is supplied as None, as an empty string, and not at all.
        """
        for name_args in ((None,), ("",), ()):
            attribute = NamespacedAttribute("xmlns", *name_args)
            assert attribute == "xmlns"

    def test_namespace_may_be_none_or_missing(self):
        """With a None or empty first argument, the attribute equals its name."""
        for first_arg in (None, ""):
            attribute = NamespacedAttribute(first_arg, "tag")
            assert attribute == "tag"

    def test_attribute_is_equivalent_to_colon_separated_string(self):
        """A two-part attribute equals the string "<first>:<second>"."""
        attribute = NamespacedAttribute("a", "b")
        assert "a:b" == attribute

    def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
        """Equality depends on the first two arguments, not the third."""
        reference = NamespacedAttribute("a", "b", "c")

        # An identically-constructed attribute is equal.
        twin = NamespacedAttribute("a", "b", "c")
        assert reference == twin

        # Changing only the third argument (the actual namespace)
        # does not affect equality.
        without_namespace = NamespacedAttribute("a", "b", None)
        assert reference == without_namespace

        # Changing the name breaks equality...
        other_name = NamespacedAttribute("a", "z", "c")
        assert reference != other_name

        # ...and so does changing the prefix.
        other_prefix = NamespacedAttribute("z", "b", "c")
        assert reference != other_prefix


class TestAttributeValueWithCharsetSubstitution:
    """Tests for attribute values whose charset can be swapped out.

    Certain attributes are designed to have the charset of the final
    document substituted into their value.
    """

    def test_charset_meta_attribute_value(self):
        """A bare charset value is replaced wholesale by the new encoding."""
        # The value starts out as whatever encoding it was built with.
        charset = CharsetMetaAttributeValue("euc-jp")
        assert "euc-jp" == charset
        assert "euc-jp" == charset.original_value

        # Substituting an encoding yields that encoding's name.
        for encoding, expected in (("utf8", "utf8"), ("ascii", "ascii")):
            assert expected == charset.substitute_encoding(encoding)

        # A Python-internal target encoding is not mentioned in the
        # output HTML at all.
        assert "" == charset.substitute_encoding("palmos")

    def test_content_meta_attribute_value(self):
        """Only the charset portion of a content value is replaced."""
        content = ContentMetaAttributeValue("text/html; charset=euc-jp")
        assert "text/html; charset=euc-jp" == content
        assert "text/html; charset=euc-jp" == content.original_value

        substitutions = (
            ("utf8", "text/html; charset=utf8"),
            ("ascii", "text/html; charset=ascii"),
        )
        for encoding, expected in substitutions:
            assert expected == content.substitute_encoding(encoding)

        # A Python-internal target encoding causes the charset
        # argument to be omitted altogether.
        assert "text/html" == content.substitute_encoding("palmos")


class TestAttributeDicts:
    """Value coercion performed by the XML and HTML attribute dicts."""

    def test_xml_attribute_value_handling(self):
        """Attribute values are processed according to the XML spec's rules."""
        attrs = XMLAttributeDict()

        # Numbers come back as their string form.
        for number, text in ((100, "100"), (100.123, "100.123")):
            attrs["v"] = number
            assert attrs["v"] == text

        # Booleans are stored untouched. This preserves Beautiful Soup's
        # old behavior in the absence of guidance from the spec.
        for flag in (False, True):
            attrs["v"] = flag
            assert attrs["v"] is flag

        # None comes back as the empty string.
        attrs["v"] = None
        assert attrs["v"] == ""

    def test_html_attribute_value_handling(self):
        """Attribute values are processed according to the HTML spec's rules."""
        attrs = HTMLAttributeDict()

        # Numbers come back as their string form.
        for number, text in ((100, "100"), (100.123, "100.123")):
            attrs["v"] = number
            assert attrs["v"] == text

        # Assigning False or None leaves the key absent from the dict.
        for absent_value in (False, None):
            attrs["v"] = absent_value
            assert "v" not in attrs

        # Assigning True stores the attribute's own name as the value.
        attrs["v"] = True
        assert attrs["v"] == "v"

        # For a namespaced key the stored value is "name", without
        # the prefix.
        namespaced_key = NamespacedAttribute("prefix", "name", "namespace")
        attrs[namespaced_key] = True
        assert attrs[namespaced_key] == "name"


class TestResultSet:
    """Tests for ResultSet's handling of unknown attribute access."""

    def test_getattr_exception(self):
        """Reading `.name` on a ResultSet raises a helpful AttributeError."""
        expected_message = """ResultSet object has no attribute "name". You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?"""

        results = ResultSet(None)
        with pytest.raises(AttributeError) as caught:
            results.name

        # The message should point the caller at find() vs. find_all().
        assert expected_message == str(caught.value)