1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
"""Tests of classes in element.py.
The really big classes -- Tag, PageElement, and NavigableString --
are tested in separate files.
"""
import pytest
from bs4.element import (
HTMLAttributeDict,
XMLAttributeDict,
CharsetMetaAttributeValue,
ContentMetaAttributeValue,
NamespacedAttribute,
ResultSet,
)
class TestNamedspacedAttribute:
    """Checks how NamespacedAttribute compares with strings and with itself."""

    def test_name_may_be_none_or_missing(self):
        """A None, empty, or omitted second argument leaves just "xmlns"."""
        for ctor_args in (("xmlns", None), ("xmlns", ""), ("xmlns",)):
            attr = NamespacedAttribute(*ctor_args)
            assert attr == "xmlns"

    def test_namespace_may_be_none_or_missing(self):
        """A None or empty first argument leaves just the name."""
        for first_arg in (None, ""):
            attr = NamespacedAttribute(first_arg, "tag")
            assert attr == "tag"

    def test_attribute_is_equivalent_to_colon_separated_string(self):
        """Two non-empty parts compare equal to their colon-joined form."""
        attr = NamespacedAttribute("a", "b")
        assert "a:b" == attr

    def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
        """Equality depends on prefix and name, not on the namespace."""
        reference = NamespacedAttribute("a", "b", "c")

        # Same prefix and name; the second one has no namespace at all.
        # The actual namespace is not considered.
        for equivalent in (
            NamespacedAttribute("a", "b", "c"),
            NamespacedAttribute("a", "b", None),
        ):
            assert reference == equivalent

        # But name and prefix are important: changing either one
        # breaks equality.
        for different in (
            NamespacedAttribute("a", "z", "c"),
            NamespacedAttribute("z", "b", "c"),
        ):
            assert reference != different
class TestAttributeValueWithCharsetSubstitution:
    """Certain attributes are designed to have the charset of the
    final document substituted into their value.
    """

    def test_charset_meta_attribute_value(self):
        """substitute_encoding() on a bare charset value yields the new name."""
        # The value of a CharsetMetaAttributeValue is whatever
        # encoding the string is in.
        charset = CharsetMetaAttributeValue("euc-jp")
        assert "euc-jp" == charset
        assert "euc-jp" == charset.original_value

        for target, expected in (("utf8", "utf8"), ("ascii", "ascii")):
            assert expected == charset.substitute_encoding(target)

        # If the target encoding is a Python internal encoding,
        # no encoding will be mentioned in the output HTML.
        assert "" == charset.substitute_encoding("palmos")

    def test_content_meta_attribute_value(self):
        """substitute_encoding() rewrites only the charset= part of the value."""
        content = ContentMetaAttributeValue("text/html; charset=euc-jp")
        assert "text/html; charset=euc-jp" == content
        assert "text/html; charset=euc-jp" == content.original_value

        substitutions = (
            ("utf8", "text/html; charset=utf8"),
            ("ascii", "text/html; charset=ascii"),
        )
        for target, expected in substitutions:
            assert expected == content.substitute_encoding(target)

        # If the target encoding is a Python internal encoding, the
        # charset argument will be omitted altogether.
        assert "text/html" == content.substitute_encoding("palmos")
class TestAttributeDicts:
    """Checks how the attribute dictionaries convert values on assignment."""

    def test_xml_attribute_value_handling(self):
        """Verify that attribute values are processed according to the
        XML spec's rules.
        """
        attrs = XMLAttributeDict()

        # Numbers come back as their string form.
        for number, rendered in ((100, "100"), (100.123, "100.123")):
            attrs["v"] = number
            assert attrs["v"] == rendered

        # Booleans are stored as-is. This preserves Beautiful Soup's old
        # behavior in the absence of guidance from the spec.
        for flag in (False, True):
            attrs["v"] = flag
            assert attrs["v"] is flag

        attrs["v"] = None
        assert attrs["v"] == ""

    def test_html_attribute_value_handling(self):
        """Verify that attribute values are processed according to the
        HTML spec's rules.
        """
        attrs = HTMLAttributeDict()

        # Numbers come back as their string form.
        for number, rendered in ((100, "100"), (100.123, "100.123")):
            attrs["v"] = number
            assert attrs["v"] == rendered

        # Assigning False or None leaves the key absent, even though it
        # was present just before the assignment.
        for absent_value in (False, None):
            attrs["v"] = absent_value
            assert "v" not in attrs

        # Assigning True stores the attribute's own name as the value.
        attrs["v"] = True
        assert attrs["v"] == "v"

        # For a namespaced key, the stored value is the name without
        # the prefix.
        namespaced_key = NamespacedAttribute("prefix", "name", "namespace")
        attrs[namespaced_key] = True
        assert attrs[namespaced_key] == "name"
class TestResultSet:
    """Checks the error raised for a missing attribute on a ResultSet."""

    def test_getattr_exception(self):
        """Reading .name raises AttributeError with an explanatory message."""
        expected_message = """ResultSet object has no attribute "name". You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?"""

        results = ResultSet(None)
        with pytest.raises(AttributeError) as caught:
            results.name

        # Checked after the context manager exits so the comparison
        # actually runs once the exception has been captured.
        assert expected_message == str(caught.value)
|