1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logging import getLogger
from re import compile, split
from typing import Dict, List, Mapping
from urllib.parse import unquote
from deprecated import deprecated
# Module-level logger; the parser below uses it to warn about header entries
# that do not match the expected format.
_logger = getLogger(__name__)
# The following regexes reference this spec: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md#specifying-headers-via-environment-variables
# Optional whitespace: a possibly empty run of spaces and horizontal tabs.
_OWS = r"[ \t]*"
# A key contains printable US-ASCII characters except: SP and "(),/:;<=>?@[\]{}
# At least one character is required (note the trailing ``+``).
_KEY_FORMAT = (
r"[\x21\x23-\x27\x2a\x2b\x2d\x2e\x30-\x39\x41-\x5a\x5e-\x7a\x7c\x7e]+"
)
# A value contains a URL-encoded UTF-8 string. The encoded form can contain any
# printable US-ASCII characters (0x20-0x7f) other than SP, DEL, and the
# characters ",;\ (double quote, comma, semicolon, backslash). Note that "/"
# (0x2f) and "=" (0x3d) ARE allowed; only 0x5c (backslash) is skipped between
# the 0x3c-0x5b and 0x5d-0x7e ranges. The value may be empty (trailing ``*``).
_VALUE_FORMAT = r"[\x21\x23-\x2b\x2d-\x3a\x3c-\x5b\x5d-\x7e]*"
# Like above with SP (0x20) included, so values that were not URL-encoded and
# contain literal spaces can still match.
_LIBERAL_VALUE_FORMAT = r"[\x20\x21\x23-\x2b\x2d-\x3a\x3c-\x5b\x5d-\x7e]*"
# A key-value is key=value, with optional whitespace surrounding key and value
_KEY_VALUE_FORMAT = rf"{_OWS}{_KEY_FORMAT}{_OWS}={_OWS}{_VALUE_FORMAT}{_OWS}"
# Strict pattern for a single ``key=value`` entry.
_HEADER_PATTERN = compile(_KEY_VALUE_FORMAT)
# Same shape as the strict pattern, but the value may contain spaces.
_LIBERAL_HEADER_PATTERN = compile(
rf"{_OWS}{_KEY_FORMAT}{_OWS}={_OWS}{_LIBERAL_VALUE_FORMAT}{_OWS}"
)
# Entry separator: a comma together with any spaces/tabs around it.
_DELIMITER_PATTERN = compile(r"[ \t]*,[ \t]*")
# Uncompiled regex source matching either ``key=value`` or a bare ``key``,
# each with optional surrounding whitespace.
_BAGGAGE_PROPERTY_FORMAT = rf"{_KEY_VALUE_FORMAT}|{_OWS}{_KEY_FORMAT}{_OWS}"
# Warning templates; ``%s`` is filled in lazily by the logger with the
# offending header entry.
_INVALID_HEADER_ERROR_MESSAGE_STRICT_TEMPLATE = (
"Header format invalid! Header values in environment variables must be "
"URL encoded per the OpenTelemetry Protocol Exporter specification: %s"
)
_INVALID_HEADER_ERROR_MESSAGE_LIBERAL_TEMPLATE = (
"Header format invalid! Header values in environment variables must be "
"URL encoded per the OpenTelemetry Protocol Exporter specification or "
"a comma separated list of name=value occurrences: %s"
)
# pylint: disable=invalid-name
@deprecated(version="1.15.0", reason="You should use parse_env_headers")  # type: ignore
def parse_headers(s: str) -> Mapping[str, str]:
    """
    Deprecated alias kept for backwards compatibility.

    Forwards ``s`` unchanged to ``parse_env_headers`` without passing
    ``liberal``, so that function's default applies, and returns whatever
    mapping it produces.
    """
    parsed: Mapping[str, str] = parse_env_headers(s)
    return parsed
def parse_env_headers(s: str, liberal: bool = False) -> Mapping[str, str]:
    """
    Parse ``s``, which is a ``str`` instance containing HTTP headers encoded
    for use in ENV variables per the W3C Baggage HTTP header format at
    https://www.w3.org/TR/baggage/#baggage-http-header-format, except that
    additional semi-colon delimited metadata is not supported.

    If ``liberal`` is True we try to parse ``s`` anyway to be more compatible
    with other languages SDKs that accept non URL-encoded headers by default.

    Args:
        s: Comma separated list of ``name=value`` entries.
        liberal: When True, entries that fail the strict (URL-encoded) format
            are retried against a pattern that also allows spaces in the
            value; such entries are stored as-is, without URL-decoding.

    Returns:
        A mapping of lower-cased header names to values. Entries matching the
        strict format have both name and value URL-decoded. Entries that
        match no accepted format are skipped after logging a warning. Empty
        or whitespace-only entries are skipped silently. When a name occurs
        more than once, the last occurrence wins.
    """
    headers: Dict[str, str] = {}
    for header in split(_DELIMITER_PATTERN, s):
        # Strip once, up front: the delimiter pattern only consumes spaces
        # and tabs next to commas, so leading/trailing whitespace of the whole
        # string (including newlines) can still be attached to an entry.
        entry = header.strip()
        if not entry:
            # Empty or whitespace-only segment (e.g. a trailing comma or
            # newline): there is nothing to parse and nothing worth warning
            # about.
            continue

        if _HEADER_PATTERN.fullmatch(entry):
            # Spec-compliant entry. Split on the first `=` only, since the
            # value may contain any number of `=`.
            name, value = entry.split("=", 1)
            headers[unquote(name).strip().lower()] = unquote(value).strip()
            continue

        if not liberal:
            _logger.warning(
                _INVALID_HEADER_ERROR_MESSAGE_STRICT_TEMPLATE, header
            )
            continue

        # this is not url-encoded and does not match the spec but we decided
        # to be liberal in what we accept to match other languages SDKs
        # behaviour
        if not _LIBERAL_HEADER_PATTERN.fullmatch(entry):
            _logger.warning(
                _INVALID_HEADER_ERROR_MESSAGE_LIBERAL_TEMPLATE, header
            )
            continue

        # value may contain any number of `=`; no URL-decoding in this branch
        name, value = entry.split("=", 1)
        headers[name.strip().lower()] = value.strip()
    return headers
|