Add fallback decoding for asgi headers (#2837)

* Add latin-1 fallback decoding for asgi headers

* Add comment for ASGI encoding spec and change to unicode_escape

* add unit test for non-utf8 header decoding

* add changelog

* revert lint

* code review changes

* Fix changelog

* Add ASGIGetter test

---------

Co-authored-by: Emídio Neto <9735060+emdneto@users.noreply.github.com>
Co-authored-by: Riccardo Magliocchetti <riccardo.magliocchetti@gmail.com>
This commit is contained in:
Rocky Ken
2024-09-25 08:19:20 -07:00
committed by GitHub
parent 3deb6b9db6
commit a084c2c7df
4 changed files with 50 additions and 6 deletions

View File

@ -23,6 +23,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#2537](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2537)) ([#2537](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2537))
- `opentelemetry-instrumentation-asgi`, `opentelemetry-instrumentation-fastapi` Add ability to disable internal HTTP send and receive spans - `opentelemetry-instrumentation-asgi`, `opentelemetry-instrumentation-fastapi` Add ability to disable internal HTTP send and receive spans
([#2802](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2802)) ([#2802](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2802))
- `opentelemetry-instrumentation-asgi` Add fallback decoding for ASGI headers
([#2837](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2837))
### Breaking changes ### Breaking changes

View File

@ -284,9 +284,9 @@ class ASGIGetter(Getter[dict]):
# ASGI header keys are in lower case # ASGI header keys are in lower case
key = key.lower() key = key.lower()
decoded = [ decoded = [
_value.decode("utf8") _decode_header_item(_value)
for (_key, _value) in headers for (_key, _value) in headers
if _key.decode("utf8").lower() == key if _decode_header_item(_key).lower() == key
] ]
if not decoded: if not decoded:
return None return None
@ -294,7 +294,7 @@ class ASGIGetter(Getter[dict]):
def keys(self, carrier: dict) -> typing.List[str]: def keys(self, carrier: dict) -> typing.List[str]:
headers = carrier.get("headers") or [] headers = carrier.get("headers") or []
return [_key.decode("utf8") for (_key, _value) in headers] return [_decode_header_item(_key) for (_key, _value) in headers]
asgi_getter = ASGIGetter() asgi_getter = ASGIGetter()
@ -410,7 +410,9 @@ def collect_custom_headers_attributes(
if raw_headers: if raw_headers:
for key, value in raw_headers: for key, value in raw_headers:
# Decode headers before processing. # Decode headers before processing.
headers[key.decode()].append(value.decode()) headers[_decode_header_item(key)].append(
_decode_header_item(value)
)
return sanitize.sanitize_header_values( return sanitize.sanitize_header_values(
headers, headers,
@ -979,3 +981,13 @@ def _parse_active_request_count_attrs(
_server_active_requests_count_attrs_new, _server_active_requests_count_attrs_new,
sem_conv_opt_in_mode, sem_conv_opt_in_mode,
) )
def _decode_header_item(value):
try:
return value.decode("utf-8")
except ValueError:
# ASGI header encoding specs, see:
# - https://asgi.readthedocs.io/en/latest/specs/www.html#wsgi-encoding-differences (see: WSGI encoding differences)
# - https://docs.python.org/3/library/codecs.html#text-encodings (see: Text Encodings)
return value.decode("unicode_escape")

View File

@ -48,6 +48,14 @@ async def http_app_with_custom_headers(scope, receive, send):
b"my-custom-regex-value-3,my-custom-regex-value-4", b"my-custom-regex-value-3,my-custom-regex-value-4",
), ),
(b"my-secret-header", b"my-secret-value"), (b"my-secret-header", b"my-secret-value"),
(
b"non-utf8-header",
b"Moto Z\xb2",
),
(
b"Moto-Z\xb2-non-utf8-header-key",
b"Moto Z\xb2",
),
], ],
} }
) )
@ -130,6 +138,14 @@ class TestCustomHeaders(AsyncAsgiTestBase):
(b"Regex-Test-Header-1", b"Regex Test Value 1"), (b"Regex-Test-Header-1", b"Regex Test Value 1"),
(b"regex-test-header-2", b"RegexTestValue2,RegexTestValue3"), (b"regex-test-header-2", b"RegexTestValue2,RegexTestValue3"),
(b"My-Secret-Header", b"My Secret Value"), (b"My-Secret-Header", b"My Secret Value"),
(
b"non-utf8-header",
b"Moto Z\xb2",
),
(
b"Moto-Z\xb2-non-utf8-header-key",
b"Moto Z\xb2",
),
] ]
) )
self.seed_app(self.app) self.seed_app(self.app)
@ -147,6 +163,8 @@ class TestCustomHeaders(AsyncAsgiTestBase):
"http.request.header.regex_test_header_2": ( "http.request.header.regex_test_header_2": (
"RegexTestValue2,RegexTestValue3", "RegexTestValue2,RegexTestValue3",
), ),
"http.request.header.non_utf8_header": ("Moto Z²",),
"http.request.header.moto_z²_non_utf8_header_key": ("Moto Z²",),
"http.request.header.my_secret_header": ("[REDACTED]",), "http.request.header.my_secret_header": ("[REDACTED]",),
} }
for span in span_list: for span in span_list:
@ -223,6 +241,8 @@ class TestCustomHeaders(AsyncAsgiTestBase):
"my-custom-regex-value-3,my-custom-regex-value-4", "my-custom-regex-value-3,my-custom-regex-value-4",
), ),
"http.response.header.my_secret_header": ("[REDACTED]",), "http.response.header.my_secret_header": ("[REDACTED]",),
"http.response.header.non_utf8_header": ("Moto Z²",),
"http.response.header.moto_z²_non_utf8_header_key": ("Moto Z²",),
} }
for span in span_list: for span in span_list:
if span.kind == SpanKind.SERVER: if span.kind == SpanKind.SERVER:
@ -418,8 +438,8 @@ class TestCustomHeaders(AsyncAsgiTestBase):
SANITIZE_FIELDS_TEST_VALUE = ".*my-secret.*" SANITIZE_FIELDS_TEST_VALUE = ".*my-secret.*"
SERVER_REQUEST_TEST_VALUE = "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,Regex-Test-Header-.*,Regex-Invalid-Test-Header-.*,.*my-secret.*" SERVER_REQUEST_TEST_VALUE = "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,Regex-Test-Header-.*,Regex-Invalid-Test-Header-.*,.*my-secret.*,non-utf8-header,Moto-Z²-non-utf8-header-key"
SERVER_RESPONSE_TEST_VALUE = "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,my-custom-regex-header-.*,invalid-regex-header-.*,.*my-secret.*" SERVER_RESPONSE_TEST_VALUE = "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,my-custom-regex-header-.*,invalid-regex-header-.*,.*my-secret.*,non-utf8-header,Moto-Z²-non-utf8-header-key"
class TestCustomHeadersEnv(TestCustomHeaders): class TestCustomHeadersEnv(TestCustomHeaders):

View File

@ -69,3 +69,13 @@ class TestASGIGetter(TestCase):
expected_val, expected_val,
"Should be equal", "Should be equal",
) )
def test_non_utf8_headers(self):
    # A header value containing the byte 0xB2 is not valid UTF-8; the getter
    # is expected to still return it as decoded text.
    raw_headers = [(b"test-key", b"Moto Z\xb2")]
    actual = ASGIGetter().get({"headers": raw_headers}, "test-key")
    self.assertEqual(actual, ["Moto Z²"], "Should be equal")