Repeated headers list for ASGI frameworks (#2361)

* avoid losing repeated HTTP headers

* fix for wsgi, test in falcon

* add changelog

* add more tests

* linting

* fix falcon and flask

* remove unused test

* Use a list for repeated HTTP headers

* linting

* add changelog entry

* update docs and improve fastapi tests

* revert changes in wsgi based webframeworks

* fix linting

* Fix import path of typing symbols

---------

Co-authored-by: Leighton Chen <lechen@microsoft.com>
Co-authored-by: Diego Hurtado <ocelotl@users.noreply.github.com>
This commit is contained in:
Samuel Colvin
2024-06-20 23:37:26 +01:00
committed by GitHub
parent a61739c9c2
commit f6ed62a7aa
7 changed files with 77 additions and 52 deletions

View File

@ -48,7 +48,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#2425](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2425)) ([#2425](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2425))
- `opentelemetry-instrumentation-flask` Add `http.method` to `span.name` - `opentelemetry-instrumentation-flask` Add `http.method` to `span.name`
([#2454](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2454)) ([#2454](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2454))
- ASGI, FastAPI, Starlette: provide both send and receive hooks with `scope` and `message` for internal spans ([#2546](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2546)) - Record repeated HTTP headers in lists, rather than a comma-separated string, for ASGI-based web frameworks
([#2361](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2361))
- ASGI, FastAPI, Starlette: provide both send and receive hooks with `scope` and `message` for internal spans
- ([#2546](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2546))
### Added ### Added

View File

@ -129,10 +129,10 @@ To capture all request headers, set ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_
The name of the added span attribute will follow the format ``http.request.header.<header_name>`` where ``<header_name>`` The name of the added span attribute will follow the format ``http.request.header.<header_name>`` where ``<header_name>``
is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a
single item list containing all the header values. list containing the header values.
For example: For example:
``http.request.header.custom_request_header = ["<value1>,<value2>"]`` ``http.request.header.custom_request_header = ["<value1>", "<value2>"]``
Response headers Response headers
**************** ****************
@ -163,10 +163,10 @@ To capture all response headers, set ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS
The name of the added span attribute will follow the format ``http.response.header.<header_name>`` where ``<header_name>`` The name of the added span attribute will follow the format ``http.response.header.<header_name>`` where ``<header_name>``
is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a
single item list containing all the header values. list containing the header values.
For example: For example:
``http.response.header.custom_response_header = ["<value1>,<value2>"]`` ``http.response.header.custom_response_header = ["<value1>", "<value2>"]``
Sanitizing headers Sanitizing headers
****************** ******************
@ -193,9 +193,10 @@ from __future__ import annotations
import typing import typing
import urllib import urllib
from collections import defaultdict
from functools import wraps from functools import wraps
from timeit import default_timer from timeit import default_timer
from typing import Any, Awaitable, Callable, Tuple from typing import Any, Awaitable, Callable, DefaultDict, Tuple
from asgiref.compatibility import guarantee_single_callable from asgiref.compatibility import guarantee_single_callable
@ -340,24 +341,19 @@ def collect_custom_headers_attributes(
sanitize: SanitizeValue, sanitize: SanitizeValue,
header_regexes: list[str], header_regexes: list[str],
normalize_names: Callable[[str], str], normalize_names: Callable[[str], str],
) -> dict[str, str]: ) -> dict[str, list[str]]:
""" """
Returns custom HTTP request or response headers to be added into SERVER span as span attributes. Returns custom HTTP request or response headers to be added into SERVER span as span attributes.
Refer specifications: Refer specifications:
- https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/http.md#http-request-and-response-headers - https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/http.md#http-request-and-response-headers
""" """
# Decode headers before processing. headers: DefaultDict[str, list[str]] = defaultdict(list)
headers: dict[str, str] = {}
raw_headers = scope_or_response_message.get("headers") raw_headers = scope_or_response_message.get("headers")
if raw_headers: if raw_headers:
for _key, _value in raw_headers: for key, value in raw_headers:
key = _key.decode().lower() # Decode headers before processing.
value = _value.decode() headers[key.decode()].append(value.decode())
if key in headers:
headers[key] += f",{value}"
else:
headers[key] = value
return sanitize.sanitize_header_values( return sanitize.sanitize_header_values(
headers, headers,

View File

@ -152,7 +152,8 @@ class TestCustomHeaders(AsgiTestBase, TestBase):
span_list = self.exporter.get_finished_spans() span_list = self.exporter.get_finished_spans()
expected = { expected = {
"http.request.header.custom_test_header_1": ( "http.request.header.custom_test_header_1": (
"test-header-value-1,test-header-value-2", "test-header-value-1",
"test-header-value-2",
), ),
} }
span = next(span for span in span_list if span.kind == SpanKind.SERVER) span = next(span for span in span_list if span.kind == SpanKind.SERVER)
@ -225,7 +226,8 @@ class TestCustomHeaders(AsgiTestBase, TestBase):
span_list = self.exporter.get_finished_spans() span_list = self.exporter.get_finished_spans()
expected = { expected = {
"http.response.header.custom_test_header_1": ( "http.response.header.custom_test_header_1": (
"test-header-value-1,test-header-value-2", "test-header-value-1",
"test-header-value-2",
), ),
} }
span = next(span for span in span_list if span.kind == SpanKind.SERVER) span = next(span for span in span_list if span.kind == SpanKind.SERVER)

View File

@ -115,7 +115,7 @@ is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). Th
single item list containing all the header values. list containing the header values.
For example: For example:
``http.request.header.custom_request_header = ["<value1>,<value2>"]`` ``http.request.header.custom_request_header = ["<value1>", "<value2>"]``
Response headers Response headers
**************** ****************
@ -146,10 +146,10 @@ To capture all response headers, set ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS
The name of the added span attribute will follow the format ``http.response.header.<header_name>`` where ``<header_name>`` The name of the added span attribute will follow the format ``http.response.header.<header_name>`` where ``<header_name>``
is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a
single item list containing all the header values. list containing the header values.
For example: For example:
``http.response.header.custom_response_header = ["<value1>,<value2>"]`` ``http.response.header.custom_response_header = ["<value1>", "<value2>"]``
Sanitizing headers Sanitizing headers
****************** ******************

View File

@ -11,9 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import unittest import unittest
from collections.abc import Mapping
from timeit import default_timer from timeit import default_timer
from typing import Tuple
from unittest.mock import patch from unittest.mock import patch
import fastapi import fastapi
@ -557,6 +558,24 @@ class TestWrappedApplication(TestBase):
) )
class MultiMapping(Mapping):
    """Mapping stand-in that can carry the same key more than once.

    The pairs given to the constructor are stored as-is and handed back
    unchanged by ``items()``, so a repeated key yields several pairs.
    Key lookup and iteration are deliberately unsupported and raise
    ``NotImplementedError``; only ``len()`` and ``items()`` are usable.
    """

    # Shared message for the operations this helper refuses to support.
    _UNSUPPORTED = "use .items() instead"

    def __init__(self, *items: Tuple[str, str]):
        # Keep the positional pairs exactly as received (a tuple of pairs).
        self._items = items

    def items(self):
        """Return the stored pairs, duplicates included, in given order."""
        return self._items

    def __len__(self):
        """Return the number of stored pairs (not the number of unique keys)."""
        return len(self._items)

    def __iter__(self):
        """Iteration over keys is not supported; always raises."""
        raise NotImplementedError(self._UNSUPPORTED)

    def __getitem__(self, __key):
        """Lookup by key is not supported; always raises."""
        raise NotImplementedError(self._UNSUPPORTED)
@patch.dict( @patch.dict(
"os.environ", "os.environ",
{ {
@ -583,13 +602,15 @@ class TestHTTPAppWithCustomHeaders(TestBase):
@app.get("/foobar") @app.get("/foobar")
async def _(): async def _():
headers = { headers = MultiMapping(
"custom-test-header-1": "test-header-value-1", ("custom-test-header-1", "test-header-value-1"),
"custom-test-header-2": "test-header-value-2", ("custom-test-header-2", "test-header-value-2"),
"my-custom-regex-header-1": "my-custom-regex-value-1,my-custom-regex-value-2", ("my-custom-regex-header-1", "my-custom-regex-value-1"),
"My-Custom-Regex-Header-2": "my-custom-regex-value-3,my-custom-regex-value-4", ("my-custom-regex-header-1", "my-custom-regex-value-2"),
"My-Secret-Header": "My Secret Value", ("My-Custom-Regex-Header-2", "my-custom-regex-value-3"),
} ("My-Custom-Regex-Header-2", "my-custom-regex-value-4"),
("My-Secret-Header", "My Secret Value"),
)
content = {"message": "hello world"} content = {"message": "hello world"}
return JSONResponse(content=content, headers=headers) return JSONResponse(content=content, headers=headers)
@ -665,10 +686,12 @@ class TestHTTPAppWithCustomHeaders(TestBase):
"test-header-value-2", "test-header-value-2",
), ),
"http.response.header.my_custom_regex_header_1": ( "http.response.header.my_custom_regex_header_1": (
"my-custom-regex-value-1,my-custom-regex-value-2", "my-custom-regex-value-1",
"my-custom-regex-value-2",
), ),
"http.response.header.my_custom_regex_header_2": ( "http.response.header.my_custom_regex_header_2": (
"my-custom-regex-value-3,my-custom-regex-value-4", "my-custom-regex-value-3",
"my-custom-regex-value-4",
), ),
"http.response.header.my_secret_header": ("[REDACTED]",), "http.response.header.my_secret_header": ("[REDACTED]",),
} }

View File

@ -110,10 +110,10 @@ Additionally, the special keyword ``all`` can be used to capture all request hea
The name of the added span attribute will follow the format ``http.request.header.<header_name>`` where ``<header_name>`` The name of the added span attribute will follow the format ``http.request.header.<header_name>`` where ``<header_name>``
is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a
single item list containing all the header values. list containing the header values.
For example: For example:
``http.request.header.custom_request_header = ["<value1>,<value2>"]`` ``http.request.header.custom_request_header = ["<value1>", "<value2>"]``
Response headers Response headers
**************** ****************
@ -144,10 +144,10 @@ Additionally, the special keyword ``all`` can be used to capture all response he
The name of the added span attribute will follow the format ``http.response.header.<header_name>`` where ``<header_name>`` The name of the added span attribute will follow the format ``http.response.header.<header_name>`` where ``<header_name>``
is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a
single item list containing all the header values. list containing the header values.
For example: For example:
``http.response.header.custom_response_header = ["<value1>,<value2>"]`` ``http.response.header.custom_response_header = ["<value1>", "<value2>"]``
Sanitizing headers Sanitizing headers
****************** ******************

View File

@ -14,6 +14,7 @@
from __future__ import annotations from __future__ import annotations
from collections.abc import Mapping
from os import environ from os import environ
from re import IGNORECASE as RE_IGNORECASE from re import IGNORECASE as RE_IGNORECASE
from re import compile as re_compile from re import compile as re_compile
@ -87,32 +88,32 @@ class SanitizeValue:
def sanitize_header_values( def sanitize_header_values(
self, self,
headers: dict[str, str], headers: Mapping[str, str | list[str]],
header_regexes: list[str], header_regexes: list[str],
normalize_function: Callable[[str], str], normalize_function: Callable[[str], str],
) -> dict[str, str]: ) -> dict[str, list[str]]:
values: dict[str, str] = {} values: dict[str, list[str]] = {}
if header_regexes: if header_regexes:
header_regexes_compiled = re_compile( header_regexes_compiled = re_compile(
"|".join("^" + i + "$" for i in header_regexes), "|".join(header_regexes),
RE_IGNORECASE, RE_IGNORECASE,
) )
for header_name in list( for header_name, header_value in headers.items():
filter( if header_regexes_compiled.fullmatch(header_name):
header_regexes_compiled.match,
headers.keys(),
)
):
header_values = headers.get(header_name)
if header_values:
key = normalize_function(header_name.lower()) key = normalize_function(header_name.lower())
values[key] = [ if isinstance(header_value, str):
self.sanitize_header_value( values[key] = [
header=header_name, value=header_values self.sanitize_header_value(
) header_name, header_value
] )
]
else:
values[key] = [
self.sanitize_header_value(header_name, value)
for value in header_value
]
return values return values