Add exclude_list for urllib/urllib3 instrumentations (#1733)

* urllib

* urllib3

* Update __init__.py

* readme

* lint
This commit is contained in:
Leighton Chen
2023-03-30 18:10:23 -07:00
committed by GitHub
parent 5052190c13
commit 4e059b15d2
8 changed files with 217 additions and 6 deletions

View File

@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#1690](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1690))
- Add metrics instrumentation for sqlalchemy
([#1645](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1645))
- Add `excluded_urls` functionality to `urllib` and `urllib3` instrumentations
([#1733](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1733))
### Fixed

View File

@ -51,7 +51,7 @@ API
import functools
import types
from timeit import default_timer
from typing import Callable, Collection, Iterable, Optional
from typing import Callable, Collection, Optional
from urllib.parse import urlparse
from requests.models import PreparedRequest, Response
@ -77,6 +77,7 @@ from opentelemetry.trace import SpanKind, Tracer, get_tracer
from opentelemetry.trace.span import Span
from opentelemetry.trace.status import Status
from opentelemetry.util.http import (
ExcludeList,
get_excluded_urls,
parse_excluded_urls,
remove_url_credentials,
@ -96,7 +97,7 @@ def _instrument(
duration_histogram: Histogram,
request_hook: _RequestHookT = None,
response_hook: _ResponseHookT = None,
excluded_urls: Iterable[str] = None,
excluded_urls: ExcludeList = None,
):
"""Enables tracing of all requests calls that go through
:code:`requests.session.Session.request` (this includes

View File

@ -16,6 +16,46 @@ Installation
pip install opentelemetry-instrumentation-urllib
Configuration
-------------
Request/Response hooks
**********************
The urllib instrumentation supports extending tracing behavior with the help of
request and response hooks. These are callback functions that the instrumentation invokes
right after a span is created for a request and right before the span is finished when processing a response, respectively.
The hooks can be configured as follows:
.. code:: python
# `request_obj` is an instance of urllib.request.Request
def request_hook(span, request_obj):
pass
# `request_obj` is an instance of urllib.request.Request
# `response` is an instance of http.client.HTTPResponse
def response_hook(span, request_obj, response):
pass
URLLibInstrumentor().instrument(
request_hook=request_hook, response_hook=response_hook
)
Exclude lists
*************
To exclude certain URLs from being tracked, set the environment variable ``OTEL_PYTHON_URLLIB_EXCLUDED_URLS``
(or ``OTEL_PYTHON_EXCLUDED_URLS`` as a fallback) to a comma-delimited list of regexes matching the URLs to exclude.
For example,
::
export OTEL_PYTHON_URLLIB_EXCLUDED_URLS="client/.*/info,healthcheck"
will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``.
References
----------

View File

@ -56,6 +56,20 @@ The hooks can be configured as follows:
request_hook=request_hook, response_hook=response_hook)
)
Exclude lists
*************
To exclude certain URLs from being tracked, set the environment variable ``OTEL_PYTHON_URLLIB_EXCLUDED_URLS``
(or ``OTEL_PYTHON_EXCLUDED_URLS`` as a fallback) to a comma-delimited list of regexes matching the URLs to exclude.
For example,
::
export OTEL_PYTHON_URLLIB_EXCLUDED_URLS="client/.*/info,healthcheck"
will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``.
API
---
"""
@ -88,7 +102,14 @@ from opentelemetry.semconv.metrics import MetricInstruments
from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.trace import Span, SpanKind, get_tracer
from opentelemetry.trace.status import Status
from opentelemetry.util.http import remove_url_credentials
from opentelemetry.util.http import (
ExcludeList,
get_excluded_urls,
parse_excluded_urls,
remove_url_credentials,
)
_excluded_urls_from_env = get_excluded_urls("URLLIB")
_RequestHookT = typing.Optional[typing.Callable[[Span, Request], None]]
_ResponseHookT = typing.Optional[
@ -112,10 +133,12 @@ class URLLibInstrumentor(BaseInstrumentor):
``tracer_provider``: a TracerProvider, defaults to global
``request_hook``: An optional callback that is invoked right after a span is created.
``response_hook``: An optional callback which is invoked right before the span is finished processing a response
``excluded_urls``: A string containing a comma-delimited
list of regexes used to exclude URLs from tracking
"""
tracer_provider = kwargs.get("tracer_provider")
tracer = get_tracer(__name__, __version__, tracer_provider)
excluded_urls = kwargs.get("excluded_urls")
meter_provider = kwargs.get("meter_provider")
meter = get_meter(__name__, __version__, meter_provider)
@ -126,6 +149,9 @@ class URLLibInstrumentor(BaseInstrumentor):
histograms,
request_hook=kwargs.get("request_hook"),
response_hook=kwargs.get("response_hook"),
excluded_urls=_excluded_urls_from_env
if excluded_urls is None
else parse_excluded_urls(excluded_urls),
)
def _uninstrument(self, **kwargs):
@ -143,6 +169,7 @@ def _instrument(
histograms: Dict[str, Histogram],
request_hook: _RequestHookT = None,
response_hook: _ResponseHookT = None,
excluded_urls: ExcludeList = None,
):
"""Enables tracing of all requests calls that go through
:code:`urllib.Client._make_request`"""
@ -174,8 +201,11 @@ def _instrument(
) or context.get_value(_SUPPRESS_HTTP_INSTRUMENTATION_KEY):
return call_wrapped()
method = request.get_method().upper()
url = request.full_url
if excluded_urls and excluded_urls.url_disabled(url):
return call_wrapped()
method = request.get_method().upper()
span_name = f"HTTP {method}".strip()

View File

@ -38,6 +38,7 @@ from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.test.mock_textmap import MockTextMapPropagator
from opentelemetry.test.test_base import TestBase
from opentelemetry.trace import StatusCode
from opentelemetry.util.http import get_excluded_urls
# pylint: disable=too-many-public-methods
@ -52,6 +53,21 @@ class RequestsIntegrationTestBase(abc.ABC):
# pylint: disable=invalid-name
def setUp(self):
super().setUp()
self.env_patch = mock.patch.dict(
"os.environ",
{
"OTEL_PYTHON_URLLIB_EXCLUDED_URLS": "http://localhost/env_excluded_arg/123,env_excluded_noarg"
},
)
self.env_patch.start()
self.exclude_patch = mock.patch(
"opentelemetry.instrumentation.urllib._excluded_urls_from_env",
get_excluded_urls("URLLIB"),
)
self.exclude_patch.start()
URLLibInstrumentor().instrument()
httpretty.enable()
httpretty.register_uri(httpretty.GET, self.URL, body=b"Hello!")
@ -125,6 +141,36 @@ class RequestsIntegrationTestBase(abc.ABC):
span, opentelemetry.instrumentation.urllib
)
def test_excluded_urls_explicit(self):
# Checks the ``excluded_urls`` keyword argument of ``instrument()``:
# two requests are performed below, but exactly one span is expected.
url_201 = "http://httpbin.org/status/201"
httpretty.register_uri(
httpretty.GET,
url_201,
status=201,
)
# setUp already instrumented urllib; re-instrument so the explicit
# pattern is the one in effect for the requests that follow.
URLLibInstrumentor().uninstrument()
URLLibInstrumentor().instrument(excluded_urls=".*/201")
self.perform_request(self.URL)
# This URL ends in "/201", which the pattern above is meant to match.
self.perform_request(url_201)
self.assert_span(num_spans=1)
def test_excluded_urls_from_env(self):
# Checks exclusion configured through the environment: two requests are
# performed below, but exactly one span is expected.
# This URL is the first entry of OTEL_PYTHON_URLLIB_EXCLUDED_URLS as
# patched into os.environ by setUp.
url = "http://localhost/env_excluded_arg/123"
httpretty.register_uri(
httpretty.GET,
url,
status=200,
)
# Re-instrument without ``excluded_urls`` so the instrumentor falls back
# to the exclude list derived from the environment.
URLLibInstrumentor().uninstrument()
URLLibInstrumentor().instrument()
self.perform_request(self.URL)
self.perform_request(url)
self.assert_span(num_spans=1)
def test_not_foundbasic(self):
url_404 = "http://httpbin.org/status/404/"
httpretty.register_uri(

View File

@ -42,6 +42,20 @@ The hooks can be configured as follows:
request_hook=request_hook, response_hook=response_hook)
)
Exclude lists
*************
To exclude certain URLs from being tracked, set the environment variable ``OTEL_PYTHON_URLLIB3_EXCLUDED_URLS``
(or ``OTEL_PYTHON_EXCLUDED_URLS`` as a fallback) to a comma-delimited list of regexes matching the URLs to exclude.
For example,
::
export OTEL_PYTHON_URLLIB3_EXCLUDED_URLS="client/.*/info,healthcheck"
will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``.
References
----------

View File

@ -60,6 +60,20 @@ The hooks can be configured as follows:
request_hook=request_hook, response_hook=response_hook)
)
Exclude lists
*************
To exclude certain URLs from being tracked, set the environment variable ``OTEL_PYTHON_URLLIB3_EXCLUDED_URLS``
(or ``OTEL_PYTHON_EXCLUDED_URLS`` as a fallback) to a comma-delimited list of regexes matching the URLs to exclude.
For example,
::
export OTEL_PYTHON_URLLIB3_EXCLUDED_URLS="client/.*/info,healthcheck"
will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``.
API
---
"""
@ -92,8 +106,15 @@ from opentelemetry.semconv.metrics import MetricInstruments
from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.trace import Span, SpanKind, Tracer, get_tracer
from opentelemetry.trace.status import Status
from opentelemetry.util.http import (
ExcludeList,
get_excluded_urls,
parse_excluded_urls,
)
from opentelemetry.util.http.httplib import set_ip_on_next_http_connection
_excluded_urls_from_env = get_excluded_urls("URLLIB3")
_UrlFilterT = typing.Optional[typing.Callable[[str], str]]
_RequestHookT = typing.Optional[
typing.Callable[
@ -138,10 +159,14 @@ class URLLib3Instrumentor(BaseInstrumentor):
``response_hook``: An optional callback which is invoked right before the span is finished processing a response.
``url_filter``: A callback to process the requested URL prior
to adding it as a span attribute.
``excluded_urls``: A string containing a comma-delimited
list of regexes used to exclude URLs from tracking
"""
tracer_provider = kwargs.get("tracer_provider")
tracer = get_tracer(__name__, __version__, tracer_provider)
excluded_urls = kwargs.get("excluded_urls")
meter_provider = kwargs.get("meter_provider")
meter = get_meter(__name__, __version__, meter_provider)
@ -169,6 +194,9 @@ class URLLib3Instrumentor(BaseInstrumentor):
request_hook=kwargs.get("request_hook"),
response_hook=kwargs.get("response_hook"),
url_filter=kwargs.get("url_filter"),
excluded_urls=_excluded_urls_from_env
if excluded_urls is None
else parse_excluded_urls(excluded_urls),
)
def _uninstrument(self, **kwargs):
@ -183,13 +211,17 @@ def _instrument(
request_hook: _RequestHookT = None,
response_hook: _ResponseHookT = None,
url_filter: _UrlFilterT = None,
excluded_urls: ExcludeList = None,
):
def instrumented_urlopen(wrapped, instance, args, kwargs):
if _is_instrumentation_suppressed():
return wrapped(*args, **kwargs)
method = _get_url_open_arg("method", args, kwargs).upper()
url = _get_url(instance, args, kwargs, url_filter)
if excluded_urls and excluded_urls.url_disabled(url):
return wrapped(*args, **kwargs)
method = _get_url_open_arg("method", args, kwargs).upper()
headers = _prepare_headers(kwargs)
body = _get_url_open_arg("body", args, kwargs)

View File

@ -29,6 +29,7 @@ from opentelemetry.propagate import get_global_textmap, set_global_textmap
from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.test.mock_textmap import MockTextMapPropagator
from opentelemetry.test.test_base import TestBase
from opentelemetry.util.http import get_excluded_urls
# pylint: disable=too-many-public-methods
@ -39,6 +40,21 @@ class TestURLLib3Instrumentor(TestBase):
def setUp(self):
super().setUp()
self.env_patch = mock.patch.dict(
"os.environ",
{
"OTEL_PYTHON_URLLIB3_EXCLUDED_URLS": "http://localhost/env_excluded_arg/123,env_excluded_noarg"
},
)
self.env_patch.start()
self.exclude_patch = mock.patch(
"opentelemetry.instrumentation.urllib3._excluded_urls_from_env",
get_excluded_urls("URLLIB3"),
)
self.exclude_patch.start()
URLLib3Instrumentor().instrument()
httpretty.enable(allow_net_connect=False)
@ -158,6 +174,36 @@ class TestURLLib3Instrumentor(TestBase):
self.assert_success_span(response, url)
def test_excluded_urls_explicit(self):
# Checks the ``excluded_urls`` keyword argument of ``instrument()``:
# two requests are performed below, but exactly one span is expected.
url_201 = "http://httpbin.org/status/201"
httpretty.register_uri(
httpretty.GET,
url_201,
status=201,
)
# setUp already instrumented urllib3; re-instrument so the explicit
# pattern is the one in effect for the requests that follow.
URLLib3Instrumentor().uninstrument()
URLLib3Instrumentor().instrument(excluded_urls=".*/201")
self.perform_request(self.HTTP_URL)
# This URL ends in "/201", which the pattern above is meant to match.
self.perform_request(url_201)
self.assert_span(num_spans=1)
def test_excluded_urls_from_env(self):
# Checks exclusion configured through the environment: two requests are
# performed below, but exactly one span is expected.
# This URL is the first entry of OTEL_PYTHON_URLLIB3_EXCLUDED_URLS as
# patched into os.environ by setUp.
url = "http://localhost/env_excluded_arg/123"
httpretty.register_uri(
httpretty.GET,
url,
status=200,
)
# Re-instrument without ``excluded_urls`` so the instrumentor falls back
# to the exclude list derived from the environment.
URLLib3Instrumentor().uninstrument()
URLLib3Instrumentor().instrument()
self.perform_request(self.HTTP_URL)
self.perform_request(url)
self.assert_span(num_spans=1)
def test_uninstrument(self):
URLLib3Instrumentor().uninstrument()