Add exclude_list for urllib/urllib3 instrumentations (#1733)

* urllib

* urllib3

* Update __init__.py

* readme

* lint
This commit is contained in:
Leighton Chen
2023-03-30 18:10:23 -07:00
committed by GitHub
parent 5052190c13
commit 4e059b15d2
8 changed files with 217 additions and 6 deletions

View File

@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#1690](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1690)) ([#1690](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1690))
- Add metrics instrumentation for sqlalchemy - Add metrics instrumentation for sqlalchemy
([#1645](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1645)) ([#1645](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1645))
- Add `excluded_urls` functionality to `urllib` and `urllib3` instrumentations
([#1733](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1733))
### Fixed ### Fixed

View File

@ -51,7 +51,7 @@ API
import functools import functools
import types import types
from timeit import default_timer from timeit import default_timer
from typing import Callable, Collection, Iterable, Optional from typing import Callable, Collection, Optional
from urllib.parse import urlparse from urllib.parse import urlparse
from requests.models import PreparedRequest, Response from requests.models import PreparedRequest, Response
@ -77,6 +77,7 @@ from opentelemetry.trace import SpanKind, Tracer, get_tracer
from opentelemetry.trace.span import Span from opentelemetry.trace.span import Span
from opentelemetry.trace.status import Status from opentelemetry.trace.status import Status
from opentelemetry.util.http import ( from opentelemetry.util.http import (
ExcludeList,
get_excluded_urls, get_excluded_urls,
parse_excluded_urls, parse_excluded_urls,
remove_url_credentials, remove_url_credentials,
@ -96,7 +97,7 @@ def _instrument(
duration_histogram: Histogram, duration_histogram: Histogram,
request_hook: _RequestHookT = None, request_hook: _RequestHookT = None,
response_hook: _ResponseHookT = None, response_hook: _ResponseHookT = None,
excluded_urls: Iterable[str] = None, excluded_urls: ExcludeList = None,
): ):
"""Enables tracing of all requests calls that go through """Enables tracing of all requests calls that go through
:code:`requests.session.Session.request` (this includes :code:`requests.session.Session.request` (this includes

View File

@ -16,6 +16,46 @@ Installation
pip install opentelemetry-instrumentation-urllib pip install opentelemetry-instrumentation-urllib
Configuration
-------------
Request/Response hooks
**********************
The urllib instrumentation supports extending tracing behavior with the help of
request and response hooks. These are functions that are called back by the instrumentation
right after a Span is created for a request and right before the span is finished processing a response respectively.
The hooks can be configured as follows:
.. code:: python
# `request_obj` is an instance of urllib.request.Request
def request_hook(span, request_obj):
pass
# `request_obj` is an instance of urllib.request.Request
# `response` is an instance of http.client.HTTPResponse
def response_hook(span, request_obj, response):
pass
URLLibInstrumentor().instrument(
request_hook=request_hook, response_hook=response_hook
)
Exclude lists
*************
To exclude certain URLs from being tracked, set the environment variable ``OTEL_PYTHON_URLLIB_EXCLUDED_URLS``
(or ``OTEL_PYTHON_EXCLUDED_URLS`` as fallback) with comma-delimited regexes representing which URLs to exclude.
For example,
::
export OTEL_PYTHON_URLLIB_EXCLUDED_URLS="client/.*/info,healthcheck"
will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``.
References References
---------- ----------

View File

@ -56,6 +56,20 @@ The hooks can be configured as follows:
request_hook=request_hook, response_hook=response_hook) request_hook=request_hook, response_hook=response_hook)
) )
Exclude lists
*************
To exclude certain URLs from being tracked, set the environment variable ``OTEL_PYTHON_URLLIB_EXCLUDED_URLS``
(or ``OTEL_PYTHON_EXCLUDED_URLS`` as fallback) with comma-delimited regexes representing which URLs to exclude.
For example,
::
export OTEL_PYTHON_URLLIB_EXCLUDED_URLS="client/.*/info,healthcheck"
will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``.
API API
--- ---
""" """
@ -88,7 +102,14 @@ from opentelemetry.semconv.metrics import MetricInstruments
from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.trace import Span, SpanKind, get_tracer from opentelemetry.trace import Span, SpanKind, get_tracer
from opentelemetry.trace.status import Status from opentelemetry.trace.status import Status
from opentelemetry.util.http import remove_url_credentials from opentelemetry.util.http import (
ExcludeList,
get_excluded_urls,
parse_excluded_urls,
remove_url_credentials,
)
_excluded_urls_from_env = get_excluded_urls("URLLIB")
_RequestHookT = typing.Optional[typing.Callable[[Span, Request], None]] _RequestHookT = typing.Optional[typing.Callable[[Span, Request], None]]
_ResponseHookT = typing.Optional[ _ResponseHookT = typing.Optional[
@ -112,10 +133,12 @@ class URLLibInstrumentor(BaseInstrumentor):
``tracer_provider``: a TracerProvider, defaults to global ``tracer_provider``: a TracerProvider, defaults to global
``request_hook``: An optional callback that is invoked right after a span is created. ``request_hook``: An optional callback that is invoked right after a span is created.
``response_hook``: An optional callback which is invoked right before the span is finished processing a response ``response_hook``: An optional callback which is invoked right before the span is finished processing a response
``excluded_urls``: A string containing a comma-delimited
list of regexes used to exclude URLs from tracking
""" """
tracer_provider = kwargs.get("tracer_provider") tracer_provider = kwargs.get("tracer_provider")
tracer = get_tracer(__name__, __version__, tracer_provider) tracer = get_tracer(__name__, __version__, tracer_provider)
excluded_urls = kwargs.get("excluded_urls")
meter_provider = kwargs.get("meter_provider") meter_provider = kwargs.get("meter_provider")
meter = get_meter(__name__, __version__, meter_provider) meter = get_meter(__name__, __version__, meter_provider)
@ -126,6 +149,9 @@ class URLLibInstrumentor(BaseInstrumentor):
histograms, histograms,
request_hook=kwargs.get("request_hook"), request_hook=kwargs.get("request_hook"),
response_hook=kwargs.get("response_hook"), response_hook=kwargs.get("response_hook"),
excluded_urls=_excluded_urls_from_env
if excluded_urls is None
else parse_excluded_urls(excluded_urls),
) )
def _uninstrument(self, **kwargs): def _uninstrument(self, **kwargs):
@ -143,6 +169,7 @@ def _instrument(
histograms: Dict[str, Histogram], histograms: Dict[str, Histogram],
request_hook: _RequestHookT = None, request_hook: _RequestHookT = None,
response_hook: _ResponseHookT = None, response_hook: _ResponseHookT = None,
excluded_urls: ExcludeList = None,
): ):
"""Enables tracing of all requests calls that go through """Enables tracing of all requests calls that go through
:code:`urllib.Client._make_request`""" :code:`urllib.Client._make_request`"""
@ -174,8 +201,11 @@ def _instrument(
) or context.get_value(_SUPPRESS_HTTP_INSTRUMENTATION_KEY): ) or context.get_value(_SUPPRESS_HTTP_INSTRUMENTATION_KEY):
return call_wrapped() return call_wrapped()
method = request.get_method().upper()
url = request.full_url url = request.full_url
if excluded_urls and excluded_urls.url_disabled(url):
return call_wrapped()
method = request.get_method().upper()
span_name = f"HTTP {method}".strip() span_name = f"HTTP {method}".strip()

View File

@ -38,6 +38,7 @@ from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.test.mock_textmap import MockTextMapPropagator from opentelemetry.test.mock_textmap import MockTextMapPropagator
from opentelemetry.test.test_base import TestBase from opentelemetry.test.test_base import TestBase
from opentelemetry.trace import StatusCode from opentelemetry.trace import StatusCode
from opentelemetry.util.http import get_excluded_urls
# pylint: disable=too-many-public-methods # pylint: disable=too-many-public-methods
@ -52,6 +53,21 @@ class RequestsIntegrationTestBase(abc.ABC):
# pylint: disable=invalid-name # pylint: disable=invalid-name
def setUp(self): def setUp(self):
super().setUp() super().setUp()
self.env_patch = mock.patch.dict(
"os.environ",
{
"OTEL_PYTHON_URLLIB_EXCLUDED_URLS": "http://localhost/env_excluded_arg/123,env_excluded_noarg"
},
)
self.env_patch.start()
self.exclude_patch = mock.patch(
"opentelemetry.instrumentation.urllib._excluded_urls_from_env",
get_excluded_urls("URLLIB"),
)
self.exclude_patch.start()
URLLibInstrumentor().instrument() URLLibInstrumentor().instrument()
httpretty.enable() httpretty.enable()
httpretty.register_uri(httpretty.GET, self.URL, body=b"Hello!") httpretty.register_uri(httpretty.GET, self.URL, body=b"Hello!")
@ -125,6 +141,36 @@ class RequestsIntegrationTestBase(abc.ABC):
span, opentelemetry.instrumentation.urllib span, opentelemetry.instrumentation.urllib
) )
def test_excluded_urls_explicit(self):
# Checks that a URL matching an explicitly passed ``excluded_urls`` pattern is not traced.
url_201 = "http://httpbin.org/status/201"
httpretty.register_uri(
httpretty.GET,
url_201,
status=201,
)
# setUp already instrumented with no arguments, so re-instrument with an explicit pattern.
URLLibInstrumentor().uninstrument()
URLLibInstrumentor().instrument(excluded_urls=".*/201")
# url_201 matches ".*/201"; self.URL is expected not to match it.
self.perform_request(self.URL)
self.perform_request(url_201)
# Presumably asserts that exactly one span was recorded (the non-excluded request) - confirm against assert_span.
self.assert_span(num_spans=1)
def test_excluded_urls_from_env(self):
# Checks that a URL listed in OTEL_PYTHON_URLLIB_EXCLUDED_URLS (patched in setUp) is not traced.
url = "http://localhost/env_excluded_arg/123"
httpretty.register_uri(
httpretty.GET,
url,
status=200,
)
# Re-instrument without ``excluded_urls`` so the env-derived exclude list is the one in effect.
URLLibInstrumentor().uninstrument()
URLLibInstrumentor().instrument()
# ``url`` is one of the entries in the patched environment variable; self.URL is expected not to be.
self.perform_request(self.URL)
self.perform_request(url)
# Presumably asserts that exactly one span was recorded (the non-excluded request) - confirm against assert_span.
self.assert_span(num_spans=1)
def test_not_foundbasic(self): def test_not_foundbasic(self):
url_404 = "http://httpbin.org/status/404/" url_404 = "http://httpbin.org/status/404/"
httpretty.register_uri( httpretty.register_uri(

View File

@ -42,6 +42,20 @@ The hooks can be configured as follows:
request_hook=request_hook, response_hook=response_hook) request_hook=request_hook, response_hook=response_hook)
) )
Exclude lists
*************
To exclude certain URLs from being tracked, set the environment variable ``OTEL_PYTHON_URLLIB3_EXCLUDED_URLS``
(or ``OTEL_PYTHON_EXCLUDED_URLS`` as fallback) with comma-delimited regexes representing which URLs to exclude.
For example,
::
export OTEL_PYTHON_URLLIB3_EXCLUDED_URLS="client/.*/info,healthcheck"
will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``.
References References
---------- ----------

View File

@ -60,6 +60,20 @@ The hooks can be configured as follows:
request_hook=request_hook, response_hook=response_hook) request_hook=request_hook, response_hook=response_hook)
) )
Exclude lists
*************
To exclude certain URLs from being tracked, set the environment variable ``OTEL_PYTHON_URLLIB3_EXCLUDED_URLS``
(or ``OTEL_PYTHON_EXCLUDED_URLS`` as fallback) with comma-delimited regexes representing which URLs to exclude.
For example,
::
export OTEL_PYTHON_URLLIB3_EXCLUDED_URLS="client/.*/info,healthcheck"
will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``.
API API
--- ---
""" """
@ -92,8 +106,15 @@ from opentelemetry.semconv.metrics import MetricInstruments
from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.trace import Span, SpanKind, Tracer, get_tracer from opentelemetry.trace import Span, SpanKind, Tracer, get_tracer
from opentelemetry.trace.status import Status from opentelemetry.trace.status import Status
from opentelemetry.util.http import (
ExcludeList,
get_excluded_urls,
parse_excluded_urls,
)
from opentelemetry.util.http.httplib import set_ip_on_next_http_connection from opentelemetry.util.http.httplib import set_ip_on_next_http_connection
_excluded_urls_from_env = get_excluded_urls("URLLIB3")
_UrlFilterT = typing.Optional[typing.Callable[[str], str]] _UrlFilterT = typing.Optional[typing.Callable[[str], str]]
_RequestHookT = typing.Optional[ _RequestHookT = typing.Optional[
typing.Callable[ typing.Callable[
@ -138,10 +159,14 @@ class URLLib3Instrumentor(BaseInstrumentor):
``response_hook``: An optional callback which is invoked right before the span is finished processing a response. ``response_hook``: An optional callback which is invoked right before the span is finished processing a response.
``url_filter``: A callback to process the requested URL prior ``url_filter``: A callback to process the requested URL prior
to adding it as a span attribute. to adding it as a span attribute.
``excluded_urls``: A string containing a comma-delimited
list of regexes used to exclude URLs from tracking
""" """
tracer_provider = kwargs.get("tracer_provider") tracer_provider = kwargs.get("tracer_provider")
tracer = get_tracer(__name__, __version__, tracer_provider) tracer = get_tracer(__name__, __version__, tracer_provider)
excluded_urls = kwargs.get("excluded_urls")
meter_provider = kwargs.get("meter_provider") meter_provider = kwargs.get("meter_provider")
meter = get_meter(__name__, __version__, meter_provider) meter = get_meter(__name__, __version__, meter_provider)
@ -169,6 +194,9 @@ class URLLib3Instrumentor(BaseInstrumentor):
request_hook=kwargs.get("request_hook"), request_hook=kwargs.get("request_hook"),
response_hook=kwargs.get("response_hook"), response_hook=kwargs.get("response_hook"),
url_filter=kwargs.get("url_filter"), url_filter=kwargs.get("url_filter"),
excluded_urls=_excluded_urls_from_env
if excluded_urls is None
else parse_excluded_urls(excluded_urls),
) )
def _uninstrument(self, **kwargs): def _uninstrument(self, **kwargs):
@ -183,13 +211,17 @@ def _instrument(
request_hook: _RequestHookT = None, request_hook: _RequestHookT = None,
response_hook: _ResponseHookT = None, response_hook: _ResponseHookT = None,
url_filter: _UrlFilterT = None, url_filter: _UrlFilterT = None,
excluded_urls: ExcludeList = None,
): ):
def instrumented_urlopen(wrapped, instance, args, kwargs): def instrumented_urlopen(wrapped, instance, args, kwargs):
if _is_instrumentation_suppressed(): if _is_instrumentation_suppressed():
return wrapped(*args, **kwargs) return wrapped(*args, **kwargs)
method = _get_url_open_arg("method", args, kwargs).upper()
url = _get_url(instance, args, kwargs, url_filter) url = _get_url(instance, args, kwargs, url_filter)
if excluded_urls and excluded_urls.url_disabled(url):
return wrapped(*args, **kwargs)
method = _get_url_open_arg("method", args, kwargs).upper()
headers = _prepare_headers(kwargs) headers = _prepare_headers(kwargs)
body = _get_url_open_arg("body", args, kwargs) body = _get_url_open_arg("body", args, kwargs)

View File

@ -29,6 +29,7 @@ from opentelemetry.propagate import get_global_textmap, set_global_textmap
from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.test.mock_textmap import MockTextMapPropagator from opentelemetry.test.mock_textmap import MockTextMapPropagator
from opentelemetry.test.test_base import TestBase from opentelemetry.test.test_base import TestBase
from opentelemetry.util.http import get_excluded_urls
# pylint: disable=too-many-public-methods # pylint: disable=too-many-public-methods
@ -39,6 +40,21 @@ class TestURLLib3Instrumentor(TestBase):
def setUp(self): def setUp(self):
super().setUp() super().setUp()
self.env_patch = mock.patch.dict(
"os.environ",
{
"OTEL_PYTHON_URLLIB3_EXCLUDED_URLS": "http://localhost/env_excluded_arg/123,env_excluded_noarg"
},
)
self.env_patch.start()
self.exclude_patch = mock.patch(
"opentelemetry.instrumentation.urllib3._excluded_urls_from_env",
get_excluded_urls("URLLIB3"),
)
self.exclude_patch.start()
URLLib3Instrumentor().instrument() URLLib3Instrumentor().instrument()
httpretty.enable(allow_net_connect=False) httpretty.enable(allow_net_connect=False)
@ -158,6 +174,36 @@ class TestURLLib3Instrumentor(TestBase):
self.assert_success_span(response, url) self.assert_success_span(response, url)
def test_excluded_urls_explicit(self):
# Checks that a URL matching an explicitly passed ``excluded_urls`` pattern is not traced.
url_201 = "http://httpbin.org/status/201"
httpretty.register_uri(
httpretty.GET,
url_201,
status=201,
)
# setUp already instrumented with no arguments, so re-instrument with an explicit pattern.
URLLib3Instrumentor().uninstrument()
URLLib3Instrumentor().instrument(excluded_urls=".*/201")
# url_201 matches ".*/201"; self.HTTP_URL is expected not to match it.
self.perform_request(self.HTTP_URL)
self.perform_request(url_201)
# Presumably asserts that exactly one span was recorded (the non-excluded request) - confirm against assert_span.
self.assert_span(num_spans=1)
def test_excluded_urls_from_env(self):
# Checks that a URL listed in OTEL_PYTHON_URLLIB3_EXCLUDED_URLS (patched in setUp) is not traced.
url = "http://localhost/env_excluded_arg/123"
httpretty.register_uri(
httpretty.GET,
url,
status=200,
)
# Re-instrument without ``excluded_urls`` so the env-derived exclude list is the one in effect.
URLLib3Instrumentor().uninstrument()
URLLib3Instrumentor().instrument()
# ``url`` is one of the entries in the patched environment variable; self.HTTP_URL is expected not to be.
self.perform_request(self.HTTP_URL)
self.perform_request(url)
# Presumably asserts that exactly one span was recorded (the non-excluded request) - confirm against assert_span.
self.assert_span(num_spans=1)
def test_uninstrument(self): def test_uninstrument(self):
URLLib3Instrumentor().uninstrument() URLLib3Instrumentor().uninstrument()