import re from .ext import http class FilterRequestsOnUrl(object): r"""Filter out traces from incoming http requests based on the request's url. This class takes as argument a list of regular expression patterns representing the urls to be excluded from tracing. A trace will be excluded if its root span contains a ``http.url`` tag and if this tag matches any of the provided regular expression using the standard python regexp match semantic (https://docs.python.org/2/library/re.html#re.match). :param list regexps: a list of regular expressions (or a single string) defining the urls that should be filtered out. Examples: To filter out http calls to domain api.example.com:: FilterRequestsOnUrl(r'http://api\\.example\\.com') To filter out http calls to all first level subdomains from example.com:: FilterRequestOnUrl(r'http://.*+\\.example\\.com') To filter out calls to both http://test.example.com and http://example.com/healthcheck:: FilterRequestOnUrl([r'http://test\\.example\\.com', r'http://example\\.com/healthcheck']) """ def __init__(self, regexps): if isinstance(regexps, str): regexps = [regexps] self._regexps = [re.compile(regexp) for regexp in regexps] def process_trace(self, trace): """ When the filter is registered in the tracer, process_trace is called by on each trace before it is sent to the agent, the returned value will be fed to the next filter in the list. If process_trace returns None, the whole trace is discarded. """ for span in trace: if span.parent_id is None and span.get_tag(http.URL) is not None: url = span.get_tag(http.URL) for regexp in self._regexps: if regexp.match(url): return None return trace