diff --git a/CHANGELOG.md b/CHANGELOG.md index daa22b10e..f831617ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([679](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/679)) - `opentelemetry-exporter-richconsole` Initial release ([#686](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/686)) +- `opentelemetry-instrumentation-elasticsearch` no longer creates unique span names by including document IDs, replaces them with `:id` and puts the value in attribute `elasticsearch.id` + ([#705](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/705)) - `opentelemetry-instrumentation-tornado` now sets `http.client_ip` and `tornado.handler` attributes ([#706](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/706)) - `opentelemetry-instrumentation-requests` added exclude urls functionality diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py index db1939523..232877741 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py @@ -81,6 +81,7 @@ for example: es.get(index='my-index', doc_type='my-type', id=1) """ +import re from logging import getLogger from os import environ from typing import Collection @@ -107,7 +108,7 @@ _ATTRIBUTES_FROM_RESULT = [ "took", ] -_DEFALT_OP_NAME = "request" +_DEFAULT_OP_NAME = "request" class ElasticsearchInstrumentor(BaseInstrumentor): @@ -146,6 +147,9 @@ class ElasticsearchInstrumentor(BaseInstrumentor): unwrap(elasticsearch.Transport, "perform_request") +_regex_doc_url = re.compile(r"/_doc/([^/]+)") + + def _wrap_perform_request( tracer, span_name_prefix, request_hook=None, response_hook=None ): @@ -161,7 +165,24 @@ def _wrap_perform_request( len(args), ) - op_name = span_name_prefix + (url or method or _DEFALT_OP_NAME) + op_name = span_name_prefix + (url or method or _DEFAULT_OP_NAME) + doc_id = None + if url: + # TODO: This regex-based solution avoids creating an unbounded number of span names, but should be replaced by instrumenting individual Elasticsearch methods instead of Transport.perform_request() + # A limitation of the regex is that only the '_doc' mapping type is supported. Mapping types are deprecated since Elasticsearch 7 + # https://github.com/open-telemetry/opentelemetry-python-contrib/issues/708 + match = _regex_doc_url.search(url) + if match is not None: + # Remove the full document ID from the URL + doc_span = match.span() + op_name = ( + span_name_prefix + + url[: doc_span[0]] + + "/_doc/:id" + + url[doc_span[1] :] + ) + # Put the document ID in attributes + doc_id = match.group(1) params = kwargs.get("params", {}) body = kwargs.get("body", None) @@ -184,6 +205,8 @@ def _wrap_perform_request( attributes[SpanAttributes.DB_STATEMENT] = str(body) if params: attributes["elasticsearch.params"] = str(params) + if doc_id: + attributes["elasticsearch.id"] = doc_id for key, value in attributes.items(): span.set_attribute(key, value) diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py index a63350299..91e4df7ec 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py @@ -64,7 +64,7 @@ class TestElasticsearchIntegration(TestBase): request_mock.return_value = (1, {}, {}) es = Elasticsearch() - es.index(index="sw", doc_type="people", id=1, body={"name": "adam"}) + es.index(index="sw", doc_type="_doc", id=1, body={"name": "adam"}) spans_list = self.get_finished_spans() self.assertEqual(len(spans_list), 1) @@ -79,7 +79,7 @@ class TestElasticsearchIntegration(TestBase): # check that no spans are generated after uninstrument ElasticsearchInstrumentor().uninstrument() - es.index(index="sw", doc_type="people", id=1, body={"name": "adam"}) + es.index(index="sw", doc_type="_doc", id=1, body={"name": "adam"}) spans_list = self.get_finished_spans() self.assertEqual(len(spans_list), 1) @@ -119,7 +119,7 @@ class TestElasticsearchIntegration(TestBase): def _test_prefix(self, prefix): es = Elasticsearch() - es.index(index="sw", doc_type="people", id=1, body={"name": "adam"}) + es.index(index="sw", doc_type="_doc", id=1, body={"name": "adam"}) spans_list = self.get_finished_spans() self.assertEqual(len(spans_list), 1) @@ -133,11 +133,12 @@ class TestElasticsearchIntegration(TestBase): '{"found": false, "timed_out": true, "took": 7}', ) es = Elasticsearch() - es.get(index="test-index", doc_type="tweet", id=1) + es.get(index="test-index", doc_type="_doc", id=1) spans = self.get_finished_spans() self.assertEqual(1, len(spans)) + self.assertEqual(spans[0].name, "Elasticsearch/test-index/_doc/:id") self.assertEqual("False", spans[0].attributes["elasticsearch.found"]) self.assertEqual( "True", spans[0].attributes["elasticsearch.timed_out"] @@ -159,7 +160,7 @@ class TestElasticsearchIntegration(TestBase): def _test_trace_error(self, code, exc): es = Elasticsearch() try: - es.get(index="test-index", doc_type="tweet", id=1) + es.get(index="test-index", doc_type="_doc", id=1) except Exception: # pylint: disable=broad-except pass @@ -176,15 +177,13 @@ class TestElasticsearchIntegration(TestBase): request_mock.return_value = (1, {}, {}) es = Elasticsearch() with self.tracer.start_as_current_span("parent"): - es.index( - index="sw", doc_type="people", id=1, body={"name": "adam"} - ) + es.index(index="sw", doc_type="_doc", id=1, body={"name": "adam"}) spans = self.get_finished_spans() self.assertEqual(len(spans), 2) parent = spans.by_name("parent") - child = spans.by_name("Elasticsearch/sw/people/1") + child = spans.by_name("Elasticsearch/sw/_doc/:id") self.assertIsNotNone(child.parent) self.assertEqual(child.parent.span_id, parent.context.span_id) @@ -198,13 +197,13 @@ class TestElasticsearchIntegration(TestBase): # 3. Check the spans got different parents, and are in the expected order. def target1(parent_span): with trace.use_span(parent_span): - es.get(index="test-index", doc_type="tweet", id=1) + es.get(index="test-index", doc_type="_doc", id=1) ev.set() ev.wait() def target2(): ev.wait() - es.get(index="test-index", doc_type="tweet", id=2) + es.get(index="test-index", doc_type="_doc", id=2) ev.set() with self.tracer.start_as_current_span("parent") as span: @@ -220,8 +219,8 @@ class TestElasticsearchIntegration(TestBase): self.assertEqual(3, len(spans)) s1 = spans.by_name("parent") - s2 = spans.by_name("Elasticsearch/test-index/tweet/1") - s3 = spans.by_name("Elasticsearch/test-index/tweet/2") + s2 = spans.by_attr("elasticsearch.id", "1") + s3 = spans.by_attr("elasticsearch.id", "2") self.assertIsNotNone(s2.parent) self.assertEqual(s2.parent.span_id, s1.context.span_id) @@ -343,10 +342,9 @@ class TestElasticsearchIntegration(TestBase): ) es = Elasticsearch() index = "test-index" - doc_type = "tweet" doc_id = 1 kwargs = {"params": {"test": True}} - es.get(index=index, doc_type=doc_type, id=doc_id, **kwargs) + es.get(index=index, doc_type="_doc", id=doc_id, **kwargs) spans = self.get_finished_spans() @@ -355,7 +353,7 @@ class TestElasticsearchIntegration(TestBase): "GET", spans[0].attributes[request_hook_method_attribute] ) self.assertEqual( - f"/{index}/{doc_type}/{doc_id}", + f"/{index}/_doc/{doc_id}", spans[0].attributes[request_hook_url_attribute], ) self.assertEqual( @@ -390,7 +388,7 @@ class TestElasticsearchIntegration(TestBase): "hits": [ { "_index": "test-index", - "_type": "tweet", + "_type": "_doc", "_id": "1", "_score": 0.18232156, "_source": {"name": "tester"}, @@ -405,7 +403,7 @@ class TestElasticsearchIntegration(TestBase): json.dumps(response_payload), ) es = Elasticsearch() - es.get(index="test-index", doc_type="tweet", id=1) + es.get(index="test-index", doc_type="_doc", id=1) spans = self.get_finished_spans()