Strip leading comments from SQL queries when generating the span name. (#1434)

Co-authored-by: Srikanth Chekuri <srikanth.chekuri92@gmail.com> Co-authored-by: Diego Hurtado <ocelotl@users.noreply.github.com>
2025-08-01 17:34:38 +08:00 · 2022-11-18 08:01:07 -05:00
parent c613484390
commit be4ceec08c
9 changed files with 93 additions and 6 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -20,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  ([#1350](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1350))
 - `opentelemetry-instrumentation-starlette` Add support for regular expression matching and sanitization of HTTP headers.
  ([#1404](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1404))
+- Strip leading comments from SQL queries when generating the span name.
+  ([#1434](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1434))

 ### Fixed

--- a/instrumentation/opentelemetry-instrumentation-asyncpg/src/opentelemetry/instrumentation/asyncpg/init.py
+++ b/instrumentation/opentelemetry-instrumentation-asyncpg/src/opentelemetry/instrumentation/asyncpg/init.py
@ -34,6 +34,7 @@ API
 ---
 """

+import re
 from typing import Collection

 import asyncpg
@ -99,6 +100,7 @@ class AsyncPGInstrumentor(BaseInstrumentor):
        super().__init__()
        self.capture_parameters = capture_parameters
        self._tracer = None
+        self._leading_comment_remover = re.compile(r"^/\*.*?\*/")

    def instrumentation_dependencies(self) -> Collection[str]:
        return _instruments
@ -135,7 +137,8 @@ class AsyncPGInstrumentor(BaseInstrumentor):
        name = args[0] if args[0] else params.get("database", "postgresql")

        try:
-            name = name.split()[0]
+            # Strip leading comments so we get the operation name.
+            name = self._leading_comment_remover.sub("", name).split()[0]
        except IndexError:
            name = ""

--- a/instrumentation/opentelemetry-instrumentation-dbapi/src/opentelemetry/instrumentation/dbapi/init.py
+++ b/instrumentation/opentelemetry-instrumentation-dbapi/src/opentelemetry/instrumentation/dbapi/init.py
@ -39,6 +39,7 @@ API

 import functools
 import logging
+import re
 import typing

 import wrapt
@ -368,6 +369,7 @@ class CursorTracer:
            else {}
        )
        self._connect_module = self._db_api_integration.connect_module
+        self._leading_comment_remover = re.compile(r"^/\*.*?\*/")

    def _populate_span(
        self,
@ -397,7 +399,8 @@ class CursorTracer:

    def get_operation_name(self, cursor, args):  # pylint: disable=no-self-use
        if args and isinstance(args[0], str):
-            return args[0].split()[0]
+            # Strip leading comments so we get the operation name.
+            return self._leading_comment_remover.sub("", args[0]).split()[0]
        return ""

    def get_statement(self, cursor, args):  # pylint: disable=no-self-use
--- a/instrumentation/opentelemetry-instrumentation-dbapi/tests/test_dbapi_integration.py
+++ b/instrumentation/opentelemetry-instrumentation-dbapi/tests/test_dbapi_integration.py
@ -88,11 +88,17 @@ class TestDBApiIntegration(TestBase):
        query"""
        )
        cursor.execute("tab\tseparated query")
+        cursor.execute("/* leading comment */ query")
+        cursor.execute("/* leading comment */ query /* trailing comment */")
+        cursor.execute("query /* trailing comment */")
        spans_list = self.memory_exporter.get_finished_spans()
-        self.assertEqual(len(spans_list), 3)
+        self.assertEqual(len(spans_list), 6)
        self.assertEqual(spans_list[0].name, "Test")
        self.assertEqual(spans_list[1].name, "multi")
        self.assertEqual(spans_list[2].name, "tab")
+        self.assertEqual(spans_list[3].name, "query")
+        self.assertEqual(spans_list[4].name, "query")
+        self.assertEqual(spans_list[5].name, "query")

    def test_span_succeeded_with_capture_of_statement_parameters(self):
        connection_props = {
--- a/instrumentation/opentelemetry-instrumentation-psycopg2/src/opentelemetry/instrumentation/psycopg2/init.py
+++ b/instrumentation/opentelemetry-instrumentation-psycopg2/src/opentelemetry/instrumentation/psycopg2/init.py
@ -216,7 +216,8 @@ class CursorTracer(dbapi.CursorTracer):
            statement = statement.as_string(cursor)

        if isinstance(statement, str):
-            return statement.split()[0]
+            # Strip leading comments so we get the operation name.
+            return self._leading_comment_remover.sub("", statement).split()[0]

        return ""

--- a/instrumentation/opentelemetry-instrumentation-psycopg2/tests/test_psycopg2_integration.py
+++ b/instrumentation/opentelemetry-instrumentation-psycopg2/tests/test_psycopg2_integration.py
@ -116,6 +116,32 @@ class TestPostgresqlIntegration(TestBase):
        spans_list = self.memory_exporter.get_finished_spans()
        self.assertEqual(len(spans_list), 1)

+    def test_span_name(self):
+        Psycopg2Instrumentor().instrument()
+
+        cnx = psycopg2.connect(database="test")
+
+        cursor = cnx.cursor()
+
+        cursor.execute("Test query", ("param1Value", False))
+        cursor.execute(
+            """multi
+        line
+        query"""
+        )
+        cursor.execute("tab\tseparated query")
+        cursor.execute("/* leading comment */ query")
+        cursor.execute("/* leading comment */ query /* trailing comment */")
+        cursor.execute("query /* trailing comment */")
+        spans_list = self.memory_exporter.get_finished_spans()
+        self.assertEqual(len(spans_list), 6)
+        self.assertEqual(spans_list[0].name, "Test")
+        self.assertEqual(spans_list[1].name, "multi")
+        self.assertEqual(spans_list[2].name, "tab")
+        self.assertEqual(spans_list[3].name, "query")
+        self.assertEqual(spans_list[4].name, "query")
+        self.assertEqual(spans_list[5].name, "query")
+
    # pylint: disable=unused-argument
    def test_not_recording(self):
        mock_tracer = mock.Mock()
--- a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py
+++ b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py
@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+import re

 from sqlalchemy.event import listen  # pylint: disable=no-name-in-module

@ -100,6 +101,7 @@ class EngineTracer:
        self.vendor = _normalize_vendor(engine.name)
        self.enable_commenter = enable_commenter
        self.commenter_options = commenter_options if commenter_options else {}
+        self._leading_comment_remover = re.compile(r"^/\*.*?\*/")

        listen(
            engine, "before_cursor_execute", self._before_cur_exec, retval=True
@ -115,7 +117,10 @@ class EngineTracer:
            # use cases and uses the SQL statement in span name correctly as per the spec.
            # For some very special cases it might not record the correct statement if the SQL
            # dialect is too weird but in any case it shouldn't break anything.
-            parts.append(statement.split()[0])
+            # Strip leading comments so we get the operation name.
+            parts.append(
+                self._leading_comment_remover.sub("", statement).split()[0]
+            )
        if db_name:
            parts.append(db_name)
        if not parts:
--- a/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py
+++ b/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py
@ -42,15 +42,27 @@ class TestSqlalchemyInstrumentation(TestBase):
        )
        cnx = engine.connect()
        cnx.execute("SELECT	1 + 1;").fetchall()
+        cnx.execute("/* leading comment */ SELECT	1 + 1;").fetchall()
+        cnx.execute(
+            "/* leading comment */ SELECT	1 + 1; /* trailing comment */"
+        ).fetchall()
+        cnx.execute("SELECT	1 + 1; /* trailing comment */").fetchall()
        spans = self.memory_exporter.get_finished_spans()

-        self.assertEqual(len(spans), 2)
+        self.assertEqual(len(spans), 5)
        # first span - the connection to the db
        self.assertEqual(spans[0].name, "connect")
        self.assertEqual(spans[0].kind, trace.SpanKind.CLIENT)
        # second span - the query itself
        self.assertEqual(spans[1].name, "SELECT :memory:")
        self.assertEqual(spans[1].kind, trace.SpanKind.CLIENT)
+        # spans for queries with comments
+        self.assertEqual(spans[2].name, "SELECT :memory:")
+        self.assertEqual(spans[2].kind, trace.SpanKind.CLIENT)
+        self.assertEqual(spans[3].name, "SELECT :memory:")
+        self.assertEqual(spans[3].kind, trace.SpanKind.CLIENT)
+        self.assertEqual(spans[4].name, "SELECT :memory:")
+        self.assertEqual(spans[4].kind, trace.SpanKind.CLIENT)

    def test_instrument_two_engines(self):
        engine_1 = create_engine("sqlite:///:memory:")
--- a/tests/opentelemetry-docker-tests/tests/asyncpg/test_asyncpg_functional.py
+++ b/tests/opentelemetry-docker-tests/tests/asyncpg/test_asyncpg_functional.py
@ -77,6 +77,35 @@ class TestFunctionalAsyncPG(TestBase):
            spans[0].attributes[SpanAttributes.DB_STATEMENT], "SELECT 42;"
        )

+    def test_instrumented_remove_comments(self, *_, **__):
+        async_call(self._connection.fetch("/* leading comment */ SELECT 42;"))
+        async_call(
+            self._connection.fetch(
+                "/* leading comment */ SELECT 42; /* trailing comment */"
+            )
+        )
+        async_call(self._connection.fetch("SELECT 42; /* trailing comment */"))
+        spans = self.memory_exporter.get_finished_spans()
+        self.assertEqual(len(spans), 3)
+        self.check_span(spans[0])
+        self.assertEqual(spans[0].name, "SELECT")
+        self.assertEqual(
+            spans[0].attributes[SpanAttributes.DB_STATEMENT],
+            "/* leading comment */ SELECT 42;",
+        )
+        self.check_span(spans[1])
+        self.assertEqual(spans[1].name, "SELECT")
+        self.assertEqual(
+            spans[1].attributes[SpanAttributes.DB_STATEMENT],
+            "/* leading comment */ SELECT 42; /* trailing comment */",
+        )
+        self.check_span(spans[2])
+        self.assertEqual(spans[2].name, "SELECT")
+        self.assertEqual(
+            spans[2].attributes[SpanAttributes.DB_STATEMENT],
+            "SELECT 42; /* trailing comment */",
+        )
+
    def test_instrumented_transaction_method(self, *_, **__):
        async def _transaction_execute():
            async with self._connection.transaction():