SQLAlchemy: Use SQL operation and DB name as the Span name (#254)

Current instrumentation uses the entire SQL query as the operation name
which makes traces very hard to read and understand in addition to
introducing high-cardinality issues. This commit fixes the problem by
using only the SQL operation name and the DB name instead of the entire
query.
This commit is contained in:
Owais Lone
2021-01-20 22:11:19 +05:30
committed by GitHub
parent d12f67fc31
commit 8c8f2785bd
8 changed files with 57 additions and 39 deletions

View File

@ -42,7 +42,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
- `opentelemetry-instrumentation-asgi`, `opentelemetry-instrumentation-wsgi` Return `None` for `CarrierGetter` if key not found
([#1374](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/233))
([#233](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/233))
- `opentelemetry-instrumentation-grpc` Comply with updated spec, rework tests
([#236](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/236))
- `opentelemetry-instrumentation-asgi`, `opentelemetry-instrumentation-falcon`, `opentelemetry-instrumentation-flask`, `opentelemetry-instrumentation-pyramid`, `opentelemetry-instrumentation-wsgi` Renamed `host.port` attribute to `net.host.port`
@ -57,6 +57,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#235](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/235))
- `opentelemetry-exporter-datadog` `opentelemetry-sdk-extension-aws` Fix reference to ids_generator in sdk
([#235](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/235))
- `opentelemetry-instrumentation-sqlalchemy` Use SQL operation and DB name as span name.
([#254](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/254))
## [0.16b1](https://github.com/open-telemetry/opentelemetry-python-contrib/releases/tag/v0.16b1) - 2020-11-26

View File

@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from sqlalchemy.event import listen
from sqlalchemy.event import listen # pylint: disable=no-name-in-module
from opentelemetry import trace
from opentelemetry.instrumentation.sqlalchemy.version import __version__
@ -72,22 +72,38 @@ class EngineTracer:
listen(engine, "after_cursor_execute", self._after_cur_exec)
listen(engine, "handle_error", self._handle_error)
def _operation_name(self, db_name, statement):
parts = []
if isinstance(statement, str):
# otel spec recommends against parsing SQL queries. We are not trying to parse SQL
# but simply truncating the statement to the first word. This covers probably >95%
# use cases and uses the SQL statement in span name correctly as per the spec.
# For some very special cases it might not record the correct statement if the SQL
# dialect is too weird but in any case it shouldn't break anything.
parts.append(statement.split()[0])
if db_name:
parts.append(db_name)
if not parts:
return self.vendor
return " ".join(parts)
# pylint: disable=unused-argument
def _before_cur_exec(self, conn, cursor, statement, *args):
attrs, found = _get_attributes_from_url(conn.engine.url)
if not found:
attrs = _get_attributes_from_cursor(self.vendor, cursor, attrs)
db_name = attrs.get(_DB, "")
self.current_span = self.tracer.start_span(
statement, kind=trace.SpanKind.CLIENT
self._operation_name(db_name, statement),
kind=trace.SpanKind.CLIENT,
)
with self.tracer.use_span(self.current_span, end_on_exit=False):
if self.current_span.is_recording():
self.current_span.set_attribute(_STMT, statement)
self.current_span.set_attribute("db.system", self.vendor)
if not _set_attributes_from_url(
self.current_span, conn.engine.url
):
_set_attributes_from_cursor(
self.current_span, self.vendor, cursor
)
for key, value in attrs.items():
self.current_span.set_attribute(key, value)
# pylint: disable=unused-argument
def _after_cur_exec(self, conn, cursor, statement, *args):
@ -108,25 +124,22 @@ class EngineTracer:
self.current_span.end()
def _set_attributes_from_url(span: trace.Span, url):
def _get_attributes_from_url(url):
"""Set connection tags from the url. return true if successful."""
if span.is_recording():
if url.host:
span.set_attribute(_HOST, url.host)
if url.port:
span.set_attribute(_PORT, url.port)
if url.database:
span.set_attribute(_DB, url.database)
if url.username:
span.set_attribute(_USER, url.username)
return bool(url.host)
attrs = {}
if url.host:
attrs[_HOST] = url.host
if url.port:
attrs[_PORT] = url.port
if url.database:
attrs[_DB] = url.database
if url.username:
attrs[_USER] = url.username
return attrs, bool(url.host)
def _set_attributes_from_cursor(span: trace.Span, vendor, cursor):
def _get_attributes_from_cursor(vendor, cursor, attrs):
"""Attempt to set db connection attributes by introspecting the cursor."""
if not span.is_recording():
return
if vendor == "postgresql":
# pylint: disable=import-outside-toplevel
from psycopg2.extensions import parse_dsn
@ -135,6 +148,7 @@ def _set_attributes_from_cursor(span: trace.Span, vendor, cursor):
dsn = getattr(cursor.connection, "dsn", None)
if dsn:
data = parse_dsn(dsn)
span.set_attribute(_DB, data.get("dbname"))
span.set_attribute(_HOST, data.get("host"))
span.set_attribute(_PORT, int(data.get("port")))
attrs[_DB] = data.get("dbname")
attrs[_HOST] = data.get("host")
attrs[_PORT] = int(data.get("port"))
return attrs

View File

@ -35,7 +35,7 @@ class TestSqlalchemyInstrumentation(TestBase):
spans = self.memory_exporter.get_finished_spans()
self.assertEqual(len(spans), 1)
self.assertEqual(spans[0].name, "SELECT 1 + 1;")
self.assertEqual(spans[0].name, "SELECT :memory:")
self.assertEqual(spans[0].kind, trace.SpanKind.CLIENT)
def test_not_recording(self):
@ -68,5 +68,5 @@ class TestSqlalchemyInstrumentation(TestBase):
spans = self.memory_exporter.get_finished_spans()
self.assertEqual(len(spans), 1)
self.assertEqual(spans[0].name, "SELECT 1 + 1;")
self.assertEqual(spans[0].name, "SELECT :memory:")
self.assertEqual(spans[0].kind, trace.SpanKind.CLIENT)

View File

@ -110,6 +110,8 @@ class SQLAlchemyTestMixin(TestBase):
super().tearDown()
def _check_span(self, span, name):
if self.SQL_DB:
name = "{0} {1}".format(name, self.SQL_DB)
self.assertEqual(span.name, name)
self.assertEqual(span.attributes.get(_DB), self.SQL_DB)
self.assertIs(span.status.status_code, trace.status.StatusCode.UNSET)
@ -129,7 +131,7 @@ class SQLAlchemyTestMixin(TestBase):
stmt += "(?, ?)"
else:
stmt += "(%(id)s, %(name)s)"
self._check_span(span, stmt)
self._check_span(span, "INSERT")
self.assertIn("INSERT INTO players", span.attributes.get(_STMT))
self.check_meta(span)
@ -146,7 +148,7 @@ class SQLAlchemyTestMixin(TestBase):
stmt += "?"
else:
stmt += "%(name_1)s"
self._check_span(span, stmt)
self._check_span(span, "SELECT")
self.assertIn(
"SELECT players.id AS players_id, players.name AS players_name \nFROM players \nWHERE players.name",
span.attributes.get(_STMT),
@ -163,7 +165,7 @@ class SQLAlchemyTestMixin(TestBase):
spans = self.memory_exporter.get_finished_spans()
self.assertEqual(len(spans), 1)
span = spans[0]
self._check_span(span, stmt)
self._check_span(span, "SELECT")
self.assertEqual(span.attributes.get(_STMT), "SELECT * FROM players")
self.check_meta(span)
@ -188,4 +190,4 @@ class SQLAlchemyTestMixin(TestBase):
self.assertEqual(parent_span.name, "sqlalch_op")
self.assertEqual(parent_span.instrumentation_info.name, "sqlalch_svc")
self.assertEqual(child_span.name, stmt)
self.assertEqual(child_span.name, "SELECT " + self.SQL_DB)

View File

@ -56,7 +56,7 @@ class SQLAlchemyInstrumentTestCase(TestBase):
def test_engine_traced(self):
# ensures that the engine is traced
rows = self.conn.execute("SELECT 1").fetchall()
rows = self.conn.execute("SELECT").fetchall()
self.assertEqual(len(rows), 1)
traces = self.memory_exporter.get_finished_spans()
@ -64,6 +64,6 @@ class SQLAlchemyInstrumentTestCase(TestBase):
self.assertEqual(len(traces), 1)
span = traces[0]
# check subset of span fields
self.assertEqual(span.name, "SELECT 1")
self.assertEqual(span.name, "SELECT opentelemetry-tests")
self.assertIs(span.status.status_code, trace.status.StatusCode.UNSET)
self.assertGreater((span.end_time - span.start_time), 0)

View File

@ -67,7 +67,7 @@ class MysqlConnectorTestCase(SQLAlchemyTestMixin):
self.assertEqual(len(spans), 1)
span = spans[0]
# span fields
self.assertEqual(span.name, "SELECT * FROM a_wrong_table")
self.assertEqual(span.name, "SELECT opentelemetry-tests")
self.assertEqual(
span.attributes.get(_STMT), "SELECT * FROM a_wrong_table"
)

View File

@ -65,7 +65,7 @@ class PostgresTestCase(SQLAlchemyTestMixin):
self.assertEqual(len(spans), 1)
span = spans[0]
# span fields
self.assertEqual(span.name, "SELECT * FROM a_wrong_table")
self.assertEqual(span.name, "SELECT opentelemetry-tests")
self.assertEqual(
span.attributes.get(_STMT), "SELECT * FROM a_wrong_table"
)

View File

@ -43,7 +43,7 @@ class SQLiteTestCase(SQLAlchemyTestMixin):
self.assertEqual(len(spans), 1)
span = spans[0]
# span fields
self.assertEqual(span.name, stmt)
self.assertEqual(span.name, "SELECT :memory:")
self.assertEqual(
span.attributes.get(_STMT), "SELECT * FROM a_wrong_table"
)