Align gRPC server span status codes to OTEL specs (#2019)

This commit is contained in:
Filip Nikolovski
2024-03-07 18:33:28 +01:00
committed by GitHub
parent 46a8c59e03
commit 2e746198bf
6 changed files with 108 additions and 68 deletions

View File

@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased ## Unreleased
### Fixed
- Align gRPC span status codes to OTEL specification ([#1756](https://github.com/open-telemetry/opentelemetry-python-contrib/issues/1756))
## Version 1.23.0/0.44b0 (2024-02-23) ## Version 1.23.0/0.44b0 (2024-02-23)
- Drop support for 3.7 - Drop support for 3.7

View File

@ -17,9 +17,9 @@ import grpc.aio
import wrapt import wrapt
from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.trace.status import Status, StatusCode
from ._server import OpenTelemetryServerInterceptor, _wrap_rpc_behavior from ._server import OpenTelemetryServerInterceptor, _wrap_rpc_behavior
from ._utilities import _server_status
# pylint:disable=abstract-method # pylint:disable=abstract-method
@ -36,12 +36,8 @@ class _OpenTelemetryAioServicerContext(wrapt.ObjectProxy):
self._self_active_span.set_attribute( self._self_active_span.set_attribute(
SpanAttributes.RPC_GRPC_STATUS_CODE, code.value[0] SpanAttributes.RPC_GRPC_STATUS_CODE, code.value[0]
) )
self._self_active_span.set_status( status = _server_status(code, details)
Status( self._self_active_span.set_status(status)
status_code=StatusCode.ERROR,
description=f"{code}:{details}",
)
)
return await self.__wrapped__.abort(code, details, trailing_metadata) return await self.__wrapped__.abort(code, details, trailing_metadata)
def set_code(self, code): def set_code(self, code):
@ -51,23 +47,15 @@ class _OpenTelemetryAioServicerContext(wrapt.ObjectProxy):
SpanAttributes.RPC_GRPC_STATUS_CODE, code.value[0] SpanAttributes.RPC_GRPC_STATUS_CODE, code.value[0]
) )
if code != grpc.StatusCode.OK: if code != grpc.StatusCode.OK:
self._self_active_span.set_status( status = _server_status(code, details)
Status( self._self_active_span.set_status(status)
status_code=StatusCode.ERROR,
description=f"{code}:{details}",
)
)
return self.__wrapped__.set_code(code) return self.__wrapped__.set_code(code)
def set_details(self, details): def set_details(self, details):
self._self_details = details self._self_details = details
if self._self_code != grpc.StatusCode.OK: if self._self_code != grpc.StatusCode.OK:
self._self_active_span.set_status( status = _server_status(self._self_code, details)
Status( self._self_active_span.set_status(status)
status_code=StatusCode.ERROR,
description=f"{self._self_code}:{details}",
)
)
return self.__wrapped__.set_details(details) return self.__wrapped__.set_details(details)

View File

@ -31,7 +31,8 @@ from opentelemetry import trace
from opentelemetry.context import attach, detach from opentelemetry.context import attach, detach
from opentelemetry.propagate import extract from opentelemetry.propagate import extract
from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.trace.status import Status, StatusCode
from ._utilities import _server_status
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -124,12 +125,8 @@ class _OpenTelemetryServicerContext(grpc.ServicerContext):
self._active_span.set_attribute( self._active_span.set_attribute(
SpanAttributes.RPC_GRPC_STATUS_CODE, code.value[0] SpanAttributes.RPC_GRPC_STATUS_CODE, code.value[0]
) )
self._active_span.set_status( status = _server_status(code, details)
Status( self._active_span.set_status(status)
status_code=StatusCode.ERROR,
description=f"{code}:{details}",
)
)
return self._servicer_context.abort(code, details) return self._servicer_context.abort(code, details)
def abort_with_status(self, status): def abort_with_status(self, status):
@ -158,23 +155,15 @@ class _OpenTelemetryServicerContext(grpc.ServicerContext):
SpanAttributes.RPC_GRPC_STATUS_CODE, code.value[0] SpanAttributes.RPC_GRPC_STATUS_CODE, code.value[0]
) )
if code != grpc.StatusCode.OK: if code != grpc.StatusCode.OK:
self._active_span.set_status( status = _server_status(code, details)
Status( self._active_span.set_status(status)
status_code=StatusCode.ERROR,
description=f"{code}:{details}",
)
)
return self._servicer_context.set_code(code) return self._servicer_context.set_code(code)
def set_details(self, details): def set_details(self, details):
self._details = details self._details = details
if self._code != grpc.StatusCode.OK: if self._code != grpc.StatusCode.OK:
self._active_span.set_status( status = _server_status(self._code, details)
Status( self._active_span.set_status(status)
status_code=StatusCode.ERROR,
description=f"{self._code}:{details}",
)
)
return self._servicer_context.set_details(details) return self._servicer_context.set_details(details)

View File

@ -14,6 +14,10 @@
"""Internal utilities.""" """Internal utilities."""
import grpc
from opentelemetry.trace.status import Status, StatusCode
class RpcInfo: class RpcInfo:
def __init__( def __init__(
@ -31,3 +35,21 @@ class RpcInfo:
self.request = request self.request = request
self.response = response self.response = response
self.error = error self.error = error
def _server_status(code, details):
    """Return the span ``Status`` to record for a gRPC server status code.

    Args:
        code: the ``grpc.StatusCode`` reported by the servicer.
        details: the detail text supplied alongside the code.

    Returns:
        A ``Status`` with ``StatusCode.ERROR`` and the description
        ``"<code>:<details>"`` when ``code`` is one of UNKNOWN,
        DEADLINE_EXCEEDED, UNIMPLEMENTED, INTERNAL, UNAVAILABLE or
        DATA_LOSS; otherwise a ``Status`` with ``StatusCode.UNSET`` and an
        empty description.
    """
    # Format the description up front, before the code is looked up.
    description = f"{code}:{details}"

    # The only gRPC codes that mark a server span as errored.
    server_error_codes = {
        grpc.StatusCode.UNKNOWN,
        grpc.StatusCode.DEADLINE_EXCEEDED,
        grpc.StatusCode.UNIMPLEMENTED,
        grpc.StatusCode.INTERNAL,
        grpc.StatusCode.UNAVAILABLE,
        grpc.StatusCode.DATA_LOSS,
    }

    if code in server_error_codes:
        return Status(status_code=StatusCode.ERROR, description=description)

    # Every other code leaves the span status unset.
    return Status(status_code=StatusCode.UNSET, description="")

View File

@ -507,9 +507,7 @@ class TestOpenTelemetryAioServerInterceptor(TestBase, IsolatedAsyncioTestCase):
class AbortServicer(GRPCTestServerServicer): class AbortServicer(GRPCTestServerServicer):
# pylint:disable=C0103 # pylint:disable=C0103
async def SimpleMethod(self, request, context): async def SimpleMethod(self, request, context):
await context.abort( await context.abort(grpc.StatusCode.INTERNAL, failure_message)
grpc.StatusCode.FAILED_PRECONDITION, failure_message
)
testcase = self testcase = self
@ -520,9 +518,7 @@ class TestOpenTelemetryAioServerInterceptor(TestBase, IsolatedAsyncioTestCase):
with testcase.assertRaises(grpc.RpcError) as cm: with testcase.assertRaises(grpc.RpcError) as cm:
await channel.unary_unary(rpc_call)(msg) await channel.unary_unary(rpc_call)(msg)
self.assertEqual( self.assertEqual(cm.exception.code(), grpc.StatusCode.INTERNAL)
cm.exception.code(), grpc.StatusCode.FAILED_PRECONDITION
)
self.assertEqual(cm.exception.details(), failure_message) self.assertEqual(cm.exception.details(), failure_message)
await run_with_test_server(request, servicer=AbortServicer()) await run_with_test_server(request, servicer=AbortServicer())
@ -543,7 +539,7 @@ class TestOpenTelemetryAioServerInterceptor(TestBase, IsolatedAsyncioTestCase):
self.assertEqual(span.status.status_code, StatusCode.ERROR) self.assertEqual(span.status.status_code, StatusCode.ERROR)
self.assertEqual( self.assertEqual(
span.status.description, span.status.description,
f"{grpc.StatusCode.FAILED_PRECONDITION}:{failure_message}", f"{grpc.StatusCode.INTERNAL}:{failure_message}",
) )
# Check attributes # Check attributes
@ -555,7 +551,7 @@ class TestOpenTelemetryAioServerInterceptor(TestBase, IsolatedAsyncioTestCase):
SpanAttributes.RPC_METHOD: "SimpleMethod", SpanAttributes.RPC_METHOD: "SimpleMethod",
SpanAttributes.RPC_SERVICE: "GRPCTestServer", SpanAttributes.RPC_SERVICE: "GRPCTestServer",
SpanAttributes.RPC_SYSTEM: "grpc", SpanAttributes.RPC_SYSTEM: "grpc",
SpanAttributes.RPC_GRPC_STATUS_CODE: grpc.StatusCode.FAILED_PRECONDITION.value[ SpanAttributes.RPC_GRPC_STATUS_CODE: grpc.StatusCode.INTERNAL.value[
0 0
], ],
}, },
@ -605,11 +601,8 @@ class TestOpenTelemetryAioServerInterceptor(TestBase, IsolatedAsyncioTestCase):
) )
# make sure this span errored, with the right status and detail # make sure this span errored, with the right status and detail
self.assertEqual(span.status.status_code, StatusCode.ERROR) self.assertEqual(span.status.status_code, StatusCode.UNSET)
self.assertEqual( self.assertEqual(span.status.description, None)
span.status.description,
f"{grpc.StatusCode.FAILED_PRECONDITION}:{failure_message}",
)
# Check attributes # Check attributes
self.assertSpanHasAttributes( self.assertSpanHasAttributes(

View File

@ -552,28 +552,45 @@ class TestOpenTelemetryServerInterceptor(TestBase):
# our detailed failure message # our detailed failure message
failure_message = "This is a test failure" failure_message = "This is a test failure"
# aborting RPC handler # aborting RPC handlers
def handler(request, context): def error_status_handler(request, context):
context.abort(grpc.StatusCode.INTERNAL, failure_message)
def unset_status_handler(request, context):
context.abort(grpc.StatusCode.FAILED_PRECONDITION, failure_message) context.abort(grpc.StatusCode.FAILED_PRECONDITION, failure_message)
with self.server( rpc_call_error = "TestServicer/error_status_handler"
max_workers=1, rpc_call_unset = "TestServicer/unset_status_handler"
interceptors=[interceptor],
) as (server, channel):
server.add_generic_rpc_handlers((UnaryUnaryRpcHandler(handler),))
rpc_call = "TestServicer/handler"
server.start() rpc_calls = {
# unfortunately, these are just bare exceptions in grpc... rpc_call_error: error_status_handler,
with self.assertRaises(Exception): rpc_call_unset: unset_status_handler,
channel.unary_unary(rpc_call)(b"") }
server.stop(None)
for rpc_call, handler in rpc_calls.items():
with self.server(
max_workers=1,
interceptors=[interceptor],
) as (server, channel):
server.add_generic_rpc_handlers(
(UnaryUnaryRpcHandler(handler),)
)
server.start()
with self.assertRaises(Exception):
channel.unary_unary(rpc_call)(b"")
# unfortunately, these are just bare exceptions in grpc...
server.stop(None)
spans_list = self.memory_exporter.get_finished_spans() spans_list = self.memory_exporter.get_finished_spans()
self.assertEqual(len(spans_list), 1) self.assertEqual(len(spans_list), 2)
# check error span
span = spans_list[0] span = spans_list[0]
self.assertEqual(span.name, rpc_call) self.assertEqual(span.name, rpc_call_error)
self.assertIs(span.kind, trace.SpanKind.SERVER) self.assertIs(span.kind, trace.SpanKind.SERVER)
# Check version and name in span's instrumentation info # Check version and name in span's instrumentation info
@ -585,7 +602,7 @@ class TestOpenTelemetryServerInterceptor(TestBase):
self.assertEqual(span.status.status_code, StatusCode.ERROR) self.assertEqual(span.status.status_code, StatusCode.ERROR)
self.assertEqual( self.assertEqual(
span.status.description, span.status.description,
f"{grpc.StatusCode.FAILED_PRECONDITION}:{failure_message}", f"{grpc.StatusCode.INTERNAL}:{failure_message}",
) )
# Check attributes # Check attributes
@ -593,7 +610,35 @@ class TestOpenTelemetryServerInterceptor(TestBase):
span, span,
{ {
**self.net_peer_span_attributes, **self.net_peer_span_attributes,
SpanAttributes.RPC_METHOD: "handler", SpanAttributes.RPC_METHOD: "error_status_handler",
SpanAttributes.RPC_SERVICE: "TestServicer",
SpanAttributes.RPC_SYSTEM: "grpc",
SpanAttributes.RPC_GRPC_STATUS_CODE: grpc.StatusCode.INTERNAL.value[
0
],
},
)
# check unset status span
span = spans_list[1]
self.assertEqual(span.name, rpc_call_unset)
self.assertIs(span.kind, trace.SpanKind.SERVER)
# Check version and name in span's instrumentation info
self.assertEqualSpanInstrumentationInfo(
span, opentelemetry.instrumentation.grpc
)
self.assertEqual(span.status.description, None)
self.assertEqual(span.status.status_code, StatusCode.UNSET)
# Check attributes
self.assertSpanHasAttributes(
span,
{
**self.net_peer_span_attributes,
SpanAttributes.RPC_METHOD: "unset_status_handler",
SpanAttributes.RPC_SERVICE: "TestServicer", SpanAttributes.RPC_SERVICE: "TestServicer",
SpanAttributes.RPC_SYSTEM: "grpc", SpanAttributes.RPC_SYSTEM: "grpc",
SpanAttributes.RPC_GRPC_STATUS_CODE: grpc.StatusCode.FAILED_PRECONDITION.value[ SpanAttributes.RPC_GRPC_STATUS_CODE: grpc.StatusCode.FAILED_PRECONDITION.value[