botocore: add genai metrics for bedrock extensions (#3326)

This adds GenAI metrics for operation duration and token usage to the botocore Bedrock extension.
Riccardo Magliocchetti
2025-03-05 10:14:39 +01:00
committed by GitHub
parent 81eaea57f9
commit 0ff1032bdd
8 changed files with 534 additions and 18 deletions
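As a minimal sketch of how the new metrics can be consumed, a MeterProvider backed by an in-memory reader can be passed to the instrumentor, mirroring the test fixtures in this commit (the Bedrock call itself is elided):

    import boto3
    from opentelemetry.instrumentation.botocore import BotocoreInstrumentor
    from opentelemetry.sdk.metrics import MeterProvider
    from opentelemetry.sdk.metrics.export import InMemoryMetricReader

    # Collect metrics in memory so the new instruments can be inspected
    reader = InMemoryMetricReader()
    BotocoreInstrumentor().instrument(
        meter_provider=MeterProvider(metric_readers=[reader]),
    )

    client = boto3.client("bedrock-runtime")
    # ... perform a Converse or InvokeModel call here ...

    for resource_metrics in reader.get_metrics_data().resource_metrics:
        for scope_metrics in resource_metrics.scope_metrics:
            # expected: gen_ai.client.operation.duration and gen_ai.client.token.usage
            print([metric.name for metric in scope_metrics.metrics])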

View File

@ -25,6 +25,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#3275](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3275))
- `opentelemetry-instrumentation-botocore` Add support for GenAI tool events
([#3302](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3302))
- `opentelemetry-instrumentation-botocore` Add support for GenAI metrics
([#3326](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3326))
- `opentelemetry-instrumentation` make it simpler to initialize auto-instrumentation programmatically
([#3273](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3273))
- Add `opentelemetry-instrumentation-vertexai>=2.0b0` to `opentelemetry-bootstrap`

View File

@ -26,7 +26,7 @@ classifiers = [
"Programming Language :: Python :: 3.13",
]
dependencies = [
- "opentelemetry-api ~= 1.12",
+ "opentelemetry-api ~= 1.30",
"opentelemetry-instrumentation == 0.52b0.dev",
"opentelemetry-semantic-conventions == 0.52b0.dev",
"opentelemetry-propagator-aws-xray ~= 1.0",

View File

@ -76,6 +76,38 @@ for example:
)
ec2 = self.session.create_client("ec2", region_name="us-west-2")
ec2.describe_instances()
Extensions
----------
The instrumentation supports creating extensions for AWS services to enrich what is collected. Extensions are available
for the following AWS services:
- Bedrock Runtime
- DynamoDB
- Lambda
- SNS
- SQS
Bedrock Runtime
***************
This extension implements the GenAI semantic conventions for the following API calls:
- Converse
- ConverseStream
- InvokeModel
- InvokeModelWithResponseStream
For the Converse and ConverseStream APIs, tracing, events, and metrics are implemented.
For the InvokeModel and InvokeModelWithResponseStream APIs, tracing, events, and metrics are implemented only for a
subset of the available models, namely:
- Amazon Titan models
- Amazon Nova models
- Anthropic Claude
Tool calls are not supported for Amazon models with the InvokeModel and InvokeModelWithResponseStream APIs.
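For example, the following enables the instrumentation and produces GenAI telemetry for a Converse call
(a minimal sketch; it assumes AWS credentials and access to the referenced model are configured):

    import boto3
    from opentelemetry.instrumentation.botocore import BotocoreInstrumentor

    # Enable botocore instrumentation; the Bedrock Runtime extension kicks in automatically
    BotocoreInstrumentor().instrument()

    client = boto3.client("bedrock-runtime")
    client.converse(
        modelId="amazon.titan-text-lite-v1",
        messages=[{"role": "user", "content": [{"text": "Say this is a test"}]}],
    )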
"""
import logging
@ -104,6 +136,7 @@ from opentelemetry.instrumentation.utils import (
suppress_http_instrumentation,
unwrap,
)
from opentelemetry.metrics import Instrument, Meter, get_meter
from opentelemetry.propagators.aws.aws_xray_propagator import AwsXRayPropagator
from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.trace import get_tracer
@ -134,6 +167,10 @@ class BotocoreInstrumentor(BaseInstrumentor):
self._tracers = {}
# event_loggers are lazy initialized per-extension in _get_event_logger
self._event_loggers = {}
# meters are lazy initialized per-extension in _get_meter
self._meters = {}
# metrics are lazy initialized per-extension in _get_metrics
self._metrics: Dict[str, Dict[str, Instrument]] = {}
self.request_hook = kwargs.get("request_hook")
self.response_hook = kwargs.get("response_hook")
@ -144,6 +181,7 @@ class BotocoreInstrumentor(BaseInstrumentor):
self.tracer_provider = kwargs.get("tracer_provider")
self.event_logger_provider = kwargs.get("event_logger_provider")
self.meter_provider = kwargs.get("meter_provider")
wrap_function_wrapper(
"botocore.client",
@ -201,6 +239,38 @@ class BotocoreInstrumentor(BaseInstrumentor):
return self._event_loggers[instrumentation_name]
def _get_meter(self, extension: _AwsSdkExtension):
"""This is a multiplexer in order to have a meter per extension"""
instrumentation_name = self._get_instrumentation_name(extension)
meter = self._meters.get(instrumentation_name)
if meter:
return meter
schema_version = extension.meter_schema_version()
self._meters[instrumentation_name] = get_meter(
instrumentation_name,
"",
schema_url=f"https://opentelemetry.io/schemas/{schema_version}",
meter_provider=self.meter_provider,
)
return self._meters[instrumentation_name]
def _get_metrics(
self, extension: _AwsSdkExtension, meter: Meter
) -> Dict[str, Instrument]:
"""This is a multiplexer for lazy initialization of metrics required by extensions"""
instrumentation_name = self._get_instrumentation_name(extension)
metrics = self._metrics.get(instrumentation_name)
if metrics is not None:
return metrics
self._metrics.setdefault(instrumentation_name, {})
metrics = self._metrics[instrumentation_name]
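# _safe_invoke shields the client call from exceptions raised by the extension hook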
_safe_invoke(extension.setup_metrics, meter, metrics)
return metrics
def _uninstrument(self, **kwargs):
unwrap(BaseClient, "_make_api_call")
unwrap(Endpoint, "prepare_request")
@ -244,8 +314,11 @@ class BotocoreInstrumentor(BaseInstrumentor):
tracer = self._get_tracer(extension)
event_logger = self._get_event_logger(extension)
meter = self._get_meter(extension)
metrics = self._get_metrics(extension, meter)
instrumentor_ctx = _BotocoreInstrumentorContext(
- event_logger=event_logger
+ event_logger=event_logger,
+ metrics=metrics,
)
with tracer.start_as_current_span(
call_context.span_name,

View File

@ -21,6 +21,7 @@ from __future__ import annotations
import io
import json
import logging
from timeit import default_timer
from typing import Any
from botocore.eventstream import EventStream
@ -39,6 +40,7 @@ from opentelemetry.instrumentation.botocore.extensions.types import (
_BotoClientErrorT,
_BotocoreInstrumentorContext,
)
from opentelemetry.metrics import Instrument, Meter
from opentelemetry.semconv._incubating.attributes.error_attributes import (
ERROR_TYPE,
)
@ -51,16 +53,56 @@ from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import (
GEN_AI_REQUEST_TOP_P,
GEN_AI_RESPONSE_FINISH_REASONS,
GEN_AI_SYSTEM,
GEN_AI_TOKEN_TYPE,
GEN_AI_USAGE_INPUT_TOKENS,
GEN_AI_USAGE_OUTPUT_TOKENS,
GenAiOperationNameValues,
GenAiSystemValues,
GenAiTokenTypeValues,
)
from opentelemetry.semconv._incubating.metrics.gen_ai_metrics import (
GEN_AI_CLIENT_OPERATION_DURATION,
GEN_AI_CLIENT_TOKEN_USAGE,
)
from opentelemetry.trace.span import Span
from opentelemetry.trace.status import Status, StatusCode
_logger = logging.getLogger(__name__)
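# Explicit histogram bucket boundaries advised by the GenAI semantic conventions
# for gen_ai.client.operation.duration (seconds)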
_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS = [
0.01,
0.02,
0.04,
0.08,
0.16,
0.32,
0.64,
1.28,
2.56,
5.12,
10.24,
20.48,
40.96,
81.92,
]
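# Advisory bucket boundaries for gen_ai.client.token.usage (token counts)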
_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS = [
1,
4,
16,
64,
256,
1024,
4096,
16384,
65536,
262144,
1048576,
4194304,
16777216,
67108864,
]
_MODEL_ID_KEY: str = "modelId"
@ -88,6 +130,40 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
not in self._DONT_CLOSE_SPAN_ON_END_OPERATIONS
)
def setup_metrics(self, meter: Meter, metrics: dict[str, Instrument]):
metrics[GEN_AI_CLIENT_OPERATION_DURATION] = meter.create_histogram(
name=GEN_AI_CLIENT_OPERATION_DURATION,
description="GenAI operation duration",
unit="s",
explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
)
metrics[GEN_AI_CLIENT_TOKEN_USAGE] = meter.create_histogram(
name=GEN_AI_CLIENT_TOKEN_USAGE,
description="Measures number of input and output tokens used",
unit="{token}",
explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS,
)
def _extract_metrics_attributes(self) -> _AttributeMapT:
attributes = {GEN_AI_SYSTEM: GenAiSystemValues.AWS_BEDROCK.value}
model_id = self._call_context.params.get(_MODEL_ID_KEY)
if not model_id:
return attributes
attributes[GEN_AI_REQUEST_MODEL] = model_id
# Titan models called via InvokeModel are text completion models
if "body" in self._call_context.params and "amazon.titan" in model_id:
attributes[GEN_AI_OPERATION_NAME] = (
GenAiOperationNameValues.TEXT_COMPLETION.value
)
else:
attributes[GEN_AI_OPERATION_NAME] = (
GenAiOperationNameValues.CHAT.value
)
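# e.g. a Converse call with modelId "amazon.titan-text-lite-v1" yields (illustrative):
# {"gen_ai.system": "aws.bedrock",
#  "gen_ai.request.model": "amazon.titan-text-lite-v1",
#  "gen_ai.operation.name": "chat"}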
return attributes
def extract_attributes(self, attributes: _AttributeMapT):
if self._call_context.operation not in self._HANDLED_OPERATIONS:
return
@ -251,16 +327,18 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
for event in message_to_event(message, capture_content):
event_logger.emit(event)
- if not span.is_recording():
- return
+ if span.is_recording():
+ operation_name = span.attributes.get(GEN_AI_OPERATION_NAME, "")
+ request_model = span.attributes.get(GEN_AI_REQUEST_MODEL, "")
+ # avoid setting the span name to an empty string if these are not available
+ if operation_name and request_model:
+ span.update_name(f"{operation_name} {request_model}")
- operation_name = span.attributes.get(GEN_AI_OPERATION_NAME, "")
- request_model = span.attributes.get(GEN_AI_REQUEST_MODEL, "")
- # avoid setting to an empty string if are not available
- if operation_name and request_model:
- span.update_name(f"{operation_name} {request_model}")
+ # this is used to calculate the operation duration metric; duration may be skewed by request_hook
+ # pylint: disable=attribute-defined-outside-init
+ self._operation_start = default_timer()
- # pylint: disable=no-self-use
+ # pylint: disable=no-self-use,too-many-locals
def _converse_on_success(
self,
span: Span,
@ -300,6 +378,37 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
)
)
metrics = instrumentor_context.metrics
metrics_attributes = self._extract_metrics_attributes()
if operation_duration_histogram := metrics.get(
GEN_AI_CLIENT_OPERATION_DURATION
):
duration = max((default_timer() - self._operation_start), 0)
operation_duration_histogram.record(
duration,
attributes=metrics_attributes,
)
if token_usage_histogram := metrics.get(GEN_AI_CLIENT_TOKEN_USAGE):
if usage := result.get("usage"):
if input_tokens := usage.get("inputTokens"):
input_attributes = {
**metrics_attributes,
GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.INPUT.value,
}
token_usage_histogram.record(
input_tokens, input_attributes
)
if output_tokens := usage.get("outputTokens"):
output_attributes = {
**metrics_attributes,
GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.COMPLETION.value,
}
token_usage_histogram.record(
output_tokens, output_attributes
)
def _invoke_model_on_success(
self,
span: Span,
@ -338,12 +447,31 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
if original_body is not None:
original_body.close()
- def _on_stream_error_callback(self, span: Span, exception):
+ def _on_stream_error_callback(
+ self,
+ span: Span,
+ exception,
+ instrumentor_context: _BotocoreInstrumentorContext,
+ ):
span.set_status(Status(StatusCode.ERROR, str(exception)))
if span.is_recording():
span.set_attribute(ERROR_TYPE, type(exception).__qualname__)
span.end()
metrics = instrumentor_context.metrics
metrics_attributes = {
**self._extract_metrics_attributes(),
ERROR_TYPE: type(exception).__qualname__,
}
if operation_duration_histogram := metrics.get(
GEN_AI_CLIENT_OPERATION_DURATION
):
duration = max((default_timer() - self._operation_start), 0)
operation_duration_histogram.record(
duration,
attributes=metrics_attributes,
)
def on_success(
self,
span: Span,
@ -367,7 +495,9 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
span.end()
def stream_error_callback(exception):
- self._on_stream_error_callback(span, exception)
+ self._on_stream_error_callback(
+ span, exception, instrumentor_context
+ )
result["stream"] = ConverseStreamWrapper(
result["stream"],
@ -405,7 +535,9 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
span.end()
def invoke_model_stream_error_callback(exception):
- self._on_stream_error_callback(span, exception)
+ self._on_stream_error_callback(
+ span, exception, instrumentor_context
+ )
result["body"] = InvokeModelWithResponseStreamWrapper(
result["body"],
@ -415,7 +547,7 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
)
return
- # pylint: disable=no-self-use
+ # pylint: disable=no-self-use,too-many-locals
def _handle_amazon_titan_response(
self,
span: Span,
@ -445,7 +577,38 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
)
event_logger.emit(choice.to_choice_event())
# pylint: disable=no-self-use
metrics = instrumentor_context.metrics
metrics_attributes = self._extract_metrics_attributes()
if operation_duration_histogram := metrics.get(
GEN_AI_CLIENT_OPERATION_DURATION
):
duration = max((default_timer() - self._operation_start), 0)
operation_duration_histogram.record(
duration,
attributes=metrics_attributes,
)
if token_usage_histogram := metrics.get(GEN_AI_CLIENT_TOKEN_USAGE):
if input_tokens := response_body.get("inputTextTokenCount"):
input_attributes = {
**metrics_attributes,
GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.INPUT.value,
}
token_usage_histogram.record(
input_tokens, input_attributes
)
if results := response_body.get("results"):
if output_tokens := results[0].get("tokenCount"):
output_attributes = {
**metrics_attributes,
GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.COMPLETION.value,
}
token_usage_histogram.record(
output_tokens, output_attributes
)
# pylint: disable=no-self-use,too-many-locals
def _handle_amazon_nova_response(
self,
span: Span,
@ -472,6 +635,37 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
choice = _Choice.from_converse(response_body, capture_content)
event_logger.emit(choice.to_choice_event())
metrics = instrumentor_context.metrics
metrics_attributes = self._extract_metrics_attributes()
if operation_duration_histogram := metrics.get(
GEN_AI_CLIENT_OPERATION_DURATION
):
duration = max((default_timer() - self._operation_start), 0)
operation_duration_histogram.record(
duration,
attributes=metrics_attributes,
)
if token_usage_histogram := metrics.get(GEN_AI_CLIENT_TOKEN_USAGE):
if usage := response_body.get("usage"):
if input_tokens := usage.get("inputTokens"):
input_attributes = {
**metrics_attributes,
GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.INPUT.value,
}
token_usage_histogram.record(
input_tokens, input_attributes
)
if output_tokens := usage.get("outputTokens"):
output_attributes = {
**metrics_attributes,
GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.COMPLETION.value,
}
token_usage_histogram.record(
output_tokens, output_attributes
)
# pylint: disable=no-self-use
def _handle_anthropic_claude_response(
self,
@ -500,6 +694,37 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
)
event_logger.emit(choice.to_choice_event())
metrics = instrumentor_context.metrics
metrics_attributes = self._extract_metrics_attributes()
if operation_duration_histogram := metrics.get(
GEN_AI_CLIENT_OPERATION_DURATION
):
duration = max((default_timer() - self._operation_start), 0)
operation_duration_histogram.record(
duration,
attributes=metrics_attributes,
)
if token_usage_histogram := metrics.get(GEN_AI_CLIENT_TOKEN_USAGE):
if usage := response_body.get("usage"):
if input_tokens := usage.get("input_tokens"):
input_attributes = {
**metrics_attributes,
GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.INPUT.value,
}
token_usage_histogram.record(
input_tokens, input_attributes
)
if output_tokens := usage.get("output_tokens"):
output_attributes = {
**metrics_attributes,
GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.COMPLETION.value,
}
token_usage_histogram.record(
output_tokens, output_attributes
)
def on_error(
self,
span: Span,
@ -515,3 +740,17 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
if not self.should_end_span_on_exit():
span.end()
metrics = instrumentor_context.metrics
metrics_attributes = {
**self._extract_metrics_attributes(),
ERROR_TYPE: type(exception).__qualname__,
}
if operation_duration_histogram := metrics.get(
GEN_AI_CLIENT_OPERATION_DURATION
):
duration = max((default_timer() - self._operation_start), 0)
operation_duration_histogram.record(
duration,
attributes=metrics_attributes,
)

View File

@ -12,10 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import logging
from typing import Any, Dict, Optional, Tuple
from opentelemetry._events import EventLogger
from opentelemetry.metrics import Instrument, Meter
from opentelemetry.trace import SpanKind
from opentelemetry.trace.span import Span
from opentelemetry.util.types import AttributeValue
@ -91,8 +94,13 @@ class _AwsSdkCallContext:
class _BotocoreInstrumentorContext:
- def __init__(self, event_logger: EventLogger):
+ def __init__(
+ self,
+ event_logger: EventLogger,
+ metrics: Dict[str, Instrument] | None = None,
+ ):
self.event_logger = event_logger
self.metrics = metrics or {}
class _AwsSdkExtension:
@ -109,6 +117,11 @@ class _AwsSdkExtension:
"""Returns the event logger OTel schema version the extension is following"""
return "1.30.0"
@staticmethod
def meter_schema_version() -> str:
"""Returns the meter OTel schema version the extension is following"""
return "1.30.0"
def should_trace_service_call(self) -> bool: # pylint:disable=no-self-use
"""Returns if the AWS SDK service call should be traced or not
@ -125,6 +138,12 @@ class _AwsSdkExtension:
"""
return True
def setup_metrics(self, meter: Meter, metrics: Dict[str, Instrument]):
"""Callback which gets invoked to set up metrics.
Extensions may override this function to add to the metrics dictionary all the instruments
they want to receive later through _BotocoreInstrumentorContext."""
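A minimal sketch of an extension overriding this callback (the subclass and metric name are
illustrative, not part of the package):

    class _ExampleExtension(_AwsSdkExtension):
        def setup_metrics(self, meter: Meter, metrics: Dict[str, Instrument]):
            # Instruments registered here are handed back through
            # _BotocoreInstrumentorContext.metrics in later callbacks
            metrics["example.client.operation.duration"] = meter.create_histogram(
                name="example.client.operation.duration",
                description="Duration of example service calls",
                unit="s",
            )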
def extract_attributes(self, attributes: _AttributeMapT):
"""Callback which gets invoked before the span is created.

View File

@ -19,6 +19,11 @@ from typing import Any
from botocore.response import StreamingBody
from opentelemetry.instrumentation.botocore.extensions.bedrock import (
_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS,
)
from opentelemetry.sdk.metrics._internal.point import ResourceMetrics
from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.semconv._incubating.attributes import (
event_attributes as EventAttributes,
@ -26,6 +31,13 @@ from opentelemetry.semconv._incubating.attributes import (
from opentelemetry.semconv._incubating.attributes import (
gen_ai_attributes as GenAIAttributes,
)
from opentelemetry.semconv._incubating.attributes.error_attributes import (
ERROR_TYPE,
)
from opentelemetry.semconv._incubating.metrics.gen_ai_metrics import (
GEN_AI_CLIENT_OPERATION_DURATION,
GEN_AI_CLIENT_TOKEN_USAGE,
)
# pylint: disable=too-many-branches, too-many-locals
@ -259,3 +271,107 @@ def assert_message_in_logs(log, event_name, expected_content, parent_span):
expected_content
), dict(log.log_record.body)
assert_log_parent(log, parent_span)
def assert_all_metric_attributes(
data_point, operation_name: str, model: str, error_type: str | None = None
):
assert GenAIAttributes.GEN_AI_OPERATION_NAME in data_point.attributes
assert (
data_point.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
== operation_name
)
assert GenAIAttributes.GEN_AI_SYSTEM in data_point.attributes
assert (
data_point.attributes[GenAIAttributes.GEN_AI_SYSTEM]
== GenAIAttributes.GenAiSystemValues.AWS_BEDROCK.value
)
assert GenAIAttributes.GEN_AI_REQUEST_MODEL in data_point.attributes
assert data_point.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == model
if error_type is not None:
assert ERROR_TYPE in data_point.attributes
assert data_point.attributes[ERROR_TYPE] == error_type
else:
assert ERROR_TYPE not in data_point.attributes
def assert_metrics(
resource_metrics: ResourceMetrics,
operation_name: str,
model: str,
input_tokens: float | None = None,
output_tokens: float | None = None,
error_type: str | None = None,
):
assert len(resource_metrics) == 1
metric_data = resource_metrics[0].scope_metrics[0].metrics
if input_tokens is not None or output_tokens is not None:
expected_metrics_data_len = 2
else:
expected_metrics_data_len = 1
assert len(metric_data) == expected_metrics_data_len
duration_metric = next(
(m for m in metric_data if m.name == GEN_AI_CLIENT_OPERATION_DURATION),
None,
)
assert duration_metric is not None
duration_point = duration_metric.data.data_points[0]
assert duration_point.sum > 0
assert_all_metric_attributes(
duration_point, operation_name, model, error_type
)
assert duration_point.explicit_bounds == tuple(
_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS
)
if input_tokens is not None:
token_usage_metric = next(
(m for m in metric_data if m.name == GEN_AI_CLIENT_TOKEN_USAGE),
None,
)
assert token_usage_metric is not None
input_token_usage = next(
(
d
for d in token_usage_metric.data.data_points
if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
== GenAIAttributes.GenAiTokenTypeValues.INPUT.value
),
None,
)
assert input_token_usage is not None
assert input_token_usage.sum == input_tokens
assert input_token_usage.explicit_bounds == tuple(
_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS
)
assert_all_metric_attributes(input_token_usage, operation_name, model)
if output_tokens is not None:
token_usage_metric = next(
(m for m in metric_data if m.name == GEN_AI_CLIENT_TOKEN_USAGE),
None,
)
assert token_usage_metric is not None
output_token_usage = next(
(
d
for d in token_usage_metric.data.data_points
if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
== GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value
),
None,
)
assert output_token_usage is not None
assert output_token_usage.sum == output_tokens
assert output_token_usage.explicit_bounds == tuple(
_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS
)
assert_all_metric_attributes(output_token_usage, operation_name, model)

View File

@ -17,6 +17,12 @@ from opentelemetry.sdk._logs.export import (
InMemoryLogExporter,
SimpleLogRecordProcessor,
)
from opentelemetry.sdk.metrics import (
MeterProvider,
)
from opentelemetry.sdk.metrics.export import (
InMemoryMetricReader,
)
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
@ -36,6 +42,12 @@ def fixture_log_exporter():
yield exporter
@pytest.fixture(scope="function", name="metric_reader")
def fixture_metric_reader():
reader = InMemoryMetricReader()
yield reader
@pytest.fixture(scope="function", name="tracer_provider")
def fixture_tracer_provider(span_exporter):
provider = TracerProvider()
@ -52,6 +64,15 @@ def fixture_event_logger_provider(log_exporter):
return event_logger_provider
@pytest.fixture(scope="function", name="meter_provider")
def fixture_meter_provider(metric_reader):
meter_provider = MeterProvider(
metric_readers=[metric_reader],
)
return meter_provider
@pytest.fixture
def bedrock_runtime_client():
return boto3.client("bedrock-runtime")
@ -81,7 +102,9 @@ def vcr_config():
@pytest.fixture(scope="function")
- def instrument_no_content(tracer_provider, event_logger_provider):
+ def instrument_no_content(
+ tracer_provider, event_logger_provider, meter_provider
+ ):
os.environ.update(
{OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "False"}
)
@ -90,6 +113,7 @@ def instrument_no_content(tracer_provider, event_logger_provider):
instrumentor.instrument(
tracer_provider=tracer_provider,
event_logger_provider=event_logger_provider,
meter_provider=meter_provider,
)
yield instrumentor
@ -98,7 +122,9 @@ def instrument_no_content(tracer_provider, event_logger_provider):
@pytest.fixture(scope="function")
- def instrument_with_content(tracer_provider, event_logger_provider):
+ def instrument_with_content(
+ tracer_provider, event_logger_provider, meter_provider
+ ):
os.environ.update(
{OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "True"}
)
@ -106,6 +132,7 @@ def instrument_with_content(tracer_provider, event_logger_provider):
instrumentor.instrument(
tracer_provider=tracer_provider,
event_logger_provider=event_logger_provider,
meter_provider=meter_provider,
)
yield instrumentor

View File

@ -34,6 +34,7 @@ from .bedrock_utils import (
assert_completion_attributes_from_streaming_body,
assert_converse_completion_attributes,
assert_message_in_logs,
assert_metrics,
assert_stream_completion_attributes,
)
@ -51,9 +52,11 @@ def filter_message_keys(message, keys):
def test_converse_with_content(
span_exporter,
log_exporter,
metric_reader,
bedrock_runtime_client,
instrument_with_content,
):
# pylint:disable=too-many-locals
messages = [{"role": "user", "content": [{"text": "Say this is a test"}]}]
llm_model_value = "amazon.titan-text-lite-v1"
@ -95,6 +98,13 @@ def test_converse_with_content(
}
assert_message_in_logs(logs[1], "gen_ai.choice", choice_body, span)
input_tokens = response["usage"]["inputTokens"]
output_tokens = response["usage"]["outputTokens"]
metrics = metric_reader.get_metrics_data().resource_metrics
assert_metrics(
metrics, "chat", llm_model_value, input_tokens, output_tokens
)
@pytest.mark.skipif(
BOTO3_VERSION < (1, 35, 56), reason="Converse API not available"
@ -103,6 +113,7 @@ def test_converse_with_content(
def test_converse_with_content_different_events(
span_exporter,
log_exporter,
metric_reader,
bedrock_runtime_client,
instrument_with_content,
):
@ -150,6 +161,13 @@ def test_converse_with_content_different_events(
}
assert_message_in_logs(logs[4], "gen_ai.choice", choice_body, span)
input_tokens = response["usage"]["inputTokens"]
output_tokens = response["usage"]["outputTokens"]
metrics = metric_reader.get_metrics_data().resource_metrics
assert_metrics(
metrics, "chat", llm_model_value, input_tokens, output_tokens
)
def converse_tool_call(
span_exporter, log_exporter, bedrock_runtime_client, expect_content
@ -452,6 +470,7 @@ def test_converse_tool_call_no_content(
def test_converse_with_invalid_model(
span_exporter,
log_exporter,
metric_reader,
bedrock_runtime_client,
instrument_with_content,
):
@ -479,6 +498,11 @@ def test_converse_with_invalid_model(
user_content = filter_message_keys(messages[0], ["content"])
assert_message_in_logs(logs[0], "gen_ai.user.message", user_content, span)
metrics = metric_reader.get_metrics_data().resource_metrics
assert_metrics(
metrics, "chat", llm_model_value, error_type="ValidationException"
)
@pytest.mark.skipif(
BOTO3_VERSION < (1, 35, 56), reason="ConverseStream API not available"
@ -487,6 +511,7 @@ def test_converse_with_invalid_model(
def test_converse_stream_with_content(
span_exporter,
log_exporter,
metric_reader,
bedrock_runtime_client,
instrument_with_content,
):
@ -553,6 +578,11 @@ def test_converse_stream_with_content(
}
assert_message_in_logs(logs[1], "gen_ai.choice", choice_body, span)
metrics = metric_reader.get_metrics_data().resource_metrics
assert_metrics(
metrics, "chat", llm_model_value, input_tokens, output_tokens
)
@pytest.mark.skipif(
BOTO3_VERSION < (1, 35, 56), reason="ConverseStream API not available"
@ -561,6 +591,7 @@ def test_converse_stream_with_content(
def test_converse_stream_with_content_different_events(
span_exporter,
log_exporter,
metric_reader,
bedrock_runtime_client,
instrument_with_content,
):
@ -614,6 +645,9 @@ def test_converse_stream_with_content_different_events(
}
assert_message_in_logs(logs[4], "gen_ai.choice", choice_body, span)
metrics = metric_reader.get_metrics_data().resource_metrics
assert_metrics(metrics, "chat", llm_model_value, mock.ANY, mock.ANY)
def _rebuild_stream_message(response):
message = {"content": []}
@ -986,6 +1020,7 @@ def test_converse_stream_no_content_tool_call(
def test_converse_stream_handles_event_stream_error(
span_exporter,
log_exporter,
metric_reader,
bedrock_runtime_client,
instrument_with_content,
):
@ -1039,6 +1074,11 @@ def test_converse_stream_handles_event_stream_error(
user_content = filter_message_keys(messages[0], ["content"])
assert_message_in_logs(logs[0], "gen_ai.user.message", user_content, span)
metrics = metric_reader.get_metrics_data().resource_metrics
assert_metrics(
metrics, "chat", llm_model_value, error_type="EventStreamError"
)
@pytest.mark.skipif(
BOTO3_VERSION < (1, 35, 56), reason="ConverseStream API not available"