Mirror of https://github.com/open-telemetry/opentelemetry-python-contrib.git (synced 2025-07-28 12:43:39 +08:00)
Provide advisory histogram boundaries when creating OpenAI metrics (#3225)
* small fixes in OpenAI examples
* up
* add comment
* leverage histogram bucket advice in 1.30
* Update instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py
* Update instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py
* up

Co-authored-by: Riccardo Magliocchetti <riccardo.magliocchetti@gmail.com>
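The substance of the change: instead of documenting explicit bucket boundaries and asking users to configure SDK Views, the instrumentation now passes the semantic-convention boundaries as advice when it creates its histograms. A minimal sketch of that pattern, assuming opentelemetry-api/opentelemetry-sdk >= 1.30 (the metric name, unit, and boundaries mirror the diff below; the meter name, recorded value, and attributes are illustrative):

    # Sketch: advisory bucket boundaries on a histogram (requires API/SDK >= 1.30).
    # With SDK 1.30+, the default aggregation uses this advice unless a View overrides it.
    from opentelemetry import metrics

    meter = metrics.get_meter("demo.genai")  # illustrative meter name

    token_usage_histogram = meter.create_histogram(
        name="gen_ai.client.token.usage",
        description="Measures number of input and output tokens used",
        unit="{token}",
        explicit_bucket_boundaries_advisory=[
            1, 4, 16, 64, 256, 1024, 4096, 16384,
            65536, 262144, 1048576, 4194304, 16777216, 67108864,
        ],
    )
    token_usage_histogram.record(12, attributes={"gen_ai.operation.name": "chat"})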
@@ -75,48 +75,6 @@ To uninstrument clients, call the uninstrument method:
     # Uninstrument all clients
     OpenAIInstrumentor().uninstrument()
 
-Bucket Boundaries
------------------
-
-This section describes the explicit bucket boundaries for metrics such as token usage and operation duration, and guides users to create Views to implement them according to the semantic conventions.
-
-The bucket boundaries are defined as follows:
-
-- For `gen_ai.client.token.usage`: [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
-- For `gen_ai.client.operation.duration`: [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]
-
-To implement these bucket boundaries, you can create Views in your OpenTelemetry SDK setup. Here is an example:
-
-.. code-block:: python
-
-    from opentelemetry.sdk.metrics import MeterProvider, View
-    from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
-    from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
-    from opentelemetry.sdk.metrics.aggregation import ExplicitBucketHistogramAggregation
-
-    views = [
-        View(
-            instrument_name="gen_ai.client.token.usage",
-            aggregation=ExplicitBucketHistogramAggregation([1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]),
-        ),
-        View(
-            instrument_name="gen_ai.client.operation.duration",
-            aggregation=ExplicitBucketHistogramAggregation([0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]),
-        ),
-    ]
-
-    metric_exporter = OTLPMetricExporter(endpoint="http://localhost:4317")
-    metric_reader = PeriodicExportingMetricReader(metric_exporter)
-    provider = MeterProvider(
-        metric_readers=[metric_reader],
-        views=views
-    )
-
-    from opentelemetry.sdk.metrics import set_meter_provider
-    set_meter_provider(provider)
-
-For more details, refer to the `OpenTelemetry GenAI Metrics documentation <https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/>`_.
-
 References
 ----------
 * `OpenTelemetry OpenAI Instrumentation <https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation-genai/openai.html>`_
@@ -1,5 +1,5 @@
 openai~=1.57.3
 
-opentelemetry-sdk~=1.29.0
-opentelemetry-exporter-otlp-proto-grpc~=1.29.0
-opentelemetry-instrumentation-openai-v2~=2.0b0
+opentelemetry-sdk~=1.30.0
+opentelemetry-exporter-otlp-proto-grpc~=1.30.0
+opentelemetry-instrumentation-openai-v2~=2.2b0
@@ -1,6 +1,6 @@
 openai~=1.57.3
 
-opentelemetry-sdk~=1.29.0
-opentelemetry-exporter-otlp-proto-grpc~=1.29.0
-opentelemetry-distro~=0.50b0
-opentelemetry-instrumentation-openai-v2~=2.0b0
+opentelemetry-sdk~=1.30.0
+opentelemetry-exporter-otlp-proto-grpc~=1.30.0
+opentelemetry-distro~=0.51b0
+opentelemetry-instrumentation-openai-v2~=2.2b0
@@ -26,9 +26,9 @@ classifiers = [
     "Programming Language :: Python :: 3.13",
 ]
 dependencies = [
-    "opentelemetry-api ~= 1.28",
-    "opentelemetry-instrumentation ~= 0.49b0",
-    "opentelemetry-semantic-conventions ~= 0.49b0"
+    "opentelemetry-api ~= 1.30",
+    "opentelemetry-instrumentation ~= 0.51b0",
+    "opentelemetry-semantic-conventions ~= 0.51b0"
 ]
 
 [project.optional-dependencies]
@@ -1,11 +1,52 @@
+from opentelemetry.metrics import Histogram, Meter
 from opentelemetry.semconv._incubating.metrics import gen_ai_metrics
 
+_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS = [
+    0.01,
+    0.02,
+    0.04,
+    0.08,
+    0.16,
+    0.32,
+    0.64,
+    1.28,
+    2.56,
+    5.12,
+    10.24,
+    20.48,
+    40.96,
+    81.92,
+]
+
+_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS = [
+    1,
+    4,
+    16,
+    64,
+    256,
+    1024,
+    4096,
+    16384,
+    65536,
+    262144,
+    1048576,
+    4194304,
+    16777216,
+    67108864,
+]
+
 
 class Instruments:
-    def __init__(self, meter):
-        self.operation_duration_histogram = (
-            gen_ai_metrics.create_gen_ai_client_operation_duration(meter)
+    def __init__(self, meter: Meter):
+        self.operation_duration_histogram: Histogram = meter.create_histogram(
+            name=gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION,
+            description="GenAI operation duration",
+            unit="s",
+            explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
         )
-        self.token_usage_histogram = (
-            gen_ai_metrics.create_gen_ai_client_token_usage(meter)
+        self.token_usage_histogram: Histogram = meter.create_histogram(
+            name=gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE,
+            description="Measures number of input and output tokens used",
+            unit="{token}",
+            explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS,
         )
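One way to see the advice take effect end to end, with no View configured, is to export through an InMemoryMetricReader and inspect the data point's explicit_bounds, which is exactly what the updated tests below assert. This is a sketch under the assumption of opentelemetry-sdk >= 1.30, not part of this change; the meter name and recorded value are made up:

    # Sketch: verify that advisory boundaries reach the exported histogram
    # when no View is configured (assumes opentelemetry-sdk >= 1.30).
    from opentelemetry.sdk.metrics import MeterProvider
    from opentelemetry.sdk.metrics.export import InMemoryMetricReader

    reader = InMemoryMetricReader()
    provider = MeterProvider(metric_readers=[reader])
    meter = provider.get_meter("advice-check")  # illustrative meter name

    histogram = meter.create_histogram(
        name="gen_ai.client.operation.duration",
        unit="s",
        explicit_bucket_boundaries_advisory=[
            0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64,
            1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92,
        ],
    )
    histogram.record(0.05)

    # Collect in-memory and walk down to the single histogram data point.
    metrics_data = reader.get_metrics_data()
    point = (
        metrics_data.resource_metrics[0]
        .scope_metrics[0]
        .metrics[0]
        .data.data_points[0]
    )
    # Should print the advised boundaries rather than the SDK defaults.
    print(point.explicit_bounds)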
@@ -8,9 +8,9 @@ pytest==7.4.4
 pytest-vcr==1.0.2
 pytest-asyncio==0.21.0
 wrapt==1.16.0
-opentelemetry-exporter-otlp-proto-http~=1.28
-opentelemetry-api==1.28 # when updating, also update in pyproject.toml
-opentelemetry-sdk==1.28 # when updating, also update in pyproject.toml
-opentelemetry-semantic-conventions==0.49b0 # when updating, also update in pyproject.toml
+opentelemetry-exporter-otlp-proto-http~=1.30
+opentelemetry-api==1.30 # when updating, also update in pyproject.toml
+opentelemetry-sdk==1.30 # when updating, also update in pyproject.toml
+opentelemetry-semantic-conventions==0.51b0 # when updating, also update in pyproject.toml
 
 -e instrumentation-genai/opentelemetry-instrumentation-openai-v2
@@ -18,16 +18,11 @@ from opentelemetry.sdk._logs.export import (
     SimpleLogRecordProcessor,
 )
 from opentelemetry.sdk.metrics import (
-    Histogram,
     MeterProvider,
 )
 from opentelemetry.sdk.metrics.export import (
     InMemoryMetricReader,
 )
-from opentelemetry.sdk.metrics.view import (
-    ExplicitBucketHistogramAggregation,
-    View,
-)
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import SimpleSpanProcessor
 from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
@@ -72,55 +67,8 @@ def fixture_event_logger_provider(log_exporter):
 
 @pytest.fixture(scope="function", name="meter_provider")
 def fixture_meter_provider(metric_reader):
-    token_usage_histogram_view = View(
-        instrument_type=Histogram,
-        instrument_name="gen_ai.client.token.usage",
-        aggregation=ExplicitBucketHistogramAggregation(
-            boundaries=[
-                1,
-                4,
-                16,
-                64,
-                256,
-                1024,
-                4096,
-                16384,
-                65536,
-                262144,
-                1048576,
-                4194304,
-                16777216,
-                67108864,
-            ]
-        ),
-    )
-
-    duration_histogram_view = View(
-        instrument_type=Histogram,
-        instrument_name="gen_ai.client.operation.duration",
-        aggregation=ExplicitBucketHistogramAggregation(
-            boundaries=[
-                0.01,
-                0.02,
-                0.04,
-                0.08,
-                0.16,
-                0.32,
-                0.64,
-                1.28,
-                2.56,
-                5.12,
-                10.24,
-                20.48,
-                40.96,
-                81.92,
-            ]
-        ),
-    )
-
     meter_provider = MeterProvider(
         metric_readers=[metric_reader],
-        views=[token_usage_histogram_view, duration_histogram_view],
     )
 
     return meter_provider
@@ -8,6 +8,39 @@ from opentelemetry.semconv._incubating.attributes import (
 )
 from opentelemetry.semconv._incubating.metrics import gen_ai_metrics
 
+_DURATION_BUCKETS = (
+    0.01,
+    0.02,
+    0.04,
+    0.08,
+    0.16,
+    0.32,
+    0.64,
+    1.28,
+    2.56,
+    5.12,
+    10.24,
+    20.48,
+    40.96,
+    81.92,
+)
+_TOKEN_USAGE_BUCKETS = (
+    1,
+    4,
+    16,
+    64,
+    256,
+    1024,
+    4096,
+    16384,
+    65536,
+    262144,
+    1048576,
+    4194304,
+    16777216,
+    67108864,
+)
+
 
 def assert_all_metric_attributes(data_point):
     assert GenAIAttributes.GEN_AI_OPERATION_NAME in data_point.attributes
@@ -77,8 +110,11 @@ def test_chat_completion_metrics(
         None,
     )
     assert duration_metric is not None
-    assert duration_metric.data.data_points[0].sum > 0
-    assert_all_metric_attributes(duration_metric.data.data_points[0])
+
+    duration_point = duration_metric.data.data_points[0]
+    assert duration_point.sum > 0
+    assert_all_metric_attributes(duration_point)
+    assert duration_point.explicit_bounds == _DURATION_BUCKETS
 
     token_usage_metric = next(
         (
@@ -101,7 +137,8 @@ def test_chat_completion_metrics(
     )
     assert input_token_usage is not None
     assert input_token_usage.sum == 12
-    # assert against buckets [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
+
+    assert input_token_usage.explicit_bounds == _TOKEN_USAGE_BUCKETS
     assert input_token_usage.bucket_counts[2] == 1
     assert_all_metric_attributes(input_token_usage)
 