From c4d11f358ad6528111dcc9edf7a46fa6dc74ee42 Mon Sep 17 00:00:00 2001
From: RAJIB DEB
Date: Sat, 13 Dec 2025 09:44:03 -0800
Subject: [PATCH 1/2] fix(telemetry): remove duplicate accumulation of usage metrics to prevent double counting

---
 src/strands/telemetry/tracer.py | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/src/strands/telemetry/tracer.py b/src/strands/telemetry/tracer.py
index 2f42d9988..3c6f3c48b 100644
--- a/src/strands/telemetry/tracer.py
+++ b/src/strands/telemetry/tracer.py
@@ -666,19 +666,7 @@ def end_agent_span(
             event_attributes={"message": str(response), "finish_reason": str(response.stop_reason)},
         )
 
-        if hasattr(response, "metrics") and hasattr(response.metrics, "accumulated_usage"):
-            accumulated_usage = response.metrics.accumulated_usage
-            attributes.update(
-                {
-                    "gen_ai.usage.prompt_tokens": accumulated_usage["inputTokens"],
-                    "gen_ai.usage.completion_tokens": accumulated_usage["outputTokens"],
-                    "gen_ai.usage.input_tokens": accumulated_usage["inputTokens"],
-                    "gen_ai.usage.output_tokens": accumulated_usage["outputTokens"],
-                    "gen_ai.usage.total_tokens": accumulated_usage["totalTokens"],
-                    "gen_ai.usage.cache_read_input_tokens": accumulated_usage.get("cacheReadInputTokens", 0),
-                    "gen_ai.usage.cache_write_input_tokens": accumulated_usage.get("cacheWriteInputTokens", 0),
-                }
-            )
+
         self._end_span(span, attributes, error)


From 76a1577470e19fdf2e37553cbbb63efa408341bb Mon Sep 17 00:00:00 2001
From: poshinchen
Date: Wed, 17 Dec 2025 14:55:22 -0500
Subject: [PATCH 2/2] add langfuse observation type to invoke_agent to prevent double counting

---
 src/strands/telemetry/tracer.py        | 18 +++++++++++++++-
 tests/strands/telemetry/test_tracer.py | 30 ++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/src/strands/telemetry/tracer.py b/src/strands/telemetry/tracer.py
index 3c6f3c48b..d16b37fc8 100644
--- a/src/strands/telemetry/tracer.py
+++ b/src/strands/telemetry/tracer.py
@@ -666,7 +666,23 @@ def end_agent_span(
             event_attributes={"message": str(response), "finish_reason": str(response.stop_reason)},
         )
-
+        if hasattr(response, "metrics") and hasattr(response.metrics, "accumulated_usage"):
+            if "langfuse" in os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "") or "langfuse" in os.getenv(
+                "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", ""
+            ):
+                attributes.update({"langfuse.observation.type": "span"})
+            accumulated_usage = response.metrics.accumulated_usage
+            attributes.update(
+                {
+                    "gen_ai.usage.prompt_tokens": accumulated_usage["inputTokens"],
+                    "gen_ai.usage.completion_tokens": accumulated_usage["outputTokens"],
+                    "gen_ai.usage.input_tokens": accumulated_usage["inputTokens"],
+                    "gen_ai.usage.output_tokens": accumulated_usage["outputTokens"],
+                    "gen_ai.usage.total_tokens": accumulated_usage["totalTokens"],
+                    "gen_ai.usage.cache_read_input_tokens": accumulated_usage.get("cacheReadInputTokens", 0),
+                    "gen_ai.usage.cache_write_input_tokens": accumulated_usage.get("cacheWriteInputTokens", 0),
+                }
+            )
         self._end_span(span, attributes, error)

diff --git a/tests/strands/telemetry/test_tracer.py b/tests/strands/telemetry/test_tracer.py
index 205748956..cb98b8130 100644
--- a/tests/strands/telemetry/test_tracer.py
+++ b/tests/strands/telemetry/test_tracer.py
@@ -794,6 +794,36 @@ def test_end_agent_span(mock_span):
     mock_span.end.assert_called_once()
 
 
+def test_end_agent_span_with_langfuse_observation_type(mock_span, monkeypatch):
+    """Test ending an agent span with Langfuse observation type to prevent double counting the tokens."""
+    monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "https://us.cloud.langfuse.com")
+    tracer = Tracer()
+
+    # Mock AgentResult with metrics
+    mock_metrics = mock.MagicMock()
+    mock_metrics.accumulated_usage = {"inputTokens": 50, "outputTokens": 100, "totalTokens": 150}
+
+    mock_response = mock.MagicMock()
+    mock_response.metrics = mock_metrics
+    mock_response.stop_reason = "end_turn"
+    mock_response.__str__ = mock.MagicMock(return_value="Agent response")
+
+    tracer.end_agent_span(mock_span, mock_response)
+    mock_span.set_attribute.assert_any_call("langfuse.observation.type", "span")
+    mock_span.set_attribute.assert_any_call("gen_ai.usage.prompt_tokens", 50)
+    mock_span.set_attribute.assert_any_call("gen_ai.usage.input_tokens", 50)
+    mock_span.set_attribute.assert_any_call("gen_ai.usage.output_tokens", 100)
+    mock_span.set_attribute.assert_any_call("gen_ai.usage.total_tokens", 150)
+    mock_span.set_attribute.assert_any_call("gen_ai.usage.cache_read_input_tokens", 0)
+    mock_span.set_attribute.assert_any_call("gen_ai.usage.cache_write_input_tokens", 0)
+    mock_span.add_event.assert_any_call(
+        "gen_ai.choice",
+        attributes={"message": "Agent response", "finish_reason": "end_turn"},
+    )
+    mock_span.set_status.assert_called_once_with(StatusCode.OK)
+    mock_span.end.assert_called_once()
+
+
 def test_end_agent_span_latest_conventions(mock_span, monkeypatch):
     """Test ending an agent span with the latest semantic conventions."""
     monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental")
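
Reviewer note (not part of the patch): below is a minimal standalone sketch of the endpoint gating that PATCH 2/2 adds, assuming only the Python standard library. The helper name build_usage_attributes and the __main__ demo are hypothetical; the attribute keys, env-var names, and the Langfuse URL are taken directly from the diff and the new test.

# Reviewer sketch (hypothetical helper, not part of src/strands/telemetry/tracer.py).
# Mirrors the gating added in PATCH 2/2: when an OTLP endpoint points at Langfuse,
# the agent span is tagged as a plain "span" observation so the accumulated usage
# is not double counted, per the commit messages.
import os


def build_usage_attributes(accumulated_usage: dict) -> dict:
    attributes: dict = {}
    # Langfuse detection: either OTLP endpoint env var mentioning "langfuse" triggers the tag.
    if "langfuse" in os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "") or "langfuse" in os.getenv(
        "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", ""
    ):
        attributes["langfuse.observation.type"] = "span"
    # Usage attributes are emitted unconditionally, exactly as in the patched tracer.
    attributes.update(
        {
            "gen_ai.usage.prompt_tokens": accumulated_usage["inputTokens"],
            "gen_ai.usage.completion_tokens": accumulated_usage["outputTokens"],
            "gen_ai.usage.input_tokens": accumulated_usage["inputTokens"],
            "gen_ai.usage.output_tokens": accumulated_usage["outputTokens"],
            "gen_ai.usage.total_tokens": accumulated_usage["totalTokens"],
            "gen_ai.usage.cache_read_input_tokens": accumulated_usage.get("cacheReadInputTokens", 0),
            "gen_ai.usage.cache_write_input_tokens": accumulated_usage.get("cacheWriteInputTokens", 0),
        }
    )
    return attributes


if __name__ == "__main__":
    # Same endpoint and usage numbers as the new test case.
    os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "https://us.cloud.langfuse.com"
    print(build_usage_attributes({"inputTokens": 50, "outputTokens": 100, "totalTokens": 150}))

Under this reading, the env-var check is the only Langfuse-specific piece; every other OTLP backend still receives the gen_ai.usage.* attributes unchanged, and only Langfuse sees the extra langfuse.observation.type tag.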