fix(anthropic): add response id attribute #2543

Merged · 3 commits · Jan 23, 2025
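This change makes the Anthropic instrumentation record the provider-assigned response id on every span it produces, using the incubating GenAI semantic-convention attribute gen_ai.response.id. Three files are touched: the instrumentation module (non-streaming responses), the streaming accumulators (sync and async), and the test suite, which pins the ids captured in its recorded cassettes. A minimal sketch of what a consumer of the exported spans sees after this change — the in-memory exporter is an assumption mirroring the test fixtures, not part of the diff:

from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import (
    GEN_AI_RESPONSE_ID,  # the constant resolves to "gen_ai.response.id"
)

# assumption: exporter is an InMemorySpanExporter attached to the tracer provider
anthropic_span = exporter.get_finished_spans()[0]
# after this PR the span carries the id Anthropic assigned to the response
print(anthropic_span.attributes.get(GEN_AI_RESPONSE_ID))  # e.g. "msg_01TPXhkPo8jy6yQMrMhjpiAE"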
@@ -28,6 +28,7 @@
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY, unwrap
 from opentelemetry.metrics import Counter, Histogram, Meter, get_meter
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_RESPONSE_ID
 from opentelemetry.semconv_ai import (
     SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
     LLMRequestTypeValues,
@@ -438,6 +439,7 @@ def _set_response_attributes(span, response):
     if not isinstance(response, dict):
         response = response.__dict__
     set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, response.get("model"))
+    set_span_attribute(span, GEN_AI_RESPONSE_ID, response.get("id"))

     if response.get("usage"):
         prompt_tokens = response.get("usage").input_tokens
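For context, here is the non-streaming path condensed into one place. _set_response_attributes normalizes the SDK response object to a dict and copies fields onto the span; the guard inside set_span_attribute is an assumption, based on how the instrumentation tolerates missing optional fields like model:

def set_span_attribute(span, name, value):
    # assumed helper: only record attributes that actually carry a value
    if value is not None and value != "":
        span.set_attribute(name, value)


def _set_response_attributes(span, response):
    # Anthropic SDK objects expose their fields via __dict__; plain dicts pass through
    if not isinstance(response, dict):
        response = response.__dict__
    set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, response.get("model"))
    # new in this PR: propagate the provider-assigned response id
    set_span_attribute(span, GEN_AI_RESPONSE_ID, response.get("id"))

The next diff applies the same idea to streamed responses.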
@@ -11,6 +11,7 @@
     should_send_prompts,
 )
 from opentelemetry.metrics import Counter, Histogram
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_RESPONSE_ID
 from opentelemetry.semconv_ai import SpanAttributes
 from opentelemetry.trace.status import Status, StatusCode

@@ -22,6 +23,7 @@ def _process_response_item(item, complete_response):
     if item.type == "message_start":
         complete_response["model"] = item.message.model
         complete_response["usage"] = dict(item.message.usage)
+        complete_response["id"] = item.message.id
     elif item.type == "content_block_start":
         index = item.index
         if len(complete_response.get("events")) <= index:
@@ -131,7 +133,7 @@ def build_from_streaming_response(
     exception_counter: Counter = None,
     kwargs: dict = {},
 ):
-    complete_response = {"events": [], "model": "", "usage": {}}
+    complete_response = {"events": [], "model": "", "usage": {}, "id": ""}
     for item in response:
         try:
             yield item
@@ -143,7 +145,7 @@
             _process_response_item(item, complete_response)

     metric_attributes = shared_metrics_attributes(complete_response)
-
+    set_span_attribute(span, GEN_AI_RESPONSE_ID, complete_response.get("id"))
     if duration_histogram:
         duration = time.time() - start_time
         duration_histogram.record(
@@ -206,7 +208,7 @@ async def abuild_from_streaming_response(
     exception_counter: Counter = None,
     kwargs: dict = {},
 ):
-    complete_response = {"events": [], "model": "", "usage": {}}
+    complete_response = {"events": [], "model": "", "usage": {}, "id": ""}
     async for item in response:
        try:
            yield item
@@ -217,6 +219,8 @@
             raise e
         _process_response_item(item, complete_response)

+    set_span_attribute(span, GEN_AI_RESPONSE_ID, complete_response.get("id"))
+
     metric_attributes = shared_metrics_attributes(complete_response)

     if duration_histogram:
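The streaming path cannot read the id off a single response object, so both accumulators (sync and async) seed complete_response with an empty id and fill it from the stream's message_start event; once the stream is drained, the id is written to the span alongside the metrics. Condensed, with the retry and metrics code elided:

complete_response = {"events": [], "model": "", "usage": {}, "id": ""}

def _process_response_item(item, complete_response):
    # message_start is the first event of an Anthropic stream and carries
    # the message metadata: model, usage, and (new in this PR) the id
    if item.type == "message_start":
        complete_response["model"] = item.message.model
        complete_response["usage"] = dict(item.message.usage)
        complete_response["id"] = item.message.id

# after the last event has been yielded back to the caller:
set_span_attribute(span, GEN_AI_RESPONSE_ID, complete_response.get("id"))

The remaining diff updates the test suite to assert the new attribute across all of these paths.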
@@ -97,6 +97,7 @@ def test_anthropic_completion(exporter, reader):
         == f"{HUMAN_PROMPT}\nHello world\n{AI_PROMPT}"
     )
     assert anthropic_span.attributes.get(f"{SpanAttributes.LLM_COMPLETIONS}.0.content")
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "compl_01EjfrPvPEsRDRUKD6VoBxtK"

     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -143,6 +144,7 @@ def test_anthropic_message_create(exporter, reader):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01TPXhkPo8jy6yQMrMhjpiAE"

     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -205,6 +207,7 @@ def test_anthropic_multi_modal(exporter):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01B37ySLPzYj8KY6uZmiPoxd"


 @pytest.mark.vcr
@@ -264,6 +267,7 @@ async def test_anthropic_async_multi_modal(exporter):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01DWnmUo9hWk4Fk7V7Ddfa2w"


 @pytest.mark.vcr
@@ -312,6 +316,7 @@ def test_anthropic_message_streaming(exporter, reader):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01MXWxhWoPSgrYhjTuMDM6F1"

     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -360,6 +365,7 @@ async def test_async_anthropic_message_create(exporter, reader):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01UFDDjsFn5BPQnfNwmsMnAY"

     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -414,6 +420,7 @@ async def test_async_anthropic_message_streaming(exporter, reader):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_016o6A7zDmgjucf5mWv1rrPD"

     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -554,6 +561,7 @@ def test_anthropic_tools(exporter, reader):
     assert (anthropic_span.attributes["gen_ai.completion.0.tool_calls.1.name"]) == response.content[2].name
     response_input = json.dumps(response.content[2].input)
     assert (anthropic_span.attributes["gen_ai.completion.0.tool_calls.1.arguments"] == response_input)
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01RBkXFe9TmDNNWThMz2HmGt"

     # verify metrics
     metrics_data = reader.get_metrics_data()
@@ -621,6 +629,8 @@ def test_anthropic_prompt_caching(exporter, reader):
         cache_creation_span.attributes["gen_ai.usage.cache_creation_input_tokens"]
         == cache_read_span.attributes["gen_ai.usage.cache_read_input_tokens"]
     )
+    assert cache_creation_span.attributes.get("gen_ai.response.id") == "msg_01BQdyey3QZM7KW1nM7xXdjR"
+    assert cache_read_span.attributes.get("gen_ai.response.id") == "msg_013eHbX6hmAnH7kp6NNp2qyL"

     # first check that cache_creation_span only wrote to cache, but not read from it,
     assert cache_creation_span.attributes["gen_ai.usage.cache_read_input_tokens"] == 0
@@ -707,6 +717,8 @@ async def test_anthropic_prompt_caching_async(exporter, reader):
         cache_creation_span.attributes["gen_ai.usage.cache_creation_input_tokens"]
         == cache_read_span.attributes["gen_ai.usage.cache_read_input_tokens"]
     )
+    assert cache_creation_span.attributes.get("gen_ai.response.id") == "msg_018V6UftrxrM1Z5Rt8govzD1"
+    assert cache_read_span.attributes.get("gen_ai.response.id") == "msg_01MsbM1E6LPe7pz1NrkZEGev"

     # first check that cache_creation_span only wrote to cache, but not read from it,
     assert cache_creation_span.attributes["gen_ai.usage.cache_read_input_tokens"] == 0
@@ -796,6 +808,8 @@ def test_anthropic_prompt_caching_stream(exporter, reader):
         cache_creation_span.attributes["gen_ai.usage.cache_creation_input_tokens"]
         == cache_read_span.attributes["gen_ai.usage.cache_read_input_tokens"]
     )
+    assert cache_creation_span.attributes.get("gen_ai.response.id") == "msg_015AgAJmgFQdYXgvvgbuKqPY"
+    assert cache_read_span.attributes.get("gen_ai.response.id") == "msg_01HSJ2wYvBf4DbFbwCaiGreG"

     # first check that cache_creation_span only wrote to cache, but not read from it,
     assert cache_creation_span.attributes["gen_ai.usage.cache_read_input_tokens"] == 0
@@ -877,6 +891,8 @@ async def test_anthropic_prompt_caching_async_stream(exporter, reader):
     assert system_message == cache_creation_span.attributes["gen_ai.prompt.0.content"]
     assert cache_read_span.attributes["gen_ai.prompt.0.role"] == "system"
     assert system_message == cache_read_span.attributes["gen_ai.prompt.0.content"]
+    assert cache_creation_span.attributes.get("gen_ai.response.id") == "msg_01VBrTFKAYvujMd593dtpRHF"
+    assert cache_read_span.attributes.get("gen_ai.response.id") == "msg_01BqqtrPfxepxW2xPuZz1m6h"

     assert cache_creation_span.attributes["gen_ai.prompt.1.role"] == "user"
     assert text == cache_creation_span.attributes["gen_ai.prompt.1.content"]
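The exact ids asserted above are stable because these tests replay recorded HTTP cassettes via pytest-vcr rather than calling the live API. The shape of one such test, condensed; the model name and prompt here are placeholders, and the exporter/reader fixtures come from the surrounding test module:

@pytest.mark.vcr  # replays a pre-recorded Anthropic HTTP exchange
def test_anthropic_message_create(exporter, reader):
    client = anthropic.Anthropic()
    response = client.messages.create(
        model="claude-3-haiku-20240307",  # placeholder; the cassette fixes the real request
        max_tokens=1024,
        messages=[{"role": "user", "content": "Tell me a joke about OpenTelemetry"}],
    )
    anthropic_span = exporter.get_finished_spans()[0]
    # the recorded response id is exactly what the new assertions pin
    assert anthropic_span.attributes.get("gen_ai.response.id") == response.id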