Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
#!/usr/bin/env python3
"""
Example demonstrating Google GenAI instrumentation with image attachments.
This example shows that the instrumentation now properly handles:
- Part.from_bytes() for base64 encoded images
- Part.from_uri() for URI-referenced images (this script sends the fetched bytes via from_bytes)
- PDF and other file attachments
"""

import asyncio
import base64
import os

import requests
from google import genai
from google.genai.types import Content, GenerateContentConfig, Part
from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
OTLPSpanExporter, # type: ignore[import-not-found]
)
from opentelemetry.sdk import trace as trace_sdk
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

from openinference.instrumentation.google_genai import GoogleGenAIInstrumentor

# Phoenix endpoint: traces are exported over OTLP/HTTP to this URL.
endpoint = "http://localhost:6006/v1/traces"
# Tracer provider for this script; main() passes it to the instrumentor.
tracer_provider = trace_sdk.TracerProvider()
# Two span processors are registered: one exporting to the endpoint above ...
tracer_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter(endpoint)))
# ... and one using the console exporter.
tracer_provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))


def create_test_image_data() -> bytes:
    """Return the raw bytes of a 1x1 pixel PNG used as a test attachment."""
    # The image is embedded as base64 text so the example needs no file on disk.
    encoded_png = (
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8"
        "/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg=="
    )
    return base64.b64decode(encoded_png)


def test_inline_data_image():
    """Send an inline PNG built with Part.from_bytes() and report the outcome.

    Returns:
        True when the request completes and the response text is printed,
        False when any exception is raised along the way (the error is printed).
    """
    print("🖼️ Testing inline_data (Part.from_bytes) with image...")
    try:
        gemini = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

        # One user message: a text prompt followed by the inline image.
        png_part = Part.from_bytes(data=create_test_image_data(), mime_type="image/png")
        prompt_part = Part.from_text(text="Describe this image:")
        user_message = Content(role="user", parts=[prompt_part, png_part])

        generation_config = GenerateContentConfig(
            system_instruction="You are a helpful assistant. Describe what you see in images."
        )

        print(" Making API call with inline image data...")
        reply = gemini.models.generate_content(
            model="gemini-2.0-flash",
            contents=user_message,
            config=generation_config,
        )

        # Only the first 100 characters of the reply are shown.
        preview = reply.text[:100]
        print(f" ✅ Success! Response: {preview}...")
        return True

    except Exception as exc:
        # Example-script boundary: report the failure instead of propagating it.
        print(f" ❌ Error: {exc}")
        return False


def test_inline_data_pdf():
    """Send a small hand-written PDF via Part.from_bytes() and report the outcome.

    Returns:
        True when the request completes and the response text is printed,
        False when any exception is raised along the way (the error is printed).
    """
    print("📄 Testing inline_data (Part.from_bytes) with PDF...")
    try:
        client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

        # The payload is assembled from adjacent single-line literals. Wrapping
        # long triple-quoted literals across source lines would embed raw line
        # breaks in the bytes (e.g. inside the "(Test PDF content)" string and
        # inside an xref entry), so every newline here is an explicit "\n".
        # The page's /Contents entry is a plain indirect reference ("4 0 R").
        # NOTE(review): the xref offsets and /Length value are illustrative and
        # have not been recomputed against the actual byte positions.
        fake_pdf_data = (
            b"%PDF-1.4\n"
            b"1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n"
            b"2 0 obj\n<<\n/Type /Pages\n/Kids [3 0 R]\n/Count 1\n>>\nendobj\n"
            b"3 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n"
            b"/MediaBox [0 0 612 792]\n/Contents 4 0 R\n>>\nendobj\n"
            b"4 0 obj\n<<\n/Length 44\n>>\nstream\nBT\n/F1 12 Tf\n72 720 Td\n"
            b"(Test PDF content) Tj\nET\nendstream\nendobj\n"
            b"xref\n0 5\n0000000000 65535 f \n0000000010 00000 n \n"
            b"0000000053 00000 n \n0000000100 00000 n \n0000000178 00000 n \n"
            b"trailer\n<<\n/Size 5\n/Root 1 0 R\n>>\nstartxref\n273\n%%EOF"
        )

        pdf_part = Part.from_bytes(data=fake_pdf_data, mime_type="application/pdf")

        # One user message: a text prompt followed by the inline PDF.
        content = Content(
            role="user",
            parts=[
                Part.from_text(text="Analyze this PDF document:"),
                pdf_part,
            ],
        )

        print(" Making API call with inline PDF data...")
        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=content,
        )

        print(f" ✅ Success! Response: {response.text[:100]}...")
        return True

    except Exception as e:
        # Example-script boundary: report the failure instead of propagating it.
        print(f" ❌ Error: {e}")
        return False


def test_file_data_uri():
    """Download a PNG over HTTP, send it via Part.from_bytes(), and report the outcome.

    The image is fetched with ``requests`` and attached as inline bytes rather
    than being referenced by URI.

    Returns:
        True when the download and the request both complete and the response
        text is printed, False when any exception is raised (the error is printed).
    """
    print("🔗 Testing file_data (Part.from_uri equivalent via bytes)...")
    try:
        client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

        # fetch the image manually - using a simple, reliable image
        # A timeout is required: without one, requests waits indefinitely on a
        # stalled connection.
        image_response = requests.get(
            "https://httpbin.org/image/png",
            headers={"User-Agent": "Mozilla/5.0 (compatible; OpenInference-Test)"},
            timeout=30,
        )
        # Fail here on an HTTP error instead of sending an error page as a PNG.
        image_response.raise_for_status()
        img_bytes = image_response.content

        image_part = Part.from_bytes(data=img_bytes, mime_type="image/png")

        # One user message: a text prompt followed by the downloaded image.
        content = Content(
            role="user",
            parts=[
                Part.from_text(text="What do you see in this image?"),
                image_part,
            ],
        )

        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=content,
        )
        print(f" ✅ Success! Response: {response.text[:100]}...")
        return True

    except Exception as e:
        # Example-script boundary: download, HTTP-status and API errors are all
        # reported here instead of propagating.
        print(f" ❌ Error: {e}")
        return False


async def test_async_with_images():
    """Send an inline PNG through the async client and report the outcome.

    Returns:
        True when the awaited request completes and the response text is
        printed, False when any exception is raised (the error is printed).
    """
    print("🔄 Testing async API with images...")
    try:
        # `.aio` is the async counterpart of the client used by the sync examples.
        async_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY")).aio

        png_part = Part.from_bytes(data=create_test_image_data(), mime_type="image/png")
        prompt_part = Part.from_text(text="Describe this small test image:")
        user_message = Content(role="user", parts=[prompt_part, png_part])

        print(" Making async API call with image...")
        reply = await async_client.models.generate_content(
            model="gemini-2.0-flash",
            contents=user_message,
        )

        # Only the first 100 characters of the reply are shown.
        preview = reply.text[:100]
        print(f" ✅ Success! Async response: {preview}...")
        return True

    except Exception as exc:
        # Example-script boundary: report the failure instead of propagating it.
        print(f" ❌ Error: {exc}")
        return False


def main():
    """Run every attachment example in turn and print a pass/fail summary.

    Prints a hint and returns early when GEMINI_API_KEY is not set. Otherwise
    the Google GenAI client is instrumented with the module-level tracer
    provider, the three synchronous examples and the async example are run in
    order, and the number of examples that returned True is reported.
    """
    divider = "=" * 60

    print("🚀 Testing Google GenAI Instrumentation with Images")
    for banner_line in (
        divider,
        "This demonstrates that the instrumentation now properly handles:",
        "- Part.from_bytes() for inline image/file data",
        "- Part.from_uri() for URI-referenced files",
        "- No more 'Other field types not supported' errors!",
        divider,
    ):
        print(banner_line)

    # Nothing below can work without credentials, so stop before instrumenting.
    if not os.getenv("GEMINI_API_KEY"):
        print("❌ Please set GEMINI_API_KEY environment variable")
        print(" export GEMINI_API_KEY='your-api-key-here'")
        return

    print("🔧 Instrumenting Google GenAI client...")
    GoogleGenAIInstrumentor().instrument(tracer_provider=tracer_provider)
    print(" ✅ Instrumentation enabled - traces will be sent to Phoenix!\n")

    # Each example returns True/False; they run strictly in this order.
    outcomes = []
    outcomes.append(test_inline_data_image())
    outcomes.append(test_inline_data_pdf())
    outcomes.append(test_file_data_uri())
    outcomes.append(asyncio.run(test_async_with_images()))

    print(divider)
    print("📊 RESULTS SUMMARY:")
    passed = sum(outcomes)
    total = len(outcomes)
    print(f" ✅ Successful tests: {passed}/{total}")

    if not all(outcomes):
        print(" ⚠️ Some tests failed - check API key and network connection")
    else:
        print(" 🎉 All tests passed! The instrumentation fix is working!")
        print(" 📈 Check Phoenix UI at http://localhost:6006 to see the traces")

    print(divider)


# Run the examples only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ instruments = [
]
test = [
"google-genai >= 1.8.0",
"opentelemetry-sdk",
"opentelemetry-sdk>=1.38.0",
"opentelemetry-exporter-otlp>=1.38.0",
]

[project.entry-points.opentelemetry_instrumentor]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
_io_value_and_type,
)
from openinference.semconv.trace import (
ImageAttributes,
MessageAttributes,
MessageContentAttributes,
OpenInferenceLLMProviderValues,
OpenInferenceSpanKindValues,
SpanAttributes,
Expand Down Expand Up @@ -383,6 +385,49 @@ def _get_attributes_from_function_response(
id,
)

def _get_attributes_from_inline_data(
    self, inline_data: Any
) -> Iterator[Tuple[str, AttributeValue]]:
    """Handle inline data (base64 encoded content) from Part.from_bytes().

    Args:
        inline_data: Object exposing ``mime_type`` and ``data``; ``data`` may be
            raw bytes or an already base64-encoded string.

    Yields:
        For images with data, a single image-URL attribute holding a
        ``data:<mime>;base64,<payload>`` URL. For images without data and for
        every non-image MIME type, a single message-content attribute with a
        short textual placeholder.
    """
    # `mime_type` can be present but set to None; per review feedback on this
    # change, get_attribute's default only applies when the attribute is
    # missing. The trailing `or` keeps `.startswith` below from raising
    # AttributeError in that case.
    mime_type = get_attribute(inline_data, "mime_type", "unknown") or "unknown"
    data = get_attribute(inline_data, "data")

    if mime_type.startswith("image/"):
        # Use proper semantic conventions for images
        if data:
            import base64

            # Bytes are encoded here; anything else is used as the payload as is.
            base64_data = base64.b64encode(data).decode() if isinstance(data, bytes) else data
            data_url = f"data:{mime_type};base64,{base64_data}"
            yield (
                f"{MessageContentAttributes.MESSAGE_CONTENT_IMAGE}.{ImageAttributes.IMAGE_URL}",
                data_url,
            )
        else:
            # Fallback for images without data
            yield (MessageAttributes.MESSAGE_CONTENT, f"[Image: {mime_type}]")
    else:
        # For non-image files, use descriptive text (no specific semantic convention available)
        data_size = len(data) if data else 0
        yield (MessageAttributes.MESSAGE_CONTENT, f"[File: {mime_type}, {data_size} bytes]")

def _get_attributes_from_file_data(
    self, file_data: Any
) -> Iterator[Tuple[str, AttributeValue]]:
    """Handle file data (URI references) from Part.from_uri().

    Args:
        file_data: Object exposing ``file_uri`` and ``mime_type``.

    Yields:
        For ``image/*`` MIME types, a single image-URL attribute whose value is
        the file URI. For every other MIME type, a single message-content
        attribute with a short textual placeholder.
    """
    # Either attribute can be present but set to None. The trailing `or` covers
    # that case as well as a missing attribute, so `.startswith` cannot raise
    # AttributeError and None is never emitted as an attribute value.
    file_uri = get_attribute(file_data, "file_uri", "unknown") or "unknown"
    mime_type = get_attribute(file_data, "mime_type", "unknown") or "unknown"

    if mime_type.startswith("image/"):
        # Use proper semantic conventions for images
        yield (
            f"{MessageContentAttributes.MESSAGE_CONTENT_IMAGE}.{ImageAttributes.IMAGE_URL}",
            file_uri,
        )
    else:
        # For non-image files, use descriptive text (no specific semantic convention available)
        yield (MessageAttributes.MESSAGE_CONTENT, f"[File: {mime_type} from {file_uri}]")

def _flatten_parts(self, parts: list[Part]) -> Iterator[Tuple[str, AttributeValue]]:
content_values = []
tool_call_index = 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will

 yield (
                    f"{MessageContentAttributes.MESSAGE_CONTENT_IMAGE}.{ImageAttributes.IMAGE_URL}",
                    data_url,
                )

get ignored or inadvertently converted into a singular message_content in _flatten_parts? Seems like we only check for certain attributes, otherwise we flatten into a single message. Don't we want to preserve this image_url attribute?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, thanks for bringing this up. I have reviewed and updated the PR so that the attributes are exported and rendered correctly in Phoenix, as shown in the latest screenshot in the description above.

Expand Down Expand Up @@ -427,8 +472,15 @@ def _get_attributes_from_part(
yield from self._get_attributes_from_function_call(function_call, tool_call_index)
elif function_response := get_attribute(part, "function_response"):
yield from self._get_attributes_from_function_response(function_response)
elif inline_data := get_attribute(part, "inline_data"):
# Handle base64 encoded content (Part.from_bytes())
yield from self._get_attributes_from_inline_data(inline_data)
elif file_data := get_attribute(part, "file_data"):
# Handle URI-referenced files (Part.from_uri())
yield from self._get_attributes_from_file_data(file_data)
else:
logger.exception("Other field types of parts are not supported yet")
# Change from exception to debug log for unknown part types
logger.debug(f"Unsupported part type encountered, skipping: {type(part)}")


T = TypeVar("T", bound=type)
Expand Down
Loading